These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/drivers/net/bonding/bond_main.c
index 16d87bf..28bbca0 100644
@@ -76,7 +76,7 @@
 #include <net/netns/generic.h>
 #include <net/pkt_sched.h>
 #include <linux/rculist.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 #include <net/switchdev.h>
 #include <net/bonding.h>
 #include <net/bond_3ad.h>
@@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev);
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
                                                struct rtnl_link_stats64 *stats);
 static void bond_slave_arr_handler(struct work_struct *work);
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+                                 int mod);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -709,40 +711,57 @@ out:
 
 }
 
-static bool bond_should_change_active(struct bonding *bond)
+static struct slave *bond_choose_primary_or_current(struct bonding *bond)
 {
        struct slave *prim = rtnl_dereference(bond->primary_slave);
        struct slave *curr = rtnl_dereference(bond->curr_active_slave);
 
-       if (!prim || !curr || curr->link != BOND_LINK_UP)
-               return true;
+       if (!prim || prim->link != BOND_LINK_UP) {
+               if (!curr || curr->link != BOND_LINK_UP)
+                       return NULL;
+               return curr;
+       }
+
        if (bond->force_primary) {
                bond->force_primary = false;
-               return true;
+               return prim;
+       }
+
+       if (!curr || curr->link != BOND_LINK_UP)
+               return prim;
+
+       /* At this point, prim and curr are both up */
+       switch (bond->params.primary_reselect) {
+       case BOND_PRI_RESELECT_ALWAYS:
+               return prim;
+       case BOND_PRI_RESELECT_BETTER:
+               if (prim->speed < curr->speed)
+                       return curr;
+               if (prim->speed == curr->speed && prim->duplex <= curr->duplex)
+                       return curr;
+               return prim;
+       case BOND_PRI_RESELECT_FAILURE:
+               return curr;
+       default:
+               netdev_err(bond->dev, "impossible primary_reselect %d\n",
+                          bond->params.primary_reselect);
+               return curr;
        }
-       if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
-           (prim->speed < curr->speed ||
-            (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
-               return false;
-       if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
-               return false;
-       return true;
 }
 
 /**
- * find_best_interface - select the best available slave to be the active one
+ * bond_find_best_slave - select the best available slave to be the active one
  * @bond: our bonding struct
  */
 static struct slave *bond_find_best_slave(struct bonding *bond)
 {
-       struct slave *slave, *bestslave = NULL, *primary;
+       struct slave *slave, *bestslave = NULL;
        struct list_head *iter;
        int mintime = bond->params.updelay;
 
-       primary = rtnl_dereference(bond->primary_slave);
-       if (primary && primary->link == BOND_LINK_UP &&
-           bond_should_change_active(bond))
-               return primary;
+       slave = bond_choose_primary_or_current(bond);
+       if (slave)
+               return slave;
 
        bond_for_each_slave(bond, slave, iter) {
                if (slave->link == BOND_LINK_UP)
@@ -769,6 +788,7 @@ static bool bond_should_notify_peers(struct bonding *bond)
                   slave ? slave->dev->name : "NULL");
 
        if (!slave || !bond->send_peer_notif ||
+           !netif_carrier_ok(bond->dev) ||
            test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
                return false;
 
@@ -961,7 +981,6 @@ static void bond_poll_controller(struct net_device *bond_dev)
                if (bond_3ad_get_active_agg_info(bond, &ad_info))
                        return;
 
-       rcu_read_lock_bh();
        bond_for_each_slave_rcu(bond, slave, iter) {
                ops = slave->dev->netdev_ops;
                if (!bond_slave_is_up(slave) || !ops->ndo_poll_controller)
@@ -982,7 +1001,6 @@ static void bond_poll_controller(struct net_device *bond_dev)
                ops->ndo_poll_controller(slave->dev);
                up(&ni->dev_lock);
        }
-       rcu_read_unlock_bh();
 }
 
 static void bond_netpoll_cleanup(struct net_device *bond_dev)
@@ -1035,10 +1053,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
        netdev_features_t mask;
        struct slave *slave;
 
-       /* If any slave has the offload feature flag set,
-        * set the offload flag on the bond.
-        */
-       mask = features | NETIF_F_HW_SWITCH_OFFLOAD;
+       mask = features;
 
        features &= ~NETIF_F_ONE_FOR_ALL;
        features |= NETIF_F_ALL_FOR_ALL;
@@ -1058,7 +1073,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
                                 NETIF_F_HIGHDMA | NETIF_F_LRO)
 
 #define BOND_ENC_FEATURES      (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\
-                                NETIF_F_TSO)
+                                NETIF_F_ALL_TSO)
 
 static void bond_compute_features(struct bonding *bond)
 {
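
Context for the BOND_ENC_FEATURES change above: NETIF_F_ALL_TSO is the umbrella TSO mask, so encapsulated traffic over the bond keeps every TSO variant its slaves advertise rather than plain NETIF_F_TSO alone. A hedged approximation of the mask as defined in include/linux/netdev_features.h around this kernel generation (the exact member set varies by version):

#define NETIF_F_ALL_TSO	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
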
@@ -1194,7 +1209,6 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
        err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
        if (err)
                return err;
-       slave_dev->flags |= IFF_SLAVE;
        rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
        return 0;
 }
@@ -1452,6 +1466,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                }
        }
 
+       /* set slave flag before open to prevent IPv6 addrconf */
+       slave_dev->flags |= IFF_SLAVE;
+
        /* open the slave since the application closed it */
        res = dev_open(slave_dev);
        if (res) {
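
Why the ordering matters: dev_open() raises NETDEV_UP, and the IPv6 core declines to autoconfigure addresses on devices already flagged as slaves, so setting IFF_SLAVE before the open keeps link-local/autoconf addresses off the slave. A minimal hedged sketch of that gate, with an illustrative helper name (approximating the check in net/ipv6/addrconf.c's notifier, not part of this patch):

#include <linux/netdevice.h>

/* Hedged approximation: the NETDEV_UP / NETDEV_CHANGE path in
 * addrconf_notify() bails out early for slave devices, which is the
 * behaviour the reordering above relies on.
 */
static bool bond_example_ipv6_skips_addrconf(const struct net_device *dev)
{
	return dev->flags & IFF_SLAVE;
}
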
@@ -1712,6 +1729,7 @@ err_close:
        dev_close(slave_dev);
 
 err_restore_mac:
+       slave_dev->flags &= ~IFF_SLAVE;
        if (!bond->params.fail_over_mac ||
            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
                /* XXX TODO - fom follow mode needs to change master's
@@ -1731,9 +1749,17 @@ err_free:
 
 err_undo_flags:
        /* Enslave of first slave has failed and we need to fix master's mac */
-       if (!bond_has_slaves(bond) &&
-           ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr))
-               eth_hw_addr_random(bond_dev);
+       if (!bond_has_slaves(bond)) {
+               if (ether_addr_equal_64bits(bond_dev->dev_addr,
+                                           slave_dev->dev_addr))
+                       eth_hw_addr_random(bond_dev);
+               if (bond_dev->type != ARPHRD_ETHER) {
+                       dev_close(bond_dev);
+                       ether_setup(bond_dev);
+                       bond_dev->flags |= IFF_MASTER;
+                       bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+               }
+       }
 
        return res;
 }
@@ -2394,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
                 struct slave *slave)
 {
        struct arphdr *arp = (struct arphdr *)skb->data;
-       struct slave *curr_active_slave;
+       struct slave *curr_active_slave, *curr_arp_slave;
        unsigned char *arp_ptr;
        __be32 sip, tip;
        int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@ -2441,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
                     &sip, &tip);
 
        curr_active_slave = rcu_dereference(bond->curr_active_slave);
+       curr_arp_slave = rcu_dereference(bond->current_arp_slave);
 
-       /* Backup slaves won't see the ARP reply, but do come through
-        * here for each ARP probe (so we swap the sip/tip to validate
-        * the probe).  In a "redundant switch, common router" type of
-        * configuration, the ARP probe will (hopefully) travel from
-        * the active, through one switch, the router, then the other
-        * switch before reaching the backup.
+       /* We 'trust' the received ARP enough to validate it if:
+        *
+        * (a) the slave receiving the ARP is active (which includes the
+        * current ARP slave, if any), or
+        *
+        * (b) the receiving slave isn't active, but there is a currently
+        * active slave and it received valid arp reply(s) after it became
+        * the currently active slave, or
+        *
+        * (c) there is an ARP slave that sent an ARP during the prior ARP
+        * interval, and we receive an ARP reply on any slave.  We accept
+        * these because switch FDB update delays may deliver the ARP
+        * reply to a slave other than the sender of the ARP request.
         *
-        * We 'trust' the arp requests if there is an active slave and
-        * it received valid arp reply(s) after it became active. This
-        * is done to avoid endless looping when we can't reach the
+        * Note: for (b), backup slaves are receiving the broadcast ARP
+        * request, not a reply.  This request passes from the sending
+        * slave through the L2 switch(es) to the receiving slave.  Since
+        * this is checking the request, sip/tip are swapped for
+        * validation.
+        *
+        * This is done to avoid endless looping when we can't reach the
         * arp_ip_target and fool ourselves with our own arp requests.
         */
-
        if (bond_is_active_slave(slave))
                bond_validate_arp(bond, slave, sip, tip);
        else if (curr_active_slave &&
                 time_after(slave_last_rx(bond, curr_active_slave),
                            curr_active_slave->last_link_up))
                bond_validate_arp(bond, slave, tip, sip);
+       else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+                bond_time_in_interval(bond,
+                                      dev_trans_start(curr_arp_slave->dev), 1))
+               bond_validate_arp(bond, slave, sip, tip);
 
 out_unlock:
        if (arp != (struct arphdr *)skb->data)
@@ -2923,8 +2964,6 @@ static int bond_slave_netdev_event(unsigned long event,
        struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
        struct bonding *bond;
        struct net_device *bond_dev;
-       u32 old_speed;
-       u8 old_duplex;
 
        /* A netdev event can be generated while enslaving a device
         * before netdev_rx_handler_register is called in which case
@@ -2945,17 +2984,9 @@ static int bond_slave_netdev_event(unsigned long event,
                break;
        case NETDEV_UP:
        case NETDEV_CHANGE:
-               old_speed = slave->speed;
-               old_duplex = slave->duplex;
-
                bond_update_speed_duplex(slave);
-
-               if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-                       if (old_speed != slave->speed)
-                               bond_3ad_adapter_speed_changed(slave);
-                       if (old_duplex != slave->duplex)
-                               bond_3ad_adapter_duplex_changed(slave);
-               }
+               if (BOND_MODE(bond) == BOND_MODE_8023AD)
+                       bond_3ad_adapter_speed_duplex_changed(slave);
                /* Fallthrough */
        case NETDEV_DOWN:
                /* Refresh slave-array if applicable!
@@ -3075,16 +3106,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
        int noff, proto = -1;
 
        if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
-               return skb_flow_dissect(skb, fk);
+               return skb_flow_dissect_flow_keys(skb, fk, 0);
 
-       fk->ports = 0;
+       fk->ports.ports = 0;
        noff = skb_network_offset(skb);
        if (skb->protocol == htons(ETH_P_IP)) {
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
                        return false;
                iph = ip_hdr(skb);
-               fk->src = iph->saddr;
-               fk->dst = iph->daddr;
+               iph_to_flow_copy_v4addrs(fk, iph);
                noff += iph->ihl << 2;
                if (!ip_is_fragment(iph))
                        proto = iph->protocol;
@@ -3092,15 +3122,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
                        return false;
                iph6 = ipv6_hdr(skb);
-               fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
-               fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+               iph_to_flow_copy_v6addrs(fk, iph6);
                noff += sizeof(*iph6);
                proto = iph6->nexthdr;
        } else {
                return false;
        }
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
-               fk->ports = skb_flow_get_ports(skb, noff, proto);
+               fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
 
        return true;
 }
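
The explicit fk->src / fk->dst stores disappear because the rewritten flow dissector keys addresses under fk->addrs and records the address family in fk->control, which is what flow_get_u32_src()/flow_get_u32_dst() consult in the hashing hunk below. A hedged approximation of the v4 copy helper used above, under an illustrative name (modelled on iph_to_flow_copy_v4addrs() in include/net/ip.h; the real helper also carries a BUILD_BUG_ON asserting that saddr and daddr are adjacent):

static inline void example_iph_to_flow_v4(struct flow_keys *flow,
					  const struct iphdr *iph)
{
	/* saddr and daddr sit back to back in struct iphdr, so a single
	 * memcpy fills both halves of the IPv4 address key
	 */
	memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs));

	/* tell flow_get_u32_src()/flow_get_u32_dst() which union member
	 * of fk->addrs is valid
	 */
	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
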
@@ -3118,6 +3147,10 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
        struct flow_keys flow;
        u32 hash;
 
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+           skb->l4_hash)
+               return skb->hash;
+
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
            !bond_flow_dissect(bond, skb, &flow))
                return bond_eth_hash(skb);
@@ -3126,8 +3159,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
            bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
                hash = bond_eth_hash(skb);
        else
-               hash = (__force u32)flow.ports;
-       hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+               hash = (__force u32)flow.ports.ports;
+       hash ^= (__force u32)flow_get_u32_dst(&flow) ^
+               (__force u32)flow_get_u32_src(&flow);
        hash ^= (hash >> 16);
        hash ^= (hash >> 8);
 
@@ -3759,7 +3793,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
        struct slave *slave;
        struct list_head *iter;
        struct bond_up_slave *new_arr, *old_arr;
-       int slaves_in_agg;
        int agg_id = 0;
        int ret = 0;
 
@@ -3790,7 +3823,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
                        }
                        goto out;
                }
-               slaves_in_agg = ad_info.ports;
                agg_id = ad_info.aggregator_id;
        }
        bond_for_each_slave(bond, slave, iter) {
@@ -4060,8 +4092,12 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_add_slave          = bond_enslave,
        .ndo_del_slave          = bond_release,
        .ndo_fix_features       = bond_fix_features,
-       .ndo_bridge_setlink     = ndo_dflt_netdev_switch_port_bridge_setlink,
-       .ndo_bridge_dellink     = ndo_dflt_netdev_switch_port_bridge_dellink,
+       .ndo_bridge_setlink     = switchdev_port_bridge_setlink,
+       .ndo_bridge_getlink     = switchdev_port_bridge_getlink,
+       .ndo_bridge_dellink     = switchdev_port_bridge_dellink,
+       .ndo_fdb_add            = switchdev_port_fdb_add,
+       .ndo_fdb_del            = switchdev_port_fdb_del,
+       .ndo_fdb_dump           = switchdev_port_fdb_dump,
        .ndo_features_check     = passthru_features_check,
 };
 
@@ -4097,9 +4133,8 @@ void bond_setup(struct net_device *bond_dev)
        SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
 
        /* Initialize the device options */
-       bond_dev->tx_queue_len = 0;
        bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
-       bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;
+       bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
        bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 
        /* don't acquire bond device's netif_tx_lock when transmitting */
@@ -4161,6 +4196,8 @@ static int bond_check_params(struct bond_params *params)
        struct bond_opt_value newval;
        const struct bond_opt_value *valptr;
        int arp_all_targets_value;
+       u16 ad_actor_sys_prio = 0;
+       u16 ad_user_port_key = 0;
 
        /* Convert string parameters. */
        if (mode) {
@@ -4455,6 +4492,24 @@ static int bond_check_params(struct bond_params *params)
                fail_over_mac_value = BOND_FOM_NONE;
        }
 
+       bond_opt_initstr(&newval, "default");
+       valptr = bond_opt_parse(
+                       bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO),
+                                    &newval);
+       if (!valptr) {
+               pr_err("Error: No ad_actor_sys_prio default value");
+               return -EINVAL;
+       }
+       ad_actor_sys_prio = valptr->value;
+
+       valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
+                               &newval);
+       if (!valptr) {
+               pr_err("Error: No ad_user_port_key default value");
+               return -EINVAL;
+       }
+       ad_user_port_key = valptr->value;
+
        if (lp_interval == 0) {
                pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
                        INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
@@ -4483,6 +4538,9 @@ static int bond_check_params(struct bond_params *params)
        params->lp_interval = lp_interval;
        params->packets_per_slave = packets_per_slave;
        params->tlb_dynamic_lb = 1; /* Default value */
+       params->ad_actor_sys_prio = ad_actor_sys_prio;
+       eth_zero_addr(params->ad_actor_system);
+       params->ad_user_port_key = ad_user_port_key;
        if (packets_per_slave > 0) {
                params->reciprocal_packets_per_slave =
                        reciprocal_value(packets_per_slave);