These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/drivers/net/bonding/bond_main.c
index 16d87bf..28bbca0 100644
@@ -76,7 +76,7 @@
 #include <net/netns/generic.h>
 #include <net/pkt_sched.h>
 #include <linux/rculist.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 #include <net/switchdev.h>
 #include <net/bonding.h>
 #include <net/bond_3ad.h>
@@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev);
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
                                                struct rtnl_link_stats64 *stats);
 static void bond_slave_arr_handler(struct work_struct *work);
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+                                 int mod);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -709,40 +711,57 @@ out:
 
 }
 
-static bool bond_should_change_active(struct bonding *bond)
+static struct slave *bond_choose_primary_or_current(struct bonding *bond)
 {
        struct slave *prim = rtnl_dereference(bond->primary_slave);
        struct slave *curr = rtnl_dereference(bond->curr_active_slave);
 
-       if (!prim || !curr || curr->link != BOND_LINK_UP)
-               return true;
+       if (!prim || prim->link != BOND_LINK_UP) {
+               if (!curr || curr->link != BOND_LINK_UP)
+                       return NULL;
+               return curr;
+       }
+
        if (bond->force_primary) {
                bond->force_primary = false;
-               return true;
+               return prim;
+       }
+
+       if (!curr || curr->link != BOND_LINK_UP)
+               return prim;
+
+       /* At this point, prim and curr are both up */
+       switch (bond->params.primary_reselect) {
+       case BOND_PRI_RESELECT_ALWAYS:
+               return prim;
+       case BOND_PRI_RESELECT_BETTER:
+               if (prim->speed < curr->speed)
+                       return curr;
+               if (prim->speed == curr->speed && prim->duplex <= curr->duplex)
+                       return curr;
+               return prim;
+       case BOND_PRI_RESELECT_FAILURE:
+               return curr;
+       default:
+               netdev_err(bond->dev, "impossible primary_reselect %d\n",
+                          bond->params.primary_reselect);
+               return curr;
        }
-       if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
-           (prim->speed < curr->speed ||
-            (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
-               return false;
-       if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
-               return false;
-       return true;
 }
 
 /**
- * find_best_interface - select the best available slave to be the active one
+ * bond_find_best_slave - select the best available slave to be the active one
  * @bond: our bonding struct
  */
 static struct slave *bond_find_best_slave(struct bonding *bond)
 {
-       struct slave *slave, *bestslave = NULL, *primary;
+       struct slave *slave, *bestslave = NULL;
        struct list_head *iter;
        int mintime = bond->params.updelay;
 
-       primary = rtnl_dereference(bond->primary_slave);
-       if (primary && primary->link == BOND_LINK_UP &&
-           bond_should_change_active(bond))
-               return primary;
+       slave = bond_choose_primary_or_current(bond);
+       if (slave)
+               return slave;
 
        bond_for_each_slave(bond, slave, iter) {
                if (slave->link == BOND_LINK_UP)
@@ -769,6 +788,7 @@ static bool bond_should_notify_peers(struct bonding *bond)
                   slave ? slave->dev->name : "NULL");
 
        if (!slave || !bond->send_peer_notif ||
+           !netif_carrier_ok(bond->dev) ||
            test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
                return false;
 
@@ -961,7 +981,6 @@ static void bond_poll_controller(struct net_device *bond_dev)
                if (bond_3ad_get_active_agg_info(bond, &ad_info))
                        return;
 
-       rcu_read_lock_bh();
        bond_for_each_slave_rcu(bond, slave, iter) {
                ops = slave->dev->netdev_ops;
                if (!bond_slave_is_up(slave) || !ops->ndo_poll_controller)
@@ -982,7 +1001,6 @@ static void bond_poll_controller(struct net_device *bond_dev)
                ops->ndo_poll_controller(slave->dev);
                up(&ni->dev_lock);
        }
-       rcu_read_unlock_bh();
 }
 
 static void bond_netpoll_cleanup(struct net_device *bond_dev)
@@ -1035,10 +1053,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
        netdev_features_t mask;
        struct slave *slave;
 
-       /* If any slave has the offload feature flag set,
-        * set the offload flag on the bond.
-        */
-       mask = features | NETIF_F_HW_SWITCH_OFFLOAD;
+       mask = features;
 
        features &= ~NETIF_F_ONE_FOR_ALL;
        features |= NETIF_F_ALL_FOR_ALL;
@@ -1058,7 +1073,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
                                 NETIF_F_HIGHDMA | NETIF_F_LRO)
 
 #define BOND_ENC_FEATURES      (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\
-                                NETIF_F_TSO)
+                                NETIF_F_ALL_TSO)
 
 static void bond_compute_features(struct bonding *bond)
 {
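
Context for the BOND_ENC_FEATURES change above: NETIF_F_ALL_TSO is the umbrella TSO mask, so encapsulated traffic over the bond keeps every TSO variant its slaves advertise rather than plain NETIF_F_TSO alone. A hedged approximation of the mask as defined in include/linux/netdev_features.h around this kernel generation (the exact member set varies by version):

#define NETIF_F_ALL_TSO	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
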
@@ -1194,7 +1209,6 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
        err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
        if (err)
                return err;
-       slave_dev->flags |= IFF_SLAVE;
        rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
        return 0;
 }
@@ -1452,6 +1466,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                }
        }
 
+       /* set slave flag before open to prevent IPv6 addrconf */
+       slave_dev->flags |= IFF_SLAVE;
+
        /* open the slave since the application closed it */
        res = dev_open(slave_dev);
        if (res) {
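
Why the ordering matters: dev_open() raises NETDEV_UP, and the IPv6 core declines to autoconfigure addresses on devices already flagged as slaves, so setting IFF_SLAVE before the open keeps link-local/autoconf addresses off the slave. A minimal hedged sketch of that gate, with an illustrative helper name (approximating the check in net/ipv6/addrconf.c's notifier, not part of this patch):

#include <linux/netdevice.h>

/* Hedged approximation: the NETDEV_UP / NETDEV_CHANGE path in
 * addrconf_notify() bails out early for slave devices, which is the
 * behaviour the reordering above relies on.
 */
static bool bond_example_ipv6_skips_addrconf(const struct net_device *dev)
{
	return dev->flags & IFF_SLAVE;
}
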
@@ -1712,6 +1729,7 @@ err_close:
        dev_close(slave_dev);
 
 err_restore_mac:
+       slave_dev->flags &= ~IFF_SLAVE;
        if (!bond->params.fail_over_mac ||
            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
                /* XXX TODO - fom follow mode needs to change master's
@@ -1731,9 +1749,17 @@ err_free:
 
 err_undo_flags:
        /* Enslave of first slave has failed and we need to fix master's mac */
-       if (!bond_has_slaves(bond) &&
-           ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr))
-               eth_hw_addr_random(bond_dev);
+       if (!bond_has_slaves(bond)) {
+               if (ether_addr_equal_64bits(bond_dev->dev_addr,
+                                           slave_dev->dev_addr))
+                       eth_hw_addr_random(bond_dev);
+               if (bond_dev->type != ARPHRD_ETHER) {
+                       dev_close(bond_dev);
+                       ether_setup(bond_dev);
+                       bond_dev->flags |= IFF_MASTER;
+                       bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+               }
+       }
 
        return res;
 }
@@ -2394,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
                 struct slave *slave)
 {
        struct arphdr *arp = (struct arphdr *)skb->data;
-       struct slave *curr_active_slave;
+       struct slave *curr_active_slave, *curr_arp_slave;
        unsigned char *arp_ptr;
        __be32 sip, tip;
        int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@ -2441,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
                     &sip, &tip);
 
        curr_active_slave = rcu_dereference(bond->curr_active_slave);
+       curr_arp_slave = rcu_dereference(bond->current_arp_slave);
 
-       /* Backup slaves won't see the ARP reply, but do come through
-        * here for each ARP probe (so we swap the sip/tip to validate
-        * the probe).  In a "redundant switch, common router" type of
-        * configuration, the ARP probe will (hopefully) travel from
-        * the active, through one switch, the router, then the other
-        * switch before reaching the backup.
+       /* We 'trust' the received ARP enough to validate it if:
+        *
+        * (a) the slave receiving the ARP is active (which includes the
+        * current ARP slave, if any), or
+        *
+        * (b) the receiving slave isn't active, but there is a currently
+        * active slave and it received valid arp reply(s) after it became
+        * the currently active slave, or
+        *
+        * (c) there is an ARP slave that sent an ARP during the prior ARP
+        * interval, and we receive an ARP reply on any slave.  We accept
+        * these because switch FDB update delays may deliver the ARP
+        * reply to a slave other than the sender of the ARP request.
         *
-        * We 'trust' the arp requests if there is an active slave and
-        * it received valid arp reply(s) after it became active. This
-        * is done to avoid endless looping when we can't reach the
+        * Note: for (b), backup slaves are receiving the broadcast ARP
+        * request, not a reply.  This request passes from the sending
+        * slave through the L2 switch(es) to the receiving slave.  Since
+        * this is checking the request, sip/tip are swapped for
+        * validation.
+        *
+        * This is done to avoid endless looping when we can't reach the
         * arp_ip_target and fool ourselves with our own arp requests.
         */
-
        if (bond_is_active_slave(slave))
                bond_validate_arp(bond, slave, sip, tip);
        else if (curr_active_slave &&
                 time_after(slave_last_rx(bond, curr_active_slave),
                            curr_active_slave->last_link_up))
                bond_validate_arp(bond, slave, tip, sip);
+       else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+                bond_time_in_interval(bond,
+                                      dev_trans_start(curr_arp_slave->dev), 1))
+               bond_validate_arp(bond, slave, sip, tip);
 
 out_unlock:
        if (arp != (struct arphdr *)skb->data)
@@ -2923,8 +2964,6 @@ static int bond_slave_netdev_event(unsigned long event,
        struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
        struct bonding *bond;
        struct net_device *bond_dev;
-       u32 old_speed;
-       u8 old_duplex;
 
        /* A netdev event can be generated while enslaving a device
         * before netdev_rx_handler_register is called in which case
@@ -2945,17 +2984,9 @@ static int bond_slave_netdev_event(unsigned long event,
                break;
        case NETDEV_UP:
        case NETDEV_CHANGE:
-               old_speed = slave->speed;
-               old_duplex = slave->duplex;
-
                bond_update_speed_duplex(slave);
-
-               if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-                       if (old_speed != slave->speed)
-                               bond_3ad_adapter_speed_changed(slave);
-                       if (old_duplex != slave->duplex)
-                               bond_3ad_adapter_duplex_changed(slave);
-               }
+               if (BOND_MODE(bond) == BOND_MODE_8023AD)
+                       bond_3ad_adapter_speed_duplex_changed(slave);
                /* Fallthrough */
        case NETDEV_DOWN:
                /* Refresh slave-array if applicable!
@@ -3075,16 +3106,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
        int noff, proto = -1;
 
        if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
-               return skb_flow_dissect(skb, fk);
+               return skb_flow_dissect_flow_keys(skb, fk, 0);
 
-       fk->ports = 0;
+       fk->ports.ports = 0;
        noff = skb_network_offset(skb);
        if (skb->protocol == htons(ETH_P_IP)) {
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
                        return false;
                iph = ip_hdr(skb);
-               fk->src = iph->saddr;
-               fk->dst = iph->daddr;
+               iph_to_flow_copy_v4addrs(fk, iph);
                noff += iph->ihl << 2;
                if (!ip_is_fragment(iph))
                        proto = iph->protocol;
@@ -3092,15 +3122,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
                        return false;
                iph6 = ipv6_hdr(skb);
-               fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
-               fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+               iph_to_flow_copy_v6addrs(fk, iph6);
                noff += sizeof(*iph6);
                proto = iph6->nexthdr;
        } else {
                return false;
        }
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
-               fk->ports = skb_flow_get_ports(skb, noff, proto);
+               fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
 
        return true;
 }
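
The explicit fk->src / fk->dst stores disappear because the rewritten flow dissector keys addresses under fk->addrs and records the address family in fk->control, which is what flow_get_u32_src()/flow_get_u32_dst() consult in the hashing hunk below. A hedged approximation of the v4 copy helper used above, under an illustrative name (modelled on iph_to_flow_copy_v4addrs() in include/net/ip.h; the real helper also carries a BUILD_BUG_ON asserting that saddr and daddr are adjacent):

static inline void example_iph_to_flow_v4(struct flow_keys *flow,
					  const struct iphdr *iph)
{
	/* saddr and daddr sit back to back in struct iphdr, so a single
	 * memcpy fills both halves of the IPv4 address key
	 */
	memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs));

	/* tell flow_get_u32_src()/flow_get_u32_dst() which union member
	 * of fk->addrs is valid
	 */
	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
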
@@ -3118,6 +3147,10 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
        struct flow_keys flow;
        u32 hash;
 
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+           skb->l4_hash)
+               return skb->hash;
+
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
            !bond_flow_dissect(bond, skb, &flow))
                return bond_eth_hash(skb);
@@ -3126,8 +3159,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
            bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
                hash = bond_eth_hash(skb);
        else
-               hash = (__force u32)flow.ports;
-       hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+               hash = (__force u32)flow.ports.ports;
+       hash ^= (__force u32)flow_get_u32_dst(&flow) ^
+               (__force u32)flow_get_u32_src(&flow);
        hash ^= (hash >> 16);
        hash ^= (hash >> 8);
 
@@ -3759,7 +3793,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
        struct slave *slave;
        struct list_head *iter;
        struct bond_up_slave *new_arr, *old_arr;
-       int slaves_in_agg;
        int agg_id = 0;
        int ret = 0;
 
@@ -3790,7 +3823,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
                        }
                        goto out;
                }
-               slaves_in_agg = ad_info.ports;
                agg_id = ad_info.aggregator_id;
        }
        bond_for_each_slave(bond, slave, iter) {
@@ -4060,8 +4092,12 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_add_slave          = bond_enslave,
        .ndo_del_slave          = bond_release,
        .ndo_fix_features       = bond_fix_features,
-       .ndo_bridge_setlink     = ndo_dflt_netdev_switch_port_bridge_setlink,
-       .ndo_bridge_dellink     = ndo_dflt_netdev_switch_port_bridge_dellink,
+       .ndo_bridge_setlink     = switchdev_port_bridge_setlink,
+       .ndo_bridge_getlink     = switchdev_port_bridge_getlink,
+       .ndo_bridge_dellink     = switchdev_port_bridge_dellink,
+       .ndo_fdb_add            = switchdev_port_fdb_add,
+       .ndo_fdb_del            = switchdev_port_fdb_del,
+       .ndo_fdb_dump           = switchdev_port_fdb_dump,
        .ndo_features_check     = passthru_features_check,
 };
 
@@ -4097,9 +4133,8 @@ void bond_setup(struct net_device *bond_dev)
        SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
 
        /* Initialize the device options */
-       bond_dev->tx_queue_len = 0;
        bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
-       bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;
+       bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
        bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 
        /* don't acquire bond device's netif_tx_lock when transmitting */
@@ -4161,6 +4196,8 @@ static int bond_check_params(struct bond_params *params)
        struct bond_opt_value newval;
        const struct bond_opt_value *valptr;
        int arp_all_targets_value;
+       u16 ad_actor_sys_prio = 0;
+       u16 ad_user_port_key = 0;
 
        /* Convert string parameters. */
        if (mode) {
@@ -4455,6 +4492,24 @@ static int bond_check_params(struct bond_params *params)
                fail_over_mac_value = BOND_FOM_NONE;
        }
 
+       bond_opt_initstr(&newval, "default");
+       valptr = bond_opt_parse(
+                       bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO),
+                                    &newval);
+       if (!valptr) {
+               pr_err("Error: No ad_actor_sys_prio default value");
+               return -EINVAL;
+       }
+       ad_actor_sys_prio = valptr->value;
+
+       valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
+                               &newval);
+       if (!valptr) {
+               pr_err("Error: No ad_user_port_key default value");
+               return -EINVAL;
+       }
+       ad_user_port_key = valptr->value;
+
        if (lp_interval == 0) {
                pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
                        INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
@@ -4483,6 +4538,9 @@ static int bond_check_params(struct bond_params *params)
        params->lp_interval = lp_interval;
        params->packets_per_slave = packets_per_slave;
        params->tlb_dynamic_lb = 1; /* Default value */
+       params->ad_actor_sys_prio = ad_actor_sys_prio;
+       eth_zero_addr(params->ad_actor_system);
+       params->ad_user_port_key = ad_user_port_key;
        if (packets_per_slave > 0) {
                params->reciprocal_packets_per_slave =
                        reciprocal_value(packets_per_slave);