X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=kernel%2Fdrivers%2Fnet%2Fbonding%2Fbond_main.c;fp=kernel%2Fdrivers%2Fnet%2Fbonding%2Fbond_main.c;h=28bbca0af238bb6a2642045a2e4e0273e38f2dc5;hb=e09b41010ba33a20a87472ee821fa407a5b8da36;hp=16d87bf8ac3cd268006b57e41a8476dc17a80e40;hpb=f93b97fd65072de626c074dbe099a1fff05ce060;p=kvmfornfv.git diff --git a/kernel/drivers/net/bonding/bond_main.c b/kernel/drivers/net/bonding/bond_main.c index 16d87bf8a..28bbca0af 100644 --- a/kernel/drivers/net/bonding/bond_main.c +++ b/kernel/drivers/net/bonding/bond_main.c @@ -76,7 +76,7 @@ #include #include #include -#include +#include #include #include #include @@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev); static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats); static void bond_slave_arr_handler(struct work_struct *work); +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); /*---------------------------- General routines -----------------------------*/ @@ -709,40 +711,57 @@ out: } -static bool bond_should_change_active(struct bonding *bond) +static struct slave *bond_choose_primary_or_current(struct bonding *bond) { struct slave *prim = rtnl_dereference(bond->primary_slave); struct slave *curr = rtnl_dereference(bond->curr_active_slave); - if (!prim || !curr || curr->link != BOND_LINK_UP) - return true; + if (!prim || prim->link != BOND_LINK_UP) { + if (!curr || curr->link != BOND_LINK_UP) + return NULL; + return curr; + } + if (bond->force_primary) { bond->force_primary = false; - return true; + return prim; + } + + if (!curr || curr->link != BOND_LINK_UP) + return prim; + + /* At this point, prim and curr are both up */ + switch (bond->params.primary_reselect) { + case BOND_PRI_RESELECT_ALWAYS: + return prim; + case BOND_PRI_RESELECT_BETTER: + if (prim->speed < curr->speed) + return curr; + if (prim->speed == curr->speed && prim->duplex <= curr->duplex) + return curr; + return prim; + case BOND_PRI_RESELECT_FAILURE: + return curr; + default: + netdev_err(bond->dev, "impossible primary_reselect %d\n", + bond->params.primary_reselect); + return curr; } - if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && - (prim->speed < curr->speed || - (prim->speed == curr->speed && prim->duplex <= curr->duplex))) - return false; - if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) - return false; - return true; } /** - * find_best_interface - select the best available slave to be the active one + * bond_find_best_slave - select the best available slave to be the active one * @bond: our bonding struct */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *slave, *bestslave = NULL, *primary; + struct slave *slave, *bestslave = NULL; struct list_head *iter; int mintime = bond->params.updelay; - primary = rtnl_dereference(bond->primary_slave); - if (primary && primary->link == BOND_LINK_UP && - bond_should_change_active(bond)) - return primary; + slave = bond_choose_primary_or_current(bond); + if (slave) + return slave; bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) @@ -769,6 +788,7 @@ static bool bond_should_notify_peers(struct bonding *bond) slave ? slave->dev->name : "NULL"); if (!slave || !bond->send_peer_notif || + !netif_carrier_ok(bond->dev) || test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return false; @@ -961,7 +981,6 @@ static void bond_poll_controller(struct net_device *bond_dev) if (bond_3ad_get_active_agg_info(bond, &ad_info)) return; - rcu_read_lock_bh(); bond_for_each_slave_rcu(bond, slave, iter) { ops = slave->dev->netdev_ops; if (!bond_slave_is_up(slave) || !ops->ndo_poll_controller) @@ -982,7 +1001,6 @@ static void bond_poll_controller(struct net_device *bond_dev) ops->ndo_poll_controller(slave->dev); up(&ni->dev_lock); } - rcu_read_unlock_bh(); } static void bond_netpoll_cleanup(struct net_device *bond_dev) @@ -1035,10 +1053,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t mask; struct slave *slave; - /* If any slave has the offload feature flag set, - * set the offload flag on the bond. - */ - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; @@ -1058,7 +1073,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, NETIF_F_HIGHDMA | NETIF_F_LRO) #define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ - NETIF_F_TSO) + NETIF_F_ALL_TSO) static void bond_compute_features(struct bonding *bond) { @@ -1194,7 +1209,6 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); if (err) return err; - slave_dev->flags |= IFF_SLAVE; rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); return 0; } @@ -1452,6 +1466,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + /* set slave flag before open to prevent IPv6 addrconf */ + slave_dev->flags |= IFF_SLAVE; + /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { @@ -1712,6 +1729,7 @@ err_close: dev_close(slave_dev); err_restore_mac: + slave_dev->flags &= ~IFF_SLAVE; if (!bond->params.fail_over_mac || BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* XXX TODO - fom follow mode needs to change master's @@ -1731,9 +1749,17 @@ err_free: err_undo_flags: /* Enslave of first slave has failed and we need to fix master's mac */ - if (!bond_has_slaves(bond) && - ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr)) - eth_hw_addr_random(bond_dev); + if (!bond_has_slaves(bond)) { + if (ether_addr_equal_64bits(bond_dev->dev_addr, + slave_dev->dev_addr)) + eth_hw_addr_random(bond_dev); + if (bond_dev->type != ARPHRD_ETHER) { + dev_close(bond_dev); + ether_setup(bond_dev); + bond_dev->flags |= IFF_MASTER; + bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; + } + } return res; } @@ -2394,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct arphdr *arp = (struct arphdr *)skb->data; - struct slave *curr_active_slave; + struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); @@ -2441,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, &sip, &tip); curr_active_slave = rcu_dereference(bond->curr_active_slave); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); - /* Backup slaves won't see the ARP reply, but do come through - * here for each ARP probe (so we swap the sip/tip to validate - * the probe). In a "redundant switch, common router" type of - * configuration, the ARP probe will (hopefully) travel from - * the active, through one switch, the router, then the other - * switch before reaching the backup. + /* We 'trust' the received ARP enough to validate it if: + * + * (a) the slave receiving the ARP is active (which includes the + * current ARP slave, if any), or + * + * (b) the receiving slave isn't active, but there is a currently + * active slave and it received valid arp reply(s) after it became + * the currently active slave, or + * + * (c) there is an ARP slave that sent an ARP during the prior ARP + * interval, and we receive an ARP reply on any slave. We accept + * these because switch FDB update delays may deliver the ARP + * reply to a slave other than the sender of the ARP request. * - * We 'trust' the arp requests if there is an active slave and - * it received valid arp reply(s) after it became active. This - * is done to avoid endless looping when we can't reach the + * Note: for (b), backup slaves are receiving the broadcast ARP + * request, not a reply. This request passes from the sending + * slave through the L2 switch(es) to the receiving slave. Since + * this is checking the request, sip/tip are swapped for + * validation. + * + * This is done to avoid endless looping when we can't reach the * arp_ip_target and fool ourselves with our own arp requests. */ - if (bond_is_active_slave(slave)) bond_validate_arp(bond, slave, sip, tip); else if (curr_active_slave && time_after(slave_last_rx(bond, curr_active_slave), curr_active_slave->last_link_up)) bond_validate_arp(bond, slave, tip, sip); + else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && + bond_time_in_interval(bond, + dev_trans_start(curr_arp_slave->dev), 1)) + bond_validate_arp(bond, slave, sip, tip); out_unlock: if (arp != (struct arphdr *)skb->data) @@ -2923,8 +2964,6 @@ static int bond_slave_netdev_event(unsigned long event, struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary; struct bonding *bond; struct net_device *bond_dev; - u32 old_speed; - u8 old_duplex; /* A netdev event can be generated while enslaving a device * before netdev_rx_handler_register is called in which case @@ -2945,17 +2984,9 @@ static int bond_slave_netdev_event(unsigned long event, break; case NETDEV_UP: case NETDEV_CHANGE: - old_speed = slave->speed; - old_duplex = slave->duplex; - bond_update_speed_duplex(slave); - - if (BOND_MODE(bond) == BOND_MODE_8023AD) { - if (old_speed != slave->speed) - bond_3ad_adapter_speed_changed(slave); - if (old_duplex != slave->duplex) - bond_3ad_adapter_duplex_changed(slave); - } + if (BOND_MODE(bond) == BOND_MODE_8023AD) + bond_3ad_adapter_speed_duplex_changed(slave); /* Fallthrough */ case NETDEV_DOWN: /* Refresh slave-array if applicable! @@ -3075,16 +3106,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, int noff, proto = -1; if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect(skb, fk); + return skb_flow_dissect_flow_keys(skb, fk, 0); - fk->ports = 0; + fk->ports.ports = 0; noff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) return false; iph = ip_hdr(skb); - fk->src = iph->saddr; - fk->dst = iph->daddr; + iph_to_flow_copy_v4addrs(fk, iph); noff += iph->ihl << 2; if (!ip_is_fragment(iph)) proto = iph->protocol; @@ -3092,15 +3122,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) return false; iph6 = ipv6_hdr(skb); - fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); - fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + iph_to_flow_copy_v6addrs(fk, iph6); noff += sizeof(*iph6); proto = iph6->nexthdr; } else { return false; } if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports = skb_flow_get_ports(skb, noff, proto); + fk->ports.ports = skb_flow_get_ports(skb, noff, proto); return true; } @@ -3118,6 +3147,10 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) struct flow_keys flow; u32 hash; + if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && + skb->l4_hash) + return skb->hash; + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || !bond_flow_dissect(bond, skb, &flow)) return bond_eth_hash(skb); @@ -3126,8 +3159,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) hash = bond_eth_hash(skb); else - hash = (__force u32)flow.ports; - hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash = (__force u32)flow.ports.ports; + hash ^= (__force u32)flow_get_u32_dst(&flow) ^ + (__force u32)flow_get_u32_src(&flow); hash ^= (hash >> 16); hash ^= (hash >> 8); @@ -3759,7 +3793,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) struct slave *slave; struct list_head *iter; struct bond_up_slave *new_arr, *old_arr; - int slaves_in_agg; int agg_id = 0; int ret = 0; @@ -3790,7 +3823,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) } goto out; } - slaves_in_agg = ad_info.ports; agg_id = ad_info.aggregator_id; } bond_for_each_slave(bond, slave, iter) { @@ -4060,8 +4092,12 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_features_check = passthru_features_check, }; @@ -4097,9 +4133,8 @@ void bond_setup(struct net_device *bond_dev) SET_NETDEV_DEVTYPE(bond_dev, &bond_type); /* Initialize the device options */ - bond_dev->tx_queue_len = 0; bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; - bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT; + bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE; bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); /* don't acquire bond device's netif_tx_lock when transmitting */ @@ -4161,6 +4196,8 @@ static int bond_check_params(struct bond_params *params) struct bond_opt_value newval; const struct bond_opt_value *valptr; int arp_all_targets_value; + u16 ad_actor_sys_prio = 0; + u16 ad_user_port_key = 0; /* Convert string parameters. */ if (mode) { @@ -4455,6 +4492,24 @@ static int bond_check_params(struct bond_params *params) fail_over_mac_value = BOND_FOM_NONE; } + bond_opt_initstr(&newval, "default"); + valptr = bond_opt_parse( + bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO), + &newval); + if (!valptr) { + pr_err("Error: No ad_actor_sys_prio default value"); + return -EINVAL; + } + ad_actor_sys_prio = valptr->value; + + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY), + &newval); + if (!valptr) { + pr_err("Error: No ad_user_port_key default value"); + return -EINVAL; + } + ad_user_port_key = valptr->value; + if (lp_interval == 0) { pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); @@ -4483,6 +4538,9 @@ static int bond_check_params(struct bond_params *params) params->lp_interval = lp_interval; params->packets_per_slave = packets_per_slave; params->tlb_dynamic_lb = 1; /* Default value */ + params->ad_actor_sys_prio = ad_actor_sys_prio; + eth_zero_addr(params->ad_actor_system); + params->ad_user_port_key = ad_user_port_key; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave);