These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / net / ethernet / cisco / enic / enic_main.c
index eadae1b..b36643e 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/prefetch.h>
 #include <net/ip6_checksum.h>
 #include <linux/ktime.h>
+#include <linux/numa.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
@@ -112,6 +113,71 @@ static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
        {3,  6}, /* 10 - 40 Gbps */
 };
 
+/* Give every data-path interrupt that has no populated affinity mask yet a
+ * freshly zeroed mask with a single CPU set, picked by
+ * cpumask_local_spread() for the PCI device's NUMA node.  Error and notify
+ * interrupts are skipped.  A failed mask allocation is not reported; that
+ * interrupt is simply left without a CPU set.
+ */
+static void enic_init_affinity_hint(struct enic *enic)
+{
+       int node = dev_to_node(&enic->pdev->dev);
+       int intr;
+
+       for (intr = 0; intr < enic->intr_count; intr++) {
+               /* No hint for the error and notify interrupts */
+               if (enic_is_err_intr(enic, intr))
+                       continue;
+               if (enic_is_notify_intr(enic, intr))
+                       continue;
+
+               /* Keep a mask that has already been populated */
+               if (enic->msix[intr].affinity_mask &&
+                   !cpumask_empty(enic->msix[intr].affinity_mask))
+                       continue;
+
+               if (!zalloc_cpumask_var(&enic->msix[intr].affinity_mask,
+                                       GFP_KERNEL))
+                       continue;
+
+               cpumask_set_cpu(cpumask_local_spread(intr, node),
+                               enic->msix[intr].affinity_mask);
+       }
+}
+
+/* Release the affinity mask of every interrupt except the error and notify
+ * interrupts.
+ */
+static void enic_free_affinity_hint(struct enic *enic)
+{
+       int intr;
+
+       for (intr = 0; intr < enic->intr_count; intr++) {
+               if (!enic_is_err_intr(enic, intr) &&
+                   !enic_is_notify_intr(enic, intr))
+                       free_cpumask_var(enic->msix[intr].affinity_mask);
+       }
+}
+
+/* Apply the prepared affinity masks.
+ *
+ * First pass: for each interrupt other than error/notify that has a
+ * non-empty mask, pass the mask to irq_set_affinity_hint(); a failure is
+ * logged as a warning and the loop carries on.
+ * Second pass: for each WQ whose interrupt has a non-empty mask, hand the
+ * same mask to netif_set_xps_queue() for that queue index.
+ */
+static void enic_set_affinity_hint(struct enic *enic)
+{
+       int idx;
+       int ret;
+
+       for (idx = 0; idx < enic->intr_count; idx++) {
+               if (enic_is_err_intr(enic, idx) ||
+                   enic_is_notify_intr(enic, idx))
+                       continue;
+               if (!enic->msix[idx].affinity_mask ||
+                   cpumask_empty(enic->msix[idx].affinity_mask))
+                       continue;
+
+               ret = irq_set_affinity_hint(enic->msix_entry[idx].vector,
+                                           enic->msix[idx].affinity_mask);
+               if (!ret)
+                       continue;
+               netdev_warn(enic->netdev, "irq_set_affinity_hint failed, err %d\n",
+                           ret);
+       }
+
+       for (idx = 0; idx < enic->wq_count; idx++) {
+               int intr = enic_msix_wq_intr(enic, idx);
+
+               if (!enic->msix[intr].affinity_mask ||
+                   cpumask_empty(enic->msix[intr].affinity_mask))
+                       continue;
+               netif_set_xps_queue(enic->netdev,
+                                   enic->msix[intr].affinity_mask, idx);
+       }
+}
+
+/* Clear the affinity hint (set it to NULL) on each of the first intr_count
+ * vectors in msix_entry, error and notify interrupts included.
+ */
+static void enic_unset_affinity_hint(struct enic *enic)
+{
+       int intr = 0;
+
+       while (intr < enic->intr_count) {
+               irq_set_affinity_hint(enic->msix_entry[intr].vector, NULL);
+               intr++;
+       }
+}
+
 int enic_is_dynamic(struct enic *enic)
 {
        return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
@@ -178,13 +244,15 @@ static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
        return 0;
 }
 
-static void enic_log_q_error(struct enic *enic)
+static bool enic_log_q_error(struct enic *enic)
 {
        unsigned int i;
        u32 error_status;
+       bool err = false;
 
        for (i = 0; i < enic->wq_count; i++) {
                error_status = vnic_wq_error_status(&enic->wq[i]);
+               err |= error_status;
                if (error_status)
                        netdev_err(enic->netdev, "WQ[%d] error_status %d\n",
                                i, error_status);
@@ -192,10 +260,13 @@ static void enic_log_q_error(struct enic *enic)
 
        for (i = 0; i < enic->rq_count; i++) {
                error_status = vnic_rq_error_status(&enic->rq[i]);
+               err |= error_status;
                if (error_status)
                        netdev_err(enic->netdev, "RQ[%d] error_status %d\n",
                                i, error_status);
        }
+
+       return err;
 }
 
 static void enic_msglvl_check(struct enic *enic)
@@ -333,10 +404,9 @@ static irqreturn_t enic_isr_msix_err(int irq, void *data)
 
        vnic_intr_return_all_credits(&enic->intr[intr]);
 
-       enic_log_q_error(enic);
-
-       /* schedule recovery from WQ/RQ error */
-       schedule_work(&enic->reset);
+       if (enic_log_q_error(enic))
+               /* schedule recovery from WQ/RQ error */
+               schedule_work(&enic->reset);
 
        return IRQ_HANDLED;
 }
@@ -804,7 +874,7 @@ static void enic_set_rx_mode(struct net_device *netdev)
+/* Queue the tx_hang_reset work item (handler: enic_tx_hang_reset()).
+ * No reset work is done in this function itself.
+ */
 static void enic_tx_timeout(struct net_device *netdev)
 {
        struct enic *enic = netdev_priv(netdev);
-       schedule_work(&enic->reset);
+       schedule_work(&enic->tx_hang_reset);
 }
 
 static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
@@ -1149,6 +1219,64 @@ static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
        return 0;
 }
 
+/* Push the pending RX coalescing timer value of rq's completion queue
+ * (tobe_rx_coal_timeval) to the RQ's interrupt, but only when it differs
+ * from the value recorded as current (cur_rx_coal_timeval); the current
+ * value is then updated to match.
+ */
+static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+       unsigned int intr = enic_msix_rq_intr(enic, rq->index);
+       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+       u32 pending = cq->tobe_rx_coal_timeval;
+
+       /* Nothing to program when the timer is already up to date */
+       if (cq->cur_rx_coal_timeval == cq->tobe_rx_coal_timeval)
+               return;
+
+       vnic_intr_coalescing_timer_set(&enic->intr[intr], pending);
+       cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
+}
+
+/* Recompute the pending RX interrupt coalescing timer
+ * (cq->tobe_rx_coal_timeval) for rq's completion queue from the bytes
+ * counted in cq->pkt_size_counter since the previous sample.
+ *
+ * Returns without changing anything if fewer than ENIC_AIC_TS_BREAK
+ * microseconds have passed since cq->prev_ts.  The new value is only
+ * stored in the cq; nothing in this function writes it to the hardware.
+ */
+static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+       struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+       struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
+       int index;
+       u32 timer;
+       u32 range_start;
+       u32 traffic;
+       u64 delta;
+       ktime_t now = ktime_get();
+
+       /* Microseconds elapsed since the previous sample */
+       delta = ktime_us_delta(now, cq->prev_ts);
+       if (delta < ENIC_AIC_TS_BREAK)
+               return;
+       cq->prev_ts = now;
+
+       traffic = pkt_size_counter->large_pkt_bytes_cnt +
+                 pkt_size_counter->small_pkt_bytes_cnt;
+       /* The table takes Mbps
+        * traffic *= 8    => bits
+        * traffic *= (10^6 / delta)    => bps
+        * traffic /= 10^6     => Mbps
+        *
+        * Combining, traffic *= (8 / delta)
+        */
+
+       /* NOTE(review): traffic is u32, so the sum above and this shift wrap
+        * once the byte total reaches 2^29 -- confirm the counters cannot
+        * grow that large between two samples.
+        */
+       traffic <<= 3;
+       /* A delta that does not fit in 32 bits is treated as zero traffic */
+       traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
+
+       /* Stop at the first table entry whose rx_rate exceeds the measured
+        * rate.
+        * NOTE(review): if none does, index == ENIC_MAX_COALESCE_TIMERS
+        * below; this assumes mod_table has an entry at that index --
+        * confirm against the table definition.
+        */
+       for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
+               if (traffic < mod_table[index].rx_rate)
+                       break;
+       /* Small-packet range when small-packet bytes exceed twice the
+        * large-packet bytes, large-packet range otherwise.
+        */
+       range_start = (pkt_size_counter->small_pkt_bytes_cnt >
+                      pkt_size_counter->large_pkt_bytes_cnt << 1) ?
+                     rx_coal->small_pkt_range_start :
+                     rx_coal->large_pkt_range_start;
+       /* Take range_percent percent of the way from range_start to
+        * range_end.
+        */
+       timer = range_start + ((rx_coal->range_end - range_start) *
+                              mod_table[index].range_percent / 100);
+       /* Damping: average the new value with the previous pending value */
+       cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
+
+       /* Restart byte counting for the next sample */
+       pkt_size_counter->large_pkt_bytes_cnt = 0;
+       pkt_size_counter->small_pkt_bytes_cnt = 0;
+}
+
 static int enic_poll(struct napi_struct *napi, int budget)
 {
        struct net_device *netdev = napi->dev;
@@ -1170,7 +1298,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
                                                 wq_work_done,
                                                 0 /* dont unmask intr */,
                                                 0 /* dont reset intr timer */);
-               return rq_work_done;
+               return budget;
        }
 
        if (budget > 0)
@@ -1191,6 +1319,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
                        0 /* don't reset intr timer */);
 
        err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
+       enic_poll_unlock_napi(&enic->rq[cq_rq], napi);
 
        /* Buffer allocation failed. Stay in polling
         * mode so we can try to fill the ring again.
@@ -1198,6 +1327,11 @@ static int enic_poll(struct napi_struct *napi, int budget)
 
        if (err)
                rq_work_done = rq_work_to_do;
+       if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+               /* Call the function which refreshes the intr coalescing timer
+                * value based on the traffic.
+                */
+               enic_calc_int_moderation(enic, &enic->rq[0]);
 
        if (rq_work_done < rq_work_to_do) {
 
@@ -1206,71 +1340,14 @@ static int enic_poll(struct napi_struct *napi, int budget)
                 */
 
                napi_complete(napi);
+               if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+                       enic_set_int_moderation(enic, &enic->rq[0]);
                vnic_intr_unmask(&enic->intr[intr]);
        }
-       enic_poll_unlock_napi(&enic->rq[cq_rq]);
 
        return rq_work_done;
 }
 
-static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
-{
-       unsigned int intr = enic_msix_rq_intr(enic, rq->index);
-       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
-       u32 timer = cq->tobe_rx_coal_timeval;
-
-       if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
-               vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
-               cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
-       }
-}
-
-static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
-{
-       struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
-       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
-       struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
-       int index;
-       u32 timer;
-       u32 range_start;
-       u32 traffic;
-       u64 delta;
-       ktime_t now = ktime_get();
-
-       delta = ktime_us_delta(now, cq->prev_ts);
-       if (delta < ENIC_AIC_TS_BREAK)
-               return;
-       cq->prev_ts = now;
-
-       traffic = pkt_size_counter->large_pkt_bytes_cnt +
-                 pkt_size_counter->small_pkt_bytes_cnt;
-       /* The table takes Mbps
-        * traffic *= 8    => bits
-        * traffic *= (10^6 / delta)    => bps
-        * traffic /= 10^6     => Mbps
-        *
-        * Combining, traffic *= (8 / delta)
-        */
-
-       traffic <<= 3;
-       traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
-
-       for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
-               if (traffic < mod_table[index].rx_rate)
-                       break;
-       range_start = (pkt_size_counter->small_pkt_bytes_cnt >
-                      pkt_size_counter->large_pkt_bytes_cnt << 1) ?
-                     rx_coal->small_pkt_range_start :
-                     rx_coal->large_pkt_range_start;
-       timer = range_start + ((rx_coal->range_end - range_start) *
-                              mod_table[index].range_percent / 100);
-       /* Damping */
-       cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
-
-       pkt_size_counter->large_pkt_bytes_cnt = 0;
-       pkt_size_counter->small_pkt_bytes_cnt = 0;
-}
-
 #ifdef CONFIG_RFS_ACCEL
 static void enic_free_rx_cpu_rmap(struct enic *enic)
 {
@@ -1407,14 +1484,12 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
        if (err)
                work_done = work_to_do;
        if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
-               /* Call the function which refreshes
-                * the intr coalescing timer value based on
-                * the traffic.  This is supported only in
-                * the case of MSI-x mode
+               /* Call the function which refreshes the intr coalescing timer
+                * value based on the traffic.
                 */
                enic_calc_int_moderation(enic, &enic->rq[rq]);
 
-       enic_poll_unlock_napi(&enic->rq[rq]);
+       enic_poll_unlock_napi(&enic->rq[rq], napi);
        if (work_done < work_to_do) {
 
                /* Some work done, but not enough to stay in polling,
@@ -1569,12 +1644,6 @@ static void enic_set_rx_coal_setting(struct enic *enic)
        int index = -1;
        struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
 
-       /* If intr mode is not MSIX, do not do adaptive coalescing */
-       if (VNIC_DEV_INTR_MODE_MSIX != vnic_dev_get_intr_mode(enic->vdev)) {
-               netdev_info(enic->netdev, "INTR mode is not MSIX, Not initializing adaptive coalescing");
-               return;
-       }
-
        /* 1. Read the link speed from fw
         * 2. Pick the default range for the speed
         * 3. Update it in enic->rx_coalesce_setting
@@ -1646,6 +1715,8 @@ static int enic_open(struct net_device *netdev)
                netdev_err(netdev, "Unable to request irq.\n");
                return err;
        }
+       enic_init_affinity_hint(enic);
+       enic_set_affinity_hint(enic);
 
        err = enic_dev_notify_set(enic);
        if (err) {
@@ -1698,6 +1769,7 @@ err_out_free_rq:
                vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
        enic_dev_notify_unset(enic);
 err_out_free_intr:
+       enic_unset_affinity_hint(enic);
        enic_free_intr(enic);
 
        return err;
@@ -1751,6 +1823,7 @@ static int enic_stop(struct net_device *netdev)
        }
 
        enic_dev_notify_unset(enic);
+       enic_unset_affinity_hint(enic);
        enic_free_intr(enic);
 
        for (i = 0; i < enic->wq_count; i++)
@@ -1925,6 +1998,19 @@ static int enic_dev_open(struct enic *enic)
        return err;
 }
 
+/* Run a vNIC soft reset through enic_dev_wait(), handing it
+ * vnic_dev_soft_reset and vnic_dev_soft_reset_done as its two callbacks.
+ *
+ * Returns 0 on success; otherwise logs the failure and returns the error
+ * code from enic_dev_wait().
+ */
+static int enic_dev_soft_reset(struct enic *enic)
+{
+       int ret = enic_dev_wait(enic->vdev, vnic_dev_soft_reset,
+                               vnic_dev_soft_reset_done, 0);
+
+       if (!ret)
+               return 0;
+
+       netdev_err(enic->netdev, "vNIC soft reset failed, err %d\n", ret);
+       return ret;
+}
+
 static int enic_dev_hang_reset(struct enic *enic)
 {
        int err;
@@ -2060,6 +2146,26 @@ static void enic_reset(struct work_struct *work)
 
        rtnl_lock();
 
+       spin_lock(&enic->enic_api_lock);
+       enic_stop(enic->netdev);
+       enic_dev_soft_reset(enic);
+       enic_reset_addr_lists(enic);
+       enic_init_vnic_resources(enic);
+       enic_set_rss_nic_cfg(enic);
+       enic_dev_set_ig_vlan_rewrite_mode(enic);
+       enic_open(enic->netdev);
+       spin_unlock(&enic->enic_api_lock);
+       call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev);
+
+       rtnl_unlock();
+}
+
+static void enic_tx_hang_reset(struct work_struct *work)
+{
+       struct enic *enic = container_of(work, struct enic, tx_hang_reset);
+
+       rtnl_lock();
+
        spin_lock(&enic->enic_api_lock);
        enic_dev_hang_notify(enic);
        enic_stop(enic->netdev);
@@ -2273,6 +2379,7 @@ static void enic_dev_deinit(struct enic *enic)
 
        enic_free_vnic_resources(enic);
        enic_clear_intr_mode(enic);
+       enic_free_affinity_hint(enic);
 }
 
 static void enic_kdump_kernel_config(struct enic *enic)
@@ -2368,6 +2475,7 @@ static int enic_dev_init(struct enic *enic)
        return 0;
 
 err_out_free_vnic_resources:
+       enic_free_affinity_hint(enic);
        enic_clear_intr_mode(enic);
        enic_free_vnic_resources(enic);
 
@@ -2485,6 +2593,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_out_iounmap;
        }
 
+       err = vnic_devcmd_init(enic->vdev);
+
+       if (err)
+               goto err_out_vnic_unregister;
+
 #ifdef CONFIG_PCI_IOV
        /* Get number of subvnics */
        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
@@ -2579,6 +2692,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        enic_set_rx_coal_setting(enic);
        INIT_WORK(&enic->reset, enic_reset);
+       INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset);
        INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);
 
        for (i = 0; i < enic->wq_count; i++)
@@ -2659,8 +2773,8 @@ err_out_disable_sriov_pp:
                pci_disable_sriov(pdev);
                enic->priv_flags &= ~ENIC_SRIOV_ENABLED;
        }
-err_out_vnic_unregister:
 #endif
+err_out_vnic_unregister:
        vnic_dev_unregister(enic->vdev);
 err_out_iounmap:
        enic_iounmap(enic);