These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] / kernel / drivers / net / hyperv / netvsc_drv.c
index 5993c7e..409b48e 100644 (file)
@@ -46,6 +46,8 @@ static int ring_size = 128;
 module_param(ring_size, int, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
 
+static int max_num_vrss_chns = 8;
+
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
                                NETIF_MSG_LINK | NETIF_MSG_IFUP |
                                NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
@@ -104,7 +106,7 @@ static int netvsc_open(struct net_device *net)
                return ret;
        }
 
-       netif_tx_start_all_queues(net);
+       netif_tx_wake_all_queues(net);
 
        nvdev = hv_get_drvdata(device_obj);
        rdev = nvdev->extension;
@@ -118,15 +120,56 @@ static int netvsc_close(struct net_device *net)
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct hv_device *device_obj = net_device_ctx->device_ctx;
+       struct netvsc_device *nvdev = hv_get_drvdata(device_obj);
        int ret;
+       u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20;
+       struct vmbus_channel *chn;
 
        netif_tx_disable(net);
 
        /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
        cancel_work_sync(&net_device_ctx->work);
        ret = rndis_filter_close(device_obj);
-       if (ret != 0)
+       if (ret != 0) {
                netdev_err(net, "unable to close device (ret %d).\n", ret);
+               return ret;
+       }
+
+       /* Ensure pending bytes in ring are read */
+       while (true) {
+               aread = 0;
+               for (i = 0; i < nvdev->num_chn; i++) {
+                       chn = nvdev->chn_table[i];
+                       if (!chn)
+                               continue;
+
+                       hv_get_ringbuffer_availbytes(&chn->inbound, &aread,
+                                                    &awrite);
+
+                       if (aread)
+                               break;
+
+                       hv_get_ringbuffer_availbytes(&chn->outbound, &aread,
+                                                    &awrite);
+
+                       if (aread)
+                               break;
+               }
+
+               retry++;
+               if (retry > retry_max || aread == 0)
+                       break;
+
+               msleep(msec);
+
+               if (msec < 1000)
+                       msec *= 2;
+       }
+
+       if (aread) {
+               netdev_err(net, "Ring buffer not empty after closing rndis\n");
+               ret = -ETIMEDOUT;
+       }
 
        return ret;
 }
@@ -196,12 +239,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
        struct flow_keys flow;
        int data_len;
 
-       if (!skb_flow_dissect(skb, &flow) ||
-           !(flow.n_proto == htons(ETH_P_IP) ||
-             flow.n_proto == htons(ETH_P_IPV6)))
+       if (!skb_flow_dissect_flow_keys(skb, &flow, 0) ||
+           !(flow.basic.n_proto == htons(ETH_P_IP) ||
+             flow.basic.n_proto == htons(ETH_P_IPV6)))
                return false;
 
-       if (flow.ip_proto == IPPROTO_TCP)
+       if (flow.basic.ip_proto == IPPROTO_TCP)
                data_len = 12;
        else
                data_len = 8;
@@ -391,7 +434,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
        u32 skb_length;
        u32 pkt_sz;
        struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
-
+       struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
 
        /* We will atmost need two pages to describe the rndis
         * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -580,8 +623,10 @@ do_send:
 
 drop:
        if (ret == 0) {
-               net->stats.tx_bytes += skb_length;
-               net->stats.tx_packets++;
+               u64_stats_update_begin(&tx_stats->syncp);
+               tx_stats->packets++;
+               tx_stats->bytes += skb_length;
+               u64_stats_update_end(&tx_stats->syncp);
        } else {
                if (ret != -EAGAIN) {
                        dev_kfree_skb_any(skb);
@@ -644,13 +689,17 @@ int netvsc_recv_callback(struct hv_device *device_obj,
                                struct ndis_tcp_ip_checksum_info *csum_info)
 {
        struct net_device *net;
+       struct net_device_context *net_device_ctx;
        struct sk_buff *skb;
+       struct netvsc_stats *rx_stats;
 
        net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
        if (!net || net->reg_state != NETREG_REGISTERED) {
                packet->status = NVSP_STAT_FAIL;
                return 0;
        }
+       net_device_ctx = netdev_priv(net);
+       rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
 
        /* Allocate a skb - TODO direct I/O to pages? */
        skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
@@ -686,8 +735,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
        skb_record_rx_queue(skb, packet->channel->
                            offermsg.offer.sub_channel_index);
 
-       net->stats.rx_packets++;
-       net->stats.rx_bytes += packet->total_data_buflen;
+       u64_stats_update_begin(&rx_stats->syncp);
+       rx_stats->packets++;
+       rx_stats->bytes += packet->total_data_buflen;
+       u64_stats_update_end(&rx_stats->syncp);
 
        /*
         * Pass the skb back up. Network stack will deallocate the skb when it
@@ -719,6 +770,104 @@ static void netvsc_get_channels(struct net_device *net,
        }
 }
 
+static int netvsc_set_channels(struct net_device *net,
+                              struct ethtool_channels *channels)
+{
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct hv_device *dev = net_device_ctx->device_ctx;
+       struct netvsc_device *nvdev = hv_get_drvdata(dev);
+       struct netvsc_device_info device_info;
+       u32 num_chn;
+       u32 max_chn;
+       int ret = 0;
+       bool recovering = false;
+
+       if (!nvdev || nvdev->destroy)
+               return -ENODEV;
+
+       num_chn = nvdev->num_chn;
+       max_chn = min_t(u32, nvdev->max_chn, num_online_cpus());
+
+       if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) {
+               pr_info("vRSS unsupported before NVSP Version 5\n");
+               return -EINVAL;
+       }
+
+       /* We do not support rx, tx, or other */
+       if (!channels ||
+           channels->rx_count ||
+           channels->tx_count ||
+           channels->other_count ||
+           (channels->combined_count < 1))
+               return -EINVAL;
+
+       if (channels->combined_count > max_chn) {
+               pr_info("combined channels too high, using %d\n", max_chn);
+               channels->combined_count = max_chn;
+       }
+
+       ret = netvsc_close(net);
+       if (ret)
+               goto out;
+
+ do_set:
+       nvdev->start_remove = true;
+       rndis_filter_device_remove(dev);
+
+       nvdev->num_chn = channels->combined_count;
+
+       net_device_ctx->device_ctx = dev;
+       hv_set_drvdata(dev, net);
+
+       memset(&device_info, 0, sizeof(device_info));
+       device_info.num_chn = nvdev->num_chn; /* passed to RNDIS */
+       device_info.ring_size = ring_size;
+       device_info.max_num_vrss_chns = max_num_vrss_chns;
+
+       ret = rndis_filter_device_add(dev, &device_info);
+       if (ret) {
+               if (recovering) {
+                       netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+                       return ret;
+               }
+               goto recover;
+       }
+
+       nvdev = hv_get_drvdata(dev);
+
+       ret = netif_set_real_num_tx_queues(net, nvdev->num_chn);
+       if (ret) {
+               if (recovering) {
+                       netdev_err(net, "could not set tx queue count (ret %d)\n", ret);
+                       return ret;
+               }
+               goto recover;
+       }
+
+       ret = netif_set_real_num_rx_queues(net, nvdev->num_chn);
+       if (ret) {
+               if (recovering) {
+                       netdev_err(net, "could not set rx queue count (ret %d)\n", ret);
+                       return ret;
+               }
+               goto recover;
+       }
+
+ out:
+       netvsc_open(net);
+
+       return ret;
+
+ recover:
+       /* If the above failed, we attempt to recover through the same
+        * process but with the original number of channels.
+        */
+       netdev_err(net, "could not set channels, recovering\n");
+       recovering = true;
+       channels->combined_count = num_chn;
+       goto do_set;
+}
+
 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 {
        struct net_device_context *ndevctx = netdev_priv(ndev);
@@ -726,6 +875,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
        struct netvsc_device *nvdev = hv_get_drvdata(hdev);
        struct netvsc_device_info device_info;
        int limit = ETH_DATA_LEN;
+       int ret = 0;
 
        if (nvdev == NULL || nvdev->destroy)
                return -ENODEV;
@@ -733,26 +883,73 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
        if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
                limit = NETVSC_MTU - ETH_HLEN;
 
-       /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */
-       if (mtu < ETH_DATA_LEN || mtu > limit)
+       if (mtu < NETVSC_MTU_MIN || mtu > limit)
                return -EINVAL;
 
+       ret = netvsc_close(ndev);
+       if (ret)
+               goto out;
+
        nvdev->start_remove = true;
-       cancel_work_sync(&ndevctx->work);
-       netif_tx_disable(ndev);
        rndis_filter_device_remove(hdev);
 
        ndev->mtu = mtu;
 
        ndevctx->device_ctx = hdev;
        hv_set_drvdata(hdev, ndev);
+
+       memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
+       device_info.num_chn = nvdev->num_chn;
+       device_info.max_num_vrss_chns = max_num_vrss_chns;
        rndis_filter_device_add(hdev, &device_info);
-       netif_tx_wake_all_queues(ndev);
 
-       return 0;
+out:
+       netvsc_open(ndev);
+
+       return ret;
 }
 
+static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
+                                                   struct rtnl_link_stats64 *t)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(net);
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
+                                                           cpu);
+               struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
+                                                           cpu);
+               u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+               unsigned int start;
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+                       tx_packets = tx_stats->packets;
+                       tx_bytes = tx_stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+                       rx_packets = rx_stats->packets;
+                       rx_bytes = rx_stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
+
+               t->tx_bytes     += tx_bytes;
+               t->tx_packets   += tx_packets;
+               t->rx_bytes     += rx_bytes;
+               t->rx_packets   += rx_packets;
+       }
+
+       t->tx_dropped   = net->stats.tx_dropped;
+       t->tx_errors    = net->stats.tx_errors;
+
+       t->rx_dropped   = net->stats.rx_dropped;
+       t->rx_errors    = net->stats.rx_errors;
+
+       return t;
+}
 
 static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
 {
@@ -793,6 +990,7 @@ static const struct ethtool_ops ethtool_ops = {
        .get_drvinfo    = netvsc_get_drvinfo,
        .get_link       = ethtool_op_get_link,
        .get_channels   = netvsc_get_channels,
+       .set_channels   = netvsc_set_channels,
 };
 
 static const struct net_device_ops device_ops = {
@@ -804,6 +1002,7 @@ static const struct net_device_ops device_ops = {
        .ndo_validate_addr =            eth_validate_addr,
        .ndo_set_mac_address =          netvsc_set_mac_addr,
        .ndo_select_queue =             netvsc_select_queue,
+       .ndo_get_stats64 =              netvsc_get_stats64,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller =          netvsc_poll_controller,
 #endif
@@ -855,6 +1054,14 @@ static void netvsc_link_change(struct work_struct *w)
                netdev_notify_peers(net);
 }
 
+static void netvsc_free_netdev(struct net_device *netdev)
+{
+       struct net_device_context *net_device_ctx = netdev_priv(netdev);
+
+       free_percpu(net_device_ctx->tx_stats);
+       free_percpu(net_device_ctx->rx_stats);
+       free_netdev(netdev);
+}
 
 static int netvsc_probe(struct hv_device *dev,
                        const struct hv_vmbus_device_id *dev_id)
@@ -883,6 +1090,18 @@ static int netvsc_probe(struct hv_device *dev,
                netdev_dbg(net, "netvsc msg_enable: %d\n",
                           net_device_ctx->msg_enable);
 
+       net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
+       if (!net_device_ctx->tx_stats) {
+               free_netdev(net);
+               return -ENOMEM;
+       }
+       net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
+       if (!net_device_ctx->rx_stats) {
+               free_percpu(net_device_ctx->tx_stats);
+               free_netdev(net);
+               return -ENOMEM;
+       }
+
        hv_set_drvdata(dev, net);
        INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
        INIT_WORK(&net_device_ctx->work, do_set_multicast);
@@ -905,11 +1124,13 @@ static int netvsc_probe(struct hv_device *dev,
        net->needed_headroom = max_needed_headroom;
 
        /* Notify the netvsc driver of the new device */
+       memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
+       device_info.max_num_vrss_chns = max_num_vrss_chns;
        ret = rndis_filter_device_add(dev, &device_info);
        if (ret != 0) {
                netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
-               free_netdev(net);
+               netvsc_free_netdev(net);
                hv_set_drvdata(dev, NULL);
                return ret;
        }
@@ -923,7 +1144,7 @@ static int netvsc_probe(struct hv_device *dev,
        if (ret != 0) {
                pr_err("Unable to register netdev.\n");
                rndis_filter_device_remove(dev);
-               free_netdev(net);
+               netvsc_free_netdev(net);
        } else {
                schedule_delayed_work(&net_device_ctx->dwork, 0);
        }
@@ -962,7 +1183,7 @@ static int netvsc_remove(struct hv_device *dev)
         */
        rndis_filter_device_remove(dev);
 
-       free_netdev(net);
+       netvsc_free_netdev(net);
        return 0;
 }