Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / drivers / misc / mic / host / mic_virtio.c
diff --git a/kernel/drivers/misc/mic/host/mic_virtio.c b/kernel/drivers/misc/mic/host/mic_virtio.c
new file mode 100644 (file)
index 0000000..a020e4e
--- /dev/null
@@ -0,0 +1,812 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/dmaengine.h>
+#include <linux/mic_common.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+#include "mic_virtio.h"
+
+/*
+ * Size of the internal buffer used during DMAs as an intermediate buffer
+ * for copies to/from user space: 64 KiB rounded up to a whole number of
+ * pages. One buffer of this size is allocated for every vring in
+ * mic_virtio_add_device(), and the DMA copy loops in this file move at
+ * most this many bytes per transfer.
+ */
+#define MIC_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL)
+
+/*
+ * Issue one DMA memcpy of len bytes from src to dst on the device's DMA
+ * channel and wait synchronously for it to finish.
+ *
+ * Returns 0 on success, -EBUSY if the device has no DMA channel, -ENOMEM
+ * if no descriptor could be prepared, or the non-zero value reported by
+ * dma_submit_error()/dma_sync_wait(). Every failure is logged.
+ */
+static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
+                       dma_addr_t src, size_t len)
+{
+       struct dma_chan *chan = mdev->dma_ch;
+       struct dma_async_tx_descriptor *desc;
+       dma_cookie_t cookie;
+       int rc;
+
+       if (!chan) {
+               rc = -EBUSY;
+               goto out;
+       }
+
+       desc = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
+                                                   DMA_PREP_FENCE);
+       if (!desc) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       /* Submit, and only wait for completion if the submit succeeded. */
+       cookie = desc->tx_submit(desc);
+       rc = dma_submit_error(cookie);
+       if (!rc)
+               rc = dma_sync_wait(chan, cookie);
+out:
+       if (rc)
+               dev_err(mdev->sdev->parent, "%s %d err %d\n",
+                       __func__, __LINE__, rc);
+       return rc;
+}
+
+/*
+ * Initiates the copies across the PCIe bus from card memory to a user
+ * space buffer. When transfers are done using DMA, source/destination
+ * addresses and transfer length must follow the alignment requirements of
+ * the MIC DMA engine.
+ *
+ * @mvdev:  virtio device the transfer belongs to
+ * @ubuf:   user space destination
+ * @len:    number of bytes the caller wants at @ubuf
+ * @daddr:  card side source address; it does not have to be DMA aligned
+ * @dlen:   length of the card side buffer (not used by this function)
+ * @vr_idx: index of the vring whose intermediate DMA buffer is used
+ *
+ * The card address is rounded down to the DMA alignment and the transfer
+ * is lengthened by the same amount, so the first chunk lands in the
+ * intermediate buffer with dma_offset bytes of padding in front of the
+ * data the caller asked for. Data is moved in chunks of at most
+ * MIC_INT_DMA_BUF_SIZE bytes.
+ *
+ * Returns 0 on success, the mic_sync_dma() error, or -EFAULT if the user
+ * buffer cannot be written.
+ */
+static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, void __user *ubuf,
+                                  size_t len, u64 daddr, size_t dlen,
+                                  int vr_idx)
+{
+       struct mic_device *mdev = mvdev->mdev;
+       struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
+       size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align;
+       size_t dma_offset;
+       size_t partlen;
+       int err;
+
+       dma_offset = daddr - round_down(daddr, dma_alignment);
+       daddr -= dma_offset;
+       len += dma_offset;
+
+       while (len) {
+               partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE);
+
+               err = mic_sync_dma(mdev, mvr->buf_da, daddr,
+                                  ALIGN(partlen, dma_alignment));
+               if (err)
+                       goto err;
+
+               if (copy_to_user(ubuf, mvr->buf + dma_offset,
+                                partlen - dma_offset)) {
+                       err = -EFAULT;
+                       goto err;
+               }
+               daddr += partlen;
+               /*
+                * Only partlen - dma_offset bytes reached user space: the
+                * leading dma_offset bytes of the first chunk are alignment
+                * padding. Advancing by partlen here would leave a hole in
+                * the user buffer and push the following chunks past the
+                * end of the range the caller asked for.
+                */
+               ubuf += partlen - dma_offset;
+               /* The byte counters keep counting the padded DMA length. */
+               mvdev->in_bytes_dma += partlen;
+               mvdev->in_bytes += partlen;
+               len -= partlen;
+               dma_offset = 0;
+       }
+       return 0;
+err:
+       dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err);
+       return err;
+}
+
+/*
+ * Copy len bytes from the user buffer ubuf to card memory at daddr.
+ *
+ * DMA through the vring's intermediate buffer is used only when the card
+ * address is aligned for the DMA engine and the length, rounded up to
+ * that alignment, still fits in the dlen bytes of the destination.
+ * Otherwise the whole request is copied straight into the aperture
+ * mapping and the matching tx_*_unaligned counter is bumped.
+ *
+ * Returns 0 on success, the mic_sync_dma() error, or -EFAULT if the user
+ * buffer cannot be read.
+ */
+static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, void __user *ubuf,
+                                    size_t len, u64 daddr, size_t dlen,
+                                    int vr_idx)
+{
+       struct mic_device *mdev = mvdev->mdev;
+       struct mic_vringh *ring = &mvdev->mvr[vr_idx];
+       void __iomem *io_dst = mdev->aper.va + daddr;
+       size_t align = 1 << mdev->dma_ch->device->copy_align;
+       size_t chunk;
+       int rc;
+
+       if (daddr & (align - 1)) {
+               mvdev->tx_dst_unaligned += len;
+       } else if (ALIGN(len, align) > dlen) {
+               mvdev->tx_len_unaligned += len;
+       } else {
+               /* DMA path: stage each chunk in the intermediate buffer. */
+               while (len) {
+                       chunk = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE);
+
+                       if (copy_from_user(ring->buf, ubuf, chunk)) {
+                               rc = -EFAULT;
+                               goto err;
+                       }
+                       rc = mic_sync_dma(mdev, daddr, ring->buf_da,
+                                         ALIGN(chunk, align));
+                       if (rc)
+                               goto err;
+                       mvdev->out_bytes_dma += chunk;
+                       mvdev->out_bytes += chunk;
+                       daddr += chunk;
+                       io_dst += chunk;
+                       ubuf += chunk;
+                       len -= chunk;
+               }
+       }
+
+       /*
+        * Whatever is left (everything on the unaligned paths, nothing
+        * after the DMA loop) goes directly to IO memory. Ideally this
+        * would use something like copy_from_user_toio(..) if it existed.
+        */
+       if (copy_from_user((void __force *)io_dst, ubuf, len)) {
+               rc = -EFAULT;
+               goto err;
+       }
+       mvdev->out_bytes += len;
+       return 0;
+err:
+       dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, rc);
+       return rc;
+}
+
+#define MIC_VRINGH_READ true /* mic_vringh_copy() 'read' flag: copy to user */
+
+/*
+ * vringh notify hook: tell the card that buffers were added by sending
+ * the device's host-to-card doorbell interrupt. Nothing is sent while the
+ * doorbell is still -1.
+ */
+static void mic_notify(struct vringh *vrh)
+{
+       struct mic_vringh *ring = container_of(vrh, struct mic_vringh, vrh);
+       struct mic_vdev *vdev = ring->mvdev;
+       s8 doorbell = vdev->dc->h2c_vdev_db;
+
+       if (doorbell == -1)
+               return;
+
+       vdev->mdev->ops->send_intr(vdev->mdev, doorbell);
+}
+
+/*
+ * Bytes consumed so far in a VRINGH KIOV: the partial count recorded for
+ * the current element plus the full length of every element before it.
+ */
+static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov)
+{
+       u32 bytes = iov->consumed;
+       int idx = 0;
+
+       while (idx < iov->i) {
+               bytes += iov->iov[idx].iov_len;
+               idx++;
+       }
+       return bytes;
+}
+
+/*
+ * Walk the VRINGH KIOV and hand each piece to the matching copy routine:
+ * mic_virtio_copy_to_user() when read is set, mic_virtio_copy_from_user()
+ * otherwise. The walk stops when len bytes have been moved, when the KIOV
+ * runs out of elements, or on the first copy error.
+ *
+ * This is modelled on vringh_iov_xfer(..) in vringh.c.
+ * vringh_iov_pull_kern(..) and vringh_iov_push_kern(..) cannot be reused
+ * directly because there is no way to override the VRINGH xfer(..)
+ * routines as of v3.10.
+ *
+ * *out_len receives the number of bytes moved, also when an error is
+ * returned.
+ */
+static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov,
+                       void __user *ubuf, size_t len, bool read, int vr_idx,
+                       size_t *out_len)
+{
+       size_t moved = 0;
+       size_t chunk;
+       int rc = 0;
+
+       while (len && iov->i < iov->used) {
+               chunk = min(iov->iov[iov->i].iov_len, len);
+               rc = read ?
+                       mic_virtio_copy_to_user(mvdev, ubuf, chunk,
+                                       (u64)iov->iov[iov->i].iov_base,
+                                       iov->iov[iov->i].iov_len,
+                                       vr_idx) :
+                       mic_virtio_copy_from_user(mvdev, ubuf, chunk,
+                                       (u64)iov->iov[iov->i].iov_base,
+                                       iov->iov[iov->i].iov_len,
+                                       vr_idx);
+               if (rc) {
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, rc);
+                       break;
+               }
+
+               /* Account for the chunk on both the user and KIOV side. */
+               ubuf += chunk;
+               len -= chunk;
+               moved += chunk;
+               iov->consumed += chunk;
+               iov->iov[iov->i].iov_base += chunk;
+               iov->iov[iov->i].iov_len -= chunk;
+               if (iov->iov[iov->i].iov_len)
+                       continue;
+
+               /* Element used up: restore its base/len, then move on. */
+               iov->iov[iov->i].iov_base -= iov->consumed;
+               iov->iov[iov->i].iov_len = iov->consumed;
+               iov->consumed = 0;
+               iov->i++;
+       }
+       *out_len = moved;
+       return rc;
+}
+
+/*
+ * Use the standard VRINGH infrastructure in the kernel to fetch new
+ * descriptors, initiate the copies and update the used ring.
+ *
+ * Called from mic_virtio_copy_desc() with the ring's vr_mutex held.
+ *
+ * The user iovec array copy->iov (copy->iovcnt entries) is walked one
+ * entry at a time. Each entry is first offered to the ring's read KIOV
+ * (copied to user space) and whatever is left of it is then offered to
+ * the write KIOV (copied from user space). The number of bytes moved is
+ * accumulated in copy->out_len.
+ *
+ * Returns the vringh_getdesc_kern() result when that is zero or negative,
+ * -EINVAL when a user iovec cannot be read, the mic_vringh_copy() error
+ * when a copy fails, and 0 otherwise.
+ *
+ * NOTE(review): a faulting copy_from_user() of the iovec is reported as
+ * -EINVAL rather than -EFAULT; confirm nothing depends on that before
+ * changing it.
+ */
+static int _mic_virtio_copy(struct mic_vdev *mvdev,
+       struct mic_copy_desc *copy)
+{
+       int ret = 0;
+       u32 iovcnt = copy->iovcnt;
+       struct iovec iov;
+       struct iovec __user *u_iov = copy->iov;
+       void __user *ubuf = NULL;
+       struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
+       struct vringh_kiov *riov = &mvr->riov;
+       struct vringh_kiov *wiov = &mvr->wiov;
+       struct vringh *vrh = &mvr->vrh;
+       u16 *head = &mvr->head;
+       struct mic_vring *vr = &mvr->vring;
+       size_t len = 0, out_len; /* len: bytes left in the current user iovec */
+
+       copy->out_len = 0;
+       /*
+        * Fetch a new IOVEC if all previous elements have been processed.
+        * A partially processed descriptor left over from an earlier call
+        * is continued instead.
+        */
+       if (riov->i == riov->used && wiov->i == wiov->used) {
+               ret = vringh_getdesc_kern(vrh, riov, wiov,
+                               head, GFP_KERNEL);
+               /* Check if there are available descriptors */
+               if (ret <= 0)
+                       return ret;
+       }
+       while (iovcnt) {
+               if (!len) {
+                       /* Copy over a new iovec from user space. */
+                       ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
+                       if (ret) {
+                               ret = -EINVAL;
+                               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                                       __func__, __LINE__, ret);
+                               break;
+                       }
+                       len = iov.iov_len;
+                       ubuf = iov.iov_base;
+               }
+               /* Issue all the read descriptors first */
+               ret = mic_vringh_copy(mvdev, riov, ubuf, len, MIC_VRINGH_READ,
+                                     copy->vr_idx, &out_len);
+               if (ret) {
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       break;
+               }
+               len -= out_len;
+               ubuf += out_len;
+               copy->out_len += out_len;
+               /* Issue the write descriptors next */
+               ret = mic_vringh_copy(mvdev, wiov, ubuf, len, !MIC_VRINGH_READ,
+                                     copy->vr_idx, &out_len);
+               if (ret) {
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       break;
+               }
+               len -= out_len;
+               ubuf += out_len;
+               copy->out_len += out_len;
+               if (!len) {
+                       /* One user space iovec is now completed */
+                       iovcnt--;
+                       u_iov++;
+               }
+               /* Exit loop if all elements in KIOVs have been processed. */
+               if (riov->i == riov->used && wiov->i == wiov->used)
+                       break;
+       }
+       /*
+        * Update the used ring if a descriptor was available and some data was
+        * copied in/out and the user asked for a used ring update.
+        * *head is USHRT_MAX whenever no fetched descriptor is waiting to
+        * be completed; it is put back to that value below. This block
+        * also runs when the loop above was left because of an error.
+        */
+       if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
+               u32 total = 0;
+
+               /* Determine the total data consumed */
+               total += mic_vringh_iov_consumed(riov);
+               total += mic_vringh_iov_consumed(wiov);
+               vringh_complete_kern(vrh, *head, total);
+               *head = USHRT_MAX;
+               if (vringh_need_notify_kern(vrh) > 0)
+                       vringh_notify(vrh);
+               vringh_kiov_cleanup(riov);
+               vringh_kiov_cleanup(wiov);
+               /* Update avail idx for user space */
+               vr->info->avail_idx = vrh->last_avail_idx;
+       }
+       return ret;
+}
+
+/*
+ * Validate a copy request: the vring index must name one of the device's
+ * virtqueues. Returns 0 if it does, -EINVAL (logged) otherwise.
+ */
+static inline int mic_verify_copy_args(struct mic_vdev *mvdev,
+               struct mic_copy_desc *copy)
+{
+       if (copy->vr_idx < mvdev->dd->num_vq)
+               return 0;
+
+       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+               __func__, __LINE__, -EINVAL);
+       return -EINVAL;
+}
+
+/*
+ * Copy a specified number of virtio descriptors in a chain.
+ *
+ * Validates copy->vr_idx, then runs _mic_virtio_copy() under that ring's
+ * vr_mutex.
+ *
+ * Returns -EINVAL for an out of range vring index, -ENODEV when
+ * mic_vdevup() reports the device is not up, or the _mic_virtio_copy()
+ * result.
+ */
+int mic_virtio_copy_desc(struct mic_vdev *mvdev,
+               struct mic_copy_desc *copy)
+{
+       int err;
+       struct mic_vringh *mvr;
+
+       err = mic_verify_copy_args(mvdev, copy);
+       if (err)
+               return err;
+
+       /*
+        * Index mvr[] only after vr_idx has been range checked, so that no
+        * out of bounds element address is ever formed from caller input.
+        */
+       mvr = &mvdev->mvr[copy->vr_idx];
+
+       mutex_lock(&mvr->vr_mutex);
+       if (!mic_vdevup(mvdev)) {
+               err = -ENODEV;
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, err);
+               goto err;
+       }
+       err = _mic_virtio_copy(mvdev, copy);
+       if (err) {
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, err);
+       }
+err:
+       mutex_unlock(&mvr->vr_mutex);
+       return err;
+}
+
+/*
+ * Point every vringh's used ring at the used_address stored in the
+ * device's vq config, taken as an offset into the aperture mapping, and
+ * then clear used_address_updated. A virtqueue whose used_address is
+ * still zero is skipped with a warning.
+ */
+static void mic_virtio_init_post(struct mic_vdev *mvdev)
+{
+       struct mic_vqconfig *vqcfg = mic_vq_config(mvdev->dd);
+       int vq;
+
+       for (vq = 0; vq < mvdev->dd->num_vq; vq++) {
+               if (le64_to_cpu(vqcfg[vq].used_address)) {
+                       mvdev->mvr[vq].vrh.vring.used =
+                               (void __force *)mvdev->mdev->aper.va +
+                               le64_to_cpu(vqcfg[vq].used_address);
+               } else {
+                       dev_warn(mic_dev(mvdev), "used_address zero??\n");
+               }
+       }
+
+       mvdev->dc->used_address_updated = 0;
+
+       dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n",
+               __func__, mvdev->virtio_id);
+}
+
+/*
+ * Reset one virtio device: with every vring mutex of the device held,
+ * clear the device status, acknowledge the reset in the device control
+ * block and rewind the ring indices of all virtqueues.
+ */
+static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
+{
+       int vq;
+
+       dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
+               __func__, mvdev->dd->status, mvdev->virtio_id);
+
+       for (vq = 0; vq < mvdev->dd->num_vq; vq++) {
+               /*
+                * Avoid lockdep false positive. The + 1 is for the mic
+                * mutex which is held in the reset devices code path.
+                */
+               mutex_lock_nested(&mvdev->mvr[vq].vr_mutex, vq + 1);
+       }
+
+       /* 0 status means "reset" */
+       mvdev->dd->status = 0;
+       mvdev->dc->vdev_reset = 0;
+       mvdev->dc->host_ack = 1;
+
+       for (vq = 0; vq < mvdev->dd->num_vq; vq++) {
+               struct mic_vringh *ring = &mvdev->mvr[vq];
+
+               ring->vring.info->avail_idx = 0;
+               ring->vrh.completed = 0;
+               ring->vrh.last_avail_idx = 0;
+               ring->vrh.last_used_idx = 0;
+       }
+
+       for (vq = 0; vq < mvdev->dd->num_vq; vq++) {
+               mutex_unlock(&mvdev->mvr[vq].vr_mutex);
+       }
+}
+
+/*
+ * Reset every virtio device on the MIC device's vdev_list and wake up
+ * anything sleeping on each device's wait queue.
+ */
+void mic_virtio_reset_devices(struct mic_device *mdev)
+{
+       struct mic_vdev *vdev, *next;
+
+       dev_dbg(mdev->sdev->parent, "%s\n",  __func__);
+
+       list_for_each_entry_safe(vdev, next, &mdev->vdev_list, list) {
+               mic_virtio_device_reset(vdev);
+               vdev->poll_wake = 1;
+               wake_up(&vdev->waitq);
+       }
+}
+
+/*
+ * Work item scheduled from the virtio doorbell interrupt. Acts on the
+ * flags found in the device control block (used ring address update,
+ * device reset request) and then wakes up waiters on the device.
+ */
+void mic_bh_handler(struct work_struct *work)
+{
+       struct mic_vdev *vdev =
+               container_of(work, struct mic_vdev, virtio_bh_work);
+
+       if (vdev->dc->used_address_updated)
+               mic_virtio_init_post(vdev);
+       if (vdev->dc->vdev_reset)
+               mic_virtio_device_reset(vdev);
+
+       vdev->poll_wake = 1;
+       wake_up(&vdev->waitq);
+}
+
+/*
+ * Interrupt handler for a virtio device's doorbell: run the hardware
+ * interrupt workarounds and defer the real work to mic_bh_handler().
+ */
+static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
+{
+       struct mic_vdev *vdev = data;
+
+       vdev->mdev->ops->intr_workarounds(vdev->mdev);
+       schedule_work(&vdev->virtio_bh_work);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Replace the device's config space with config_len bytes read from argp
+ * and tell the card about it through the config doorbell, then poll for
+ * guest_ack in up to 100 waits of 100 ms each.
+ *
+ * Runs with the mic mutex and every vring mutex of the device held.
+ *
+ * Returns -EIO when no config doorbell is set or the device type is -1,
+ * -EFAULT when argp cannot be read, and otherwise the result of the last
+ * wait_event_timeout() call.
+ *
+ * NOTE(review): because the wait result is returned as is, an acknowledged
+ * change presumably yields a positive value and a full timeout yields 0;
+ * confirm what callers expect before relying on either.
+ */
+int mic_virtio_config_change(struct mic_vdev *mvdev,
+                       void __user *argp)
+{
+       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+       struct mic_bootparam *bootparam = mvdev->mdev->dp;
+       s8 db = bootparam->h2c_config_db;
+       int ret = 0, retry, vq;
+
+       mutex_lock(&mvdev->mdev->mic_mutex);
+       for (vq = 0; vq < mvdev->dd->num_vq; vq++) {
+               mutex_lock_nested(&mvdev->mvr[vq].vr_mutex, vq + 1);
+       }
+
+       if (db == -1 || mvdev->dd->type == -1) {
+               ret = -EIO;
+               goto exit;
+       }
+
+       if (copy_from_user(mic_vq_configspace(mvdev->dd),
+                          argp, mvdev->dd->config_len)) {
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, -EFAULT);
+               ret = -EFAULT;
+               goto exit;
+       }
+       mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
+       mvdev->mdev->ops->send_intr(mvdev->mdev, db);
+
+       /* Poll for the acknowledgement; stop at the first non-zero wait. */
+       retry = 100;
+       while (retry--) {
+               ret = wait_event_timeout(wake,
+                       mvdev->dc->guest_ack, msecs_to_jiffies(100));
+               if (ret)
+                       break;
+       }
+
+       dev_dbg(mic_dev(mvdev),
+               "%s %d retry: %d\n", __func__, __LINE__, retry);
+       mvdev->dc->config_change = 0;
+       mvdev->dc->guest_ack = 0;
+exit:
+       for (vq = 0; vq < mvdev->dd->num_vq; vq++) {
+               mutex_unlock(&mvdev->mvr[vq].vr_mutex);
+       }
+       mutex_unlock(&mvdev->mdev->mic_mutex);
+       return ret;
+}
+
+/*
+ * Copy a device descriptor from user space into the first free slot of
+ * the device page.
+ *
+ * @mvdev:   virtio device being added
+ * @argp:    user pointer to the device descriptor
+ * @type:    receives the device type found in the descriptor
+ * @devpage: receives the address of the device page slot that was filled
+ *
+ * The slot is written with its type forced to 0; the caller stores the
+ * real type once the rest of the device has been initialized.
+ *
+ * Returns 0 on success, -EFAULT if the descriptor cannot be read,
+ * -EINVAL if it is too large, has too many or too large vrings, changed
+ * while it was being read, or no free slot exists, and -ENOMEM if the
+ * temporary copy cannot be allocated.
+ */
+static int mic_copy_dp_entry(struct mic_vdev *mvdev,
+                                       void __user *argp,
+                                       __u8 *type,
+                                       struct mic_device_desc **devpage)
+{
+       struct mic_device *mdev = mvdev->mdev;
+       struct mic_device_desc dd, *dd_config, *devp;
+       struct mic_vqconfig *vqconfig;
+       int ret = 0, i;
+       bool slot_found = false;
+
+       if (copy_from_user(&dd, argp, sizeof(dd))) {
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, -EFAULT);
+               return -EFAULT;
+       }
+
+       if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
+           dd.num_vq > MIC_MAX_VRINGS) {
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, -EINVAL);
+               return -EINVAL;
+       }
+
+       dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
+       if (dd_config == NULL) {
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, -ENOMEM);
+               return -ENOMEM;
+       }
+       if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
+               ret = -EFAULT;
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, ret);
+               goto exit;
+       }
+
+       /*
+        * The header has now been fetched from user space twice. The size
+        * checks and the allocation above used the first copy, while the
+        * slot search and the memcpy() below take their sizes from
+        * dd_config. Refuse the request if user space changed the header
+        * in between, so that no unvalidated size is ever used.
+        */
+       if (memcmp(&dd, dd_config, sizeof(dd))) {
+               ret = -EINVAL;
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, ret);
+               goto exit;
+       }
+
+       vqconfig = mic_vq_config(dd_config);
+       for (i = 0; i < dd.num_vq; i++) {
+               if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
+                       ret =  -EINVAL;
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       goto exit;
+               }
+       }
+
+       /*
+        * Find the first free device page entry. Entries start right after
+        * the boot parameters; a type of 0 or -1 marks a free slot.
+        */
+       for (i = sizeof(struct mic_bootparam);
+               i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
+               i += mic_total_desc_size(devp)) {
+               devp = mdev->dp + i;
+               if (devp->type == 0 || devp->type == -1) {
+                       slot_found = true;
+                       break;
+               }
+       }
+       if (!slot_found) {
+               ret =  -EINVAL;
+               dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                       __func__, __LINE__, ret);
+               goto exit;
+       }
+       /*
+        * Save off the type before doing the memcpy. Type will be set in the
+        * end after completing all initialization for the new device.
+        */
+       *type = dd_config->type;
+       dd_config->type = 0;
+       memcpy(devp, dd_config, mic_desc_size(dd_config));
+
+       *devpage = devp;
+exit:
+       kfree(dd_config);
+       return ret;
+}
+
+/*
+ * Locate the device control block that follows the (aligned) device
+ * descriptor in the device page, clear its handshake flags, mark both
+ * doorbells as -1 and remember the block in mvdev->dc.
+ */
+static void mic_init_device_ctrl(struct mic_vdev *mvdev,
+                               struct mic_device_desc *devpage)
+{
+       struct mic_device_ctrl *ctrl =
+               (void *)devpage + mic_aligned_desc_size(devpage);
+
+       ctrl->config_change = 0;
+       ctrl->guest_ack = 0;
+       ctrl->vdev_reset = 0;
+       ctrl->host_ack = 0;
+       ctrl->used_address_updated = 0;
+       ctrl->c2h_vdev_db = -1;
+       ctrl->h2c_vdev_db = -1;
+       mvdev->dc = ctrl;
+}
+
+/*
+ * Add a virtio device described by the user space descriptor at argp.
+ *
+ * With the mic mutex held this copies the descriptor into the device
+ * page, initializes the device control block, and for every virtqueue
+ * allocates and maps the vring plus an intermediate DMA buffer and sets
+ * up the vringh. It then requests the device's doorbell interrupt, adds
+ * the device to mdev->vdev_list, publishes the device type and, if a
+ * config doorbell is set, interrupts the card.
+ *
+ * Returns 0 on success, the mic_copy_dp_entry() or vringh_init_kern()
+ * error, -ENOMEM when an allocation or mapping fails, or the error from
+ * requesting the interrupt.
+ */
+int mic_virtio_add_device(struct mic_vdev *mvdev,
+                       void __user *argp)
+{
+       struct mic_device *mdev = mvdev->mdev;
+       struct mic_device_desc *dd = NULL;
+       struct mic_vqconfig *vqconfig;
+       int vr_size, i, j, ret;
+       u8 type = 0;
+       s8 db;
+       char irqname[10];
+       struct mic_bootparam *bootparam = mdev->dp;
+       u16 num;
+       dma_addr_t vr_addr;
+
+       mutex_lock(&mdev->mic_mutex);
+
+       ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
+       if (ret) {
+               mutex_unlock(&mdev->mic_mutex);
+               return ret;
+       }
+
+       mic_init_device_ctrl(mvdev, dd);
+
+       mvdev->dd = dd;
+       mvdev->virtio_id = type;
+       vqconfig = mic_vq_config(dd);
+       INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);
+
+       for (i = 0; i < dd->num_vq; i++) {
+               struct mic_vringh *mvr = &mvdev->mvr[i];
+               struct mic_vring *vr = &mvdev->mvr[i].vring;
+               void *dma_buf;
+               dma_addr_t dma_buf_da;
+
+               num = le16_to_cpu(vqconfig[i].num);
+               mutex_init(&mvr->vr_mutex);
+               /* The ring is followed by its _mic_vring_info block. */
+               vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
+                       sizeof(struct _mic_vring_info));
+               vr->va = (void *)
+                       __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                        get_order(vr_size));
+               if (!vr->va) {
+                       ret = -ENOMEM;
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       goto err;
+               }
+               vr->len = vr_size;
+               vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
+               vr->info->magic = cpu_to_le32(MIC_MAGIC + mvdev->virtio_id + i);
+               vr_addr = mic_map_single(mdev, vr->va, vr_size);
+               if (mic_map_error(vr_addr)) {
+                       free_pages((unsigned long)vr->va, get_order(vr_size));
+                       ret = -ENOMEM;
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       goto err;
+               }
+               vqconfig[i].address = cpu_to_le64(vr_addr);
+
+               vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
+               ret = vringh_init_kern(&mvr->vrh,
+                       *(u32 *)mic_vq_features(mvdev->dd), num, false,
+                       vr->vr.desc, vr->vr.avail, vr->vr.used);
+               if (ret) {
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       goto err;
+               }
+               vringh_kiov_init(&mvr->riov, NULL, 0);
+               vringh_kiov_init(&mvr->wiov, NULL, 0);
+               mvr->head = USHRT_MAX;
+               mvr->mvdev = mvdev;
+               mvr->vrh.notify = mic_notify;
+               dev_dbg(mdev->sdev->parent,
+                       "%s %d index %d va %p info %p vr_size 0x%x\n",
+                       __func__, __LINE__, i, vr->va, vr->info, vr_size);
+
+               /*
+                * Intermediate buffer for DMA copies. Both the allocation
+                * and the mapping can fail; the ring only gets the buffer
+                * once both have succeeded, so a failure leaves mvr->buf
+                * and mvr->buf_da exactly as they were.
+                */
+               dma_buf = (void *)__get_free_pages(GFP_KERNEL,
+                                       get_order(MIC_INT_DMA_BUF_SIZE));
+               if (!dma_buf) {
+                       ret = -ENOMEM;
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       goto err;
+               }
+               dma_buf_da = mic_map_single(mvdev->mdev, dma_buf,
+                                           MIC_INT_DMA_BUF_SIZE);
+               if (mic_map_error(dma_buf_da)) {
+                       free_pages((unsigned long)dma_buf,
+                                  get_order(MIC_INT_DMA_BUF_SIZE));
+                       ret = -ENOMEM;
+                       dev_err(mic_dev(mvdev), "%s %d err %d\n",
+                               __func__, __LINE__, ret);
+                       goto err;
+               }
+               mvr->buf = dma_buf;
+               mvr->buf_da = dma_buf_da;
+       }
+
+       snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id,
+                mvdev->virtio_id);
+       mvdev->virtio_db = mic_next_db(mdev);
+       mvdev->virtio_cookie = mic_request_threaded_irq(mdev,
+                                              mic_virtio_intr_handler,
+                                              NULL, irqname, mvdev,
+                                              mvdev->virtio_db, MIC_INTR_DB);
+       if (IS_ERR(mvdev->virtio_cookie)) {
+               ret = PTR_ERR(mvdev->virtio_cookie);
+               dev_dbg(mdev->sdev->parent, "request irq failed\n");
+               goto err;
+       }
+
+       mvdev->dc->c2h_vdev_db = mvdev->virtio_db;
+
+       list_add_tail(&mvdev->list, &mdev->vdev_list);
+       /*
+        * Order the type update with previous stores. This write barrier
+        * is paired with the corresponding read barrier before the uncached
+        * system memory read of the type, on the card while scanning the
+        * device page.
+        */
+       smp_wmb();
+       dd->type = type;
+
+       dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type);
+
+       db = bootparam->h2c_config_db;
+       if (db != -1)
+               mdev->ops->send_intr(mdev, db);
+       mutex_unlock(&mdev->mic_mutex);
+       return 0;
+err:
+       /*
+        * Unmap and free the vrings of the rings set up before ring i.
+        *
+        * NOTE(review): this path does not release ring i itself when it
+        * failed after its vring was mapped, nor the intermediate DMA
+        * buffers of the earlier rings. TODO confirm how the caller tears
+        * down a device whose add failed before adding frees here.
+        */
+       vqconfig = mic_vq_config(dd);
+       for (j = 0; j < i; j++) {
+               struct mic_vringh *mvr = &mvdev->mvr[j];
+               mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
+                                mvr->vring.len);
+               free_pages((unsigned long)mvr->vring.va,
+                          get_order(mvr->vring.len));
+       }
+       mutex_unlock(&mdev->mic_mutex);
+       return ret;
+}
+
+/*
+ * Remove a virtio device.
+ *
+ * With the mic mutex held: if a config doorbell is set, ask the card to
+ * hot remove the device and poll for guest_ack in up to 100 waits of
+ * 100 ms each. Then free the doorbell interrupt, flush the bottom half
+ * work, release every ring's intermediate DMA buffer, KIOVs and vring,
+ * unlink the device from mdev->vdev_list and finally mark the device page
+ * entry with type -1.
+ */
+void mic_virtio_del_device(struct mic_vdev *mvdev)
+{
+       struct mic_device *mdev = mvdev->mdev;
+       struct mic_bootparam *bootparam = mdev->dp;
+       struct mic_vdev *cur, *next;
+       struct mic_vqconfig *vqconfig;
+       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+       int i, ret, retry;
+       s8 db;
+
+       mutex_lock(&mdev->mic_mutex);
+       db = bootparam->h2c_config_db;
+       if (db != -1) {
+               dev_dbg(mdev->sdev->parent,
+                       "Requesting hot remove id %d\n", mvdev->virtio_id);
+               mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
+               mdev->ops->send_intr(mdev, db);
+
+               retry = 100;
+               while (retry--) {
+                       ret = wait_event_timeout(wake,
+                               mvdev->dc->guest_ack, msecs_to_jiffies(100));
+                       if (ret)
+                               break;
+               }
+               dev_dbg(mdev->sdev->parent,
+                       "Device id %d config_change %d guest_ack %d retry %d\n",
+                       mvdev->virtio_id, mvdev->dc->config_change,
+                       mvdev->dc->guest_ack, retry);
+               mvdev->dc->config_change = 0;
+               mvdev->dc->guest_ack = 0;
+       }
+
+       /* No more interrupts or bottom half work past this point. */
+       mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
+       flush_work(&mvdev->virtio_bh_work);
+
+       vqconfig = mic_vq_config(mvdev->dd);
+       for (i = 0; i < mvdev->dd->num_vq; i++) {
+               struct mic_vringh *ring = &mvdev->mvr[i];
+
+               mic_unmap_single(mvdev->mdev, ring->buf_da,
+                                MIC_INT_DMA_BUF_SIZE);
+               free_pages((unsigned long)ring->buf,
+                          get_order(MIC_INT_DMA_BUF_SIZE));
+               vringh_kiov_cleanup(&ring->riov);
+               vringh_kiov_cleanup(&ring->wiov);
+               mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
+                                ring->vring.len);
+               free_pages((unsigned long)ring->vring.va,
+                          get_order(ring->vring.len));
+       }
+
+       list_for_each_entry_safe(cur, next, &mdev->vdev_list, list) {
+               if (cur != mvdev)
+                       continue;
+               list_del(&cur->list);
+               dev_dbg(mdev->sdev->parent,
+                       "Removing virtio device id %d\n",
+                       mvdev->virtio_id);
+               break;
+       }
+       /*
+        * Order the type update with previous stores. This write barrier
+        * is paired with the corresponding read barrier before the uncached
+        * system memory read of the type, on the card while scanning the
+        * device page.
+        */
+       smp_wmb();
+       mvdev->dd->type = -1;
+       mutex_unlock(&mdev->mic_mutex);
+}