These changes are the raw update to qemu-2.6.
[kvmfornfv.git] / qemu / hw / vfio / platform.c
index 60365d1..1798a00 100644 (file)
@@ -14,6 +14,8 @@
  *  Copyright Red Hat, Inc. 2012
  */
 
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
 
  * Functions used whatever the injection method
  */
 
+static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
+{
+    return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
+}
+
 /**
  * vfio_init_intp - allocate, initialize the IRQ struct pointer
  * and add it into the list of IRQs
@@ -57,18 +64,25 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
     sysbus_init_irq(sbdev, &intp->qemuirq);
 
     /* Get an eventfd for trigger */
-    ret = event_notifier_init(&intp->interrupt, 0);
+    intp->interrupt = g_malloc0(sizeof(EventNotifier));
+    ret = event_notifier_init(intp->interrupt, 0);
     if (ret) {
+        g_free(intp->interrupt);
         g_free(intp);
         error_report("vfio: Error: trigger event_notifier_init failed ");
         return NULL;
     }
-    /* Get an eventfd for resample/unmask */
-    ret = event_notifier_init(&intp->unmask, 0);
-    if (ret) {
-        g_free(intp);
-        error_report("vfio: Error: resamplefd event_notifier_init failed");
-        return NULL;
+    if (vfio_irq_is_automasked(intp)) {
+        /* Get an eventfd for resample/unmask */
+        intp->unmask = g_malloc0(sizeof(EventNotifier));
+        ret = event_notifier_init(intp->unmask, 0);
+        if (ret) {
+            g_free(intp->interrupt);
+            g_free(intp->unmask);
+            g_free(intp);
+            error_report("vfio: Error: resamplefd event_notifier_init failed");
+            return NULL;
+        }
     }
 
     QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
@@ -100,7 +114,7 @@ static int vfio_set_trigger_eventfd(VFIOINTp *intp,
     irq_set->start = 0;
     irq_set->count = 1;
     pfd = (int32_t *)&irq_set->data;
-    *pfd = event_notifier_get_fd(&intp->interrupt);
+    *pfd = event_notifier_get_fd(intp->interrupt);
     qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
     ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
     g_free(irq_set);
@@ -130,12 +144,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
 {
     int i;
 
-    trace_vfio_platform_mmap_set_enabled(enabled);
-
     for (i = 0; i < vdev->vbasedev.num_regions; i++) {
-        VFIORegion *region = vdev->regions[i];
-
-        memory_region_set_enabled(&region->mmap_mem, enabled);
+        vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
     }
 }
 
@@ -182,7 +192,7 @@ static void vfio_intp_mmap_enable(void *opaque)
 static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
 {
     trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
-                              event_notifier_get_fd(&intp->interrupt));
+                              event_notifier_get_fd(intp->interrupt));
 
     intp->state = VFIO_IRQ_ACTIVE;
 
@@ -224,18 +234,18 @@ static void vfio_intp_interrupt(VFIOINTp *intp)
         trace_vfio_intp_interrupt_set_pending(intp->pin);
         QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
                              intp, pqnext);
-        ret = event_notifier_test_and_clear(&intp->interrupt);
+        ret = event_notifier_test_and_clear(intp->interrupt);
         qemu_mutex_unlock(&vdev->intp_mutex);
         return;
     }
 
     trace_vfio_platform_intp_interrupt(intp->pin,
-                              event_notifier_get_fd(&intp->interrupt));
+                              event_notifier_get_fd(intp->interrupt));
 
-    ret = event_notifier_test_and_clear(&intp->interrupt);
+    ret = event_notifier_test_and_clear(intp->interrupt);
     if (!ret) {
-        error_report("Error when clearing fd=%d (ret = %d)\n",
-                     event_notifier_get_fd(&intp->interrupt), ret);
+        error_report("Error when clearing fd=%d (ret = %d)",
+                     event_notifier_get_fd(intp->interrupt), ret);
     }
 
     intp->state = VFIO_IRQ_ACTIVE;
@@ -283,13 +293,13 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
     QLIST_FOREACH(intp, &vdev->intp_list, next) {
         if (intp->state == VFIO_IRQ_ACTIVE) {
             trace_vfio_platform_eoi(intp->pin,
-                                event_notifier_get_fd(&intp->interrupt));
+                                event_notifier_get_fd(intp->interrupt));
             intp->state = VFIO_IRQ_INACTIVE;
 
             /* deassert the virtual IRQ */
             qemu_set_irq(intp->qemuirq, 0);
 
-            if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
+            if (vfio_irq_is_automasked(intp)) {
                 /* unmasks the physical level-sensitive IRQ */
                 vfio_unmask_single_irqindex(vbasedev, intp->pin);
             }
@@ -310,18 +320,29 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
 /**
  * vfio_start_eventfd_injection - starts the virtual IRQ injection using
  * user-side handled eventfds
- * @intp: the IRQ struct pointer
+ * @sbdev: the sysbus device handle
+ * @irq: the qemu irq handle
  */
 
-static int vfio_start_eventfd_injection(VFIOINTp *intp)
+static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 {
     int ret;
+    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+    VFIOINTp *intp;
+
+    QLIST_FOREACH(intp, &vdev->intp_list, next) {
+        if (intp->qemuirq == irq) {
+            break;
+        }
+    }
+    assert(intp);
 
     ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
     if (ret) {
-        error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
+        error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
+                     intp->pin);
+        abort();
     }
-    return ret;
 }
 
 /*
@@ -349,7 +370,7 @@ static int vfio_set_resample_eventfd(VFIOINTp *intp)
     irq_set->start = 0;
     irq_set->count = 1;
     pfd = (int32_t *)&irq_set->data;
-    *pfd = event_notifier_get_fd(&intp->unmask);
+    *pfd = event_notifier_get_fd(intp->unmask);
     qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
     ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
     g_free(irq_set);
@@ -359,6 +380,15 @@ static int vfio_set_resample_eventfd(VFIOINTp *intp)
     return ret;
 }
 
+/**
+ * vfio_start_irqfd_injection - starts the virtual IRQ injection using
+ * irqfd
+ *
+ * @sbdev: the sysbus device handle
+ * @irq: the qemu irq handle
+ *
+ * In case the irqfd setup fails, we fallback to userspace handled eventfd
+ */
 static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 {
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
@@ -366,7 +396,7 @@ static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
 
     if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
         !vdev->irqfd_allowed) {
-        return;
+        goto fail_irqfd;
     }
 
     QLIST_FOREACH(intp, &vdev->intp_list, next) {
@@ -376,39 +406,36 @@ static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
     }
     assert(intp);
 
-    /* Get to a known interrupt state */
-    qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt),
-                        NULL, NULL, vdev);
-
-    vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin);
-    qemu_set_irq(intp->qemuirq, 0);
-
-    if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt,
-                                   &intp->unmask, irq) < 0) {
+    if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
+                                   intp->unmask, irq) < 0) {
         goto fail_irqfd;
     }
 
     if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
         goto fail_vfio;
     }
-    if (vfio_set_resample_eventfd(intp) < 0) {
-        goto fail_vfio;
+    if (vfio_irq_is_automasked(intp)) {
+        if (vfio_set_resample_eventfd(intp) < 0) {
+            goto fail_vfio;
+        }
+        trace_vfio_platform_start_level_irqfd_injection(intp->pin,
+                                    event_notifier_get_fd(intp->interrupt),
+                                    event_notifier_get_fd(intp->unmask));
+    } else {
+        trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
+                                    event_notifier_get_fd(intp->interrupt));
     }
 
-    /* Let's resume injection with irqfd setup */
-    vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
-
     intp->kvm_accel = true;
 
-    trace_vfio_platform_start_irqfd_injection(intp->pin,
-                                     event_notifier_get_fd(&intp->interrupt),
-                                     event_notifier_get_fd(&intp->unmask));
     return;
 fail_vfio:
-    kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq);
+    kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
+    error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
+                 intp->pin);
+    abort();
 fail_irqfd:
-    vfio_start_eventfd_injection(intp);
-    vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+    vfio_start_eventfd_injection(sbdev, irq);
     return;
 }
 
@@ -446,28 +473,16 @@ static int vfio_populate_device(VFIODevice *vbasedev)
     vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
 
     for (i = 0; i < vbasedev->num_regions; i++) {
-        struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
-        VFIORegion *ptr;
+        char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
 
-        vdev->regions[i] = g_malloc0(sizeof(VFIORegion));
-        ptr = vdev->regions[i];
-        reg_info.index = i;
-        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+        vdev->regions[i] = g_new0(VFIORegion, 1);
+        ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+                                vdev->regions[i], i, name);
+        g_free(name);
         if (ret) {
             error_report("vfio: Error getting region %d info: %m", i);
             goto reg_error;
         }
-        ptr->flags = reg_info.flags;
-        ptr->size = reg_info.size;
-        ptr->fd_offset = reg_info.offset;
-        ptr->nr = i;
-        ptr->vbasedev = vbasedev;
-
-        trace_vfio_platform_populate_regions(ptr->nr,
-                            (unsigned long)ptr->flags,
-                            (unsigned long)ptr->size,
-                            ptr->vbasedev->fd,
-                            (unsigned long)ptr->fd_offset);
     }
 
     vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
@@ -504,6 +519,9 @@ irq_err:
     }
 reg_error:
     for (i = 0; i < vbasedev->num_regions; i++) {
+        if (vdev->regions[i]) {
+            vfio_region_finalize(vdev->regions[i]);
+        }
         g_free(vdev->regions[i]);
     }
     g_free(vdev->regions);
@@ -530,38 +548,45 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
 {
     VFIOGroup *group;
     VFIODevice *vbasedev_iter;
-    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+    char *tmp, group_path[PATH_MAX], *group_name;
     ssize_t len;
     struct stat st;
     int groupid;
     int ret;
 
-    /* name must be set prior to the call */
-    if (!vbasedev->name || strchr(vbasedev->name, '/')) {
-        return -EINVAL;
-    }
+    /* @sysfsdev takes precedence over @host */
+    if (vbasedev->sysfsdev) {
+        g_free(vbasedev->name);
+        vbasedev->name = g_strdup(basename(vbasedev->sysfsdev));
+    } else {
+        if (!vbasedev->name || strchr(vbasedev->name, '/')) {
+            return -EINVAL;
+        }
 
-    /* Check that the host device exists */
-    g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/",
-               vbasedev->name);
+        vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
+                                             vbasedev->name);
+    }
 
-    if (stat(path, &st) < 0) {
-        error_report("vfio: error: no such host device: %s", path);
+    if (stat(vbasedev->sysfsdev, &st) < 0) {
+        error_report("vfio: error: no such host device: %s",
+                     vbasedev->sysfsdev);
         return -errno;
     }
 
-    g_strlcat(path, "iommu_group", sizeof(path));
-    len = readlink(path, iommu_group_path, sizeof(iommu_group_path));
-    if (len < 0 || len >= sizeof(iommu_group_path)) {
+    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+    len = readlink(tmp, group_path, sizeof(group_path));
+    g_free(tmp);
+
+    if (len < 0 || len >= sizeof(group_path)) {
         error_report("vfio: error no iommu_group for device");
         return len < 0 ? -errno : -ENAMETOOLONG;
     }
 
-    iommu_group_path[len] = 0;
-    group_name = basename(iommu_group_path);
+    group_path[len] = 0;
 
+    group_name = basename(group_path);
     if (sscanf(group_name, "%d", &groupid) != 1) {
-        error_report("vfio: error reading %s: %m", path);
+        error_report("vfio: error reading %s: %m", group_path);
         return -errno;
     }
 
@@ -573,66 +598,30 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
         return -ENOENT;
     }
 
-    g_snprintf(path, sizeof(path), "%s", vbasedev->name);
-
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
         if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
-            error_report("vfio: error: device %s is already attached", path);
+            error_report("vfio: error: device %s is already attached",
+                         vbasedev->name);
             vfio_put_group(group);
             return -EBUSY;
         }
     }
-    ret = vfio_get_device(group, path, vbasedev);
+    ret = vfio_get_device(group, vbasedev->name, vbasedev);
     if (ret) {
-        error_report("vfio: failed to get device %s", path);
+        error_report("vfio: failed to get device %s", vbasedev->name);
         vfio_put_group(group);
         return ret;
     }
 
     ret = vfio_populate_device(vbasedev);
     if (ret) {
-        error_report("vfio: failed to populate device %s", path);
+        error_report("vfio: failed to populate device %s", vbasedev->name);
         vfio_put_group(group);
     }
 
     return ret;
 }
 
-/**
- * vfio_map_region - initialize the 2 memory regions for a given
- * MMIO region index
- * @vdev: the VFIO platform device handle
- * @nr: the index of the region
- *
- * Init the top memory region and the mmapped memory region beneath
- * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
- * and could not be passed to memory region functions
-*/
-static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
-{
-    VFIORegion *region = vdev->regions[nr];
-    uint64_t size = region->size;
-    char name[64];
-
-    if (!size) {
-        return;
-    }
-
-    g_snprintf(name, sizeof(name), "VFIO %s region %d",
-               vdev->vbasedev.name, nr);
-
-    /* A "slow" read/write mapping underlies all regions */
-    memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
-                          region, name, size);
-
-    g_strlcat(name, " mmap", sizeof(name));
-
-    if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
-                         &region->mmap_mem, &region->mmap, size, 0, name)) {
-        error_report("%s unsupported. Performance may be slow", name);
-    }
-}
-
 /**
  * vfio_platform_realize  - the device realize function
  * @dev: device state pointer
@@ -646,13 +635,14 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
     SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
     VFIODevice *vbasedev = &vdev->vbasedev;
-    VFIOINTp *intp;
     int i, ret;
 
     vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
     vbasedev->ops = &vfio_platform_ops;
 
-    trace_vfio_platform_realize(vbasedev->name, vdev->compat);
+    trace_vfio_platform_realize(vbasedev->sysfsdev ?
+                                vbasedev->sysfsdev : vbasedev->name,
+                                vdev->compat);
 
     ret = vfio_base_device_init(vbasedev);
     if (ret) {
@@ -662,12 +652,11 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
     }
 
     for (i = 0; i < vbasedev->num_regions; i++) {
-        vfio_map_region(vdev, i);
-        sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
-    }
-
-    QLIST_FOREACH(intp, &vdev->intp_list, next) {
-        vfio_start_eventfd_injection(intp);
+        if (vfio_region_mmap(vdev->regions[i])) {
+            error_report("%s mmap unsupported. Performance may be slow",
+                         memory_region_name(vdev->regions[i]->mem));
+        }
+        sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
     }
 }
 
@@ -678,7 +667,8 @@ static const VMStateDescription vfio_platform_vmstate = {
 
 static Property vfio_platform_dev_properties[] = {
     DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
-    DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true),
+    DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
+    DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
     DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
                        mmap_timeout, 1100),
     DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),