These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
diff --git a/kernel/virt/kvm/eventfd.c b/kernel/virt/kvm/eventfd.c
index 9ff4193..46dbc0a 100644
--- a/kernel/virt/kvm/eventfd.c
+++ b/kernel/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
+#include <linux/kvm_irqfd.h>
 #include <linux/workqueue.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <linux/seqlock.h>
+#include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 
 #include <kvm/iodev.h>
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * --------------------------------------------------------------------
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
- *
- * Credit goes to Avi Kivity for the original idea.
- * --------------------------------------------------------------------
- */
-
-/*
- * Resampling irqfds are a special variety of irqfds used to emulate
- * level triggered interrupts.  The interrupt is asserted on eventfd
- * trigger.  On acknowledgement through the irq ack notifier, the
- * interrupt is de-asserted and userspace is notified through the
- * resamplefd.  All resamplers on the same gsi are de-asserted
- * together, so we don't need to track the state of each individual
- * user.  We can also therefore share the same irq source ID.
- */
-struct _irqfd_resampler {
-       struct kvm *kvm;
-       /*
-        * List of resampling struct _irqfd objects sharing this gsi.
-        * RCU list modified under kvm->irqfds.resampler_lock
-        */
-       struct list_head list;
-       struct kvm_irq_ack_notifier notifier;
-       /*
-        * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
-        * resamplers among irqfds on the same gsi.
-        * Accessed and modified under kvm->irqfds.resampler_lock
-        */
-       struct list_head link;
-};
-
-struct _irqfd {
-       /* Used for MSI fast-path */
-       struct kvm *kvm;
-       wait_queue_t wait;
-       /* Update side is protected by irqfds.lock */
-       struct kvm_kernel_irq_routing_entry irq_entry;
-       seqcount_t irq_entry_sc;
-       /* Used for level IRQ fast-path */
-       int gsi;
-       struct work_struct inject;
-       /* The resampler used by this irqfd (resampler-only) */
-       struct _irqfd_resampler *resampler;
-       /* Eventfd notified on resample (resampler-only) */
-       struct eventfd_ctx *resamplefd;
-       /* Entry in list of irqfds for a resampler (resampler-only) */
-       struct list_head resampler_link;
-       /* Used for setup/shutdown */
-       struct eventfd_ctx *eventfd;
-       struct list_head list;
-       poll_table pt;
-       struct work_struct shutdown;
-};
 
 static struct workqueue_struct *irqfd_cleanup_wq;
 
 static void
 irqfd_inject(struct work_struct *work)
 {
-       struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(work, struct kvm_kernel_irqfd, inject);
        struct kvm *kvm = irqfd->kvm;
 
        if (!irqfd->resampler) {
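The big block deleted above is a move, not a removal: struct _irqfd_resampler
and struct _irqfd now live in the new shared header <linux/kvm_irqfd.h> under
kvm_kernel_* names, where arch code and the irq-bypass glue below can reach
them. A sketch of the relocated irqfd struct, reconstructed from the removed
block plus the consumer/producer members this patch starts using (a sketch,
not the verbatim header):

    /* Sketch of struct kvm_kernel_irqfd in <linux/kvm_irqfd.h>.
     * Field comments follow the removed struct _irqfd above; the last
     * two members are the irq-bypass additions.  The resampler struct
     * moved one-for-one as struct kvm_kernel_irqfd_resampler. */
    struct kvm_kernel_irqfd {
            /* Used for MSI fast-path */
            struct kvm *kvm;
            wait_queue_t wait;
            /* Update side is protected by irqfds.lock */
            struct kvm_kernel_irq_routing_entry irq_entry;
            seqcount_t irq_entry_sc;
            /* Used for level IRQ fast-path */
            int gsi;
            struct work_struct inject;
            /* The resampler used by this irqfd (resampler-only) */
            struct kvm_kernel_irqfd_resampler *resampler;
            /* Eventfd notified on resample (resampler-only) */
            struct eventfd_ctx *resamplefd;
            /* Entry in list of irqfds for a resampler (resampler-only) */
            struct list_head resampler_link;
            /* Used for setup/shutdown */
            struct eventfd_ctx *eventfd;
            struct list_head list;
            poll_table pt;
            struct work_struct shutdown;
            /* Pairing with an irq-bypass producer (e.g. VFIO) that
             * shares the same eventfd as its token */
            struct irq_bypass_consumer consumer;
            struct irq_bypass_producer *producer;
    };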
@@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work)
 static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
-       struct _irqfd_resampler *resampler;
+       struct kvm_kernel_irqfd_resampler *resampler;
        struct kvm *kvm;
-       struct _irqfd *irqfd;
+       struct kvm_kernel_irqfd *irqfd;
        int idx;
 
-       resampler = container_of(kian, struct _irqfd_resampler, notifier);
+       resampler = container_of(kian,
+                       struct kvm_kernel_irqfd_resampler, notifier);
        kvm = resampler->kvm;
 
        kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
@@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 }
 
 static void
-irqfd_resampler_shutdown(struct _irqfd *irqfd)
+irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
 {
-       struct _irqfd_resampler *resampler = irqfd->resampler;
+       struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
        struct kvm *kvm = resampler->kvm;
 
        mutex_lock(&kvm->irqfds.resampler_lock);
@@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 static void
 irqfd_shutdown(struct work_struct *work)
 {
-       struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(work, struct kvm_kernel_irqfd, shutdown);
        u64 cnt;
 
        /*
@@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work)
        /*
         * It is now safe to release the object's resources
         */
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+       irq_bypass_unregister_consumer(&irqfd->consumer);
+#endif
        eventfd_ctx_put(irqfd->eventfd);
        kfree(irqfd);
 }
@@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work)
 
 /* assumes kvm->irqfds.lock is held */
 static bool
-irqfd_is_active(struct _irqfd *irqfd)
+irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
 {
        return list_empty(&irqfd->list) ? false : true;
 }
@@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd)
  * assumes kvm->irqfds.lock is held
  */
 static void
-irqfd_deactivate(struct _irqfd *irqfd)
+irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 {
        BUG_ON(!irqfd_is_active(irqfd));
 
@@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd)
        queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 
+int __attribute__((weak)) kvm_arch_set_irq_inatomic(
+                               struct kvm_kernel_irq_routing_entry *irq,
+                               struct kvm *kvm, int irq_source_id,
+                               int level,
+                               bool line_status)
+{
+       return -EWOULDBLOCK;
+}
+
 /*
  * Called with wqh->lock held and interrupts disabled
  */
 static int
 irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-       struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(wait, struct kvm_kernel_irqfd, wait);
        unsigned long flags = (unsigned long)key;
        struct kvm_kernel_irq_routing_entry irq;
        struct kvm *kvm = irqfd->kvm;
@@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
                        irq = irqfd->irq_entry;
                } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
                /* An event has been signaled, inject an interrupt */
-               if (irq.type == KVM_IRQ_ROUTING_MSI)
-                       kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
-                                       false);
-               else
+               if (kvm_arch_set_irq_inatomic(&irq, kvm,
+                                             KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+                                             false) == -EWOULDBLOCK)
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
        }
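The weak kvm_arch_set_irq_inatomic() is the hook that replaces the open-coded
MSI special case: irqfd_wakeup() runs with wqh->lock held and interrupts
disabled, so it may only inject when the architecture can do so without
sleeping, and the -EWOULDBLOCK default pushes everything onto the inject work
item. A minimal sketch of an arch override, mirroring the MSI-only fast path
the removed lines used to open-code (illustrative, not the actual x86
implementation):

    /* Hypothetical override: MSI routes can be delivered without taking
     * sleeping locks; defer everything else to the work item. */
    int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
                                  struct kvm *kvm, int irq_source_id,
                                  int level, bool line_status)
    {
            if (e->type == KVM_IRQ_ROUTING_MSI)
                    return kvm_set_msi(e, kvm, irq_source_id, level, false);
            return -EWOULDBLOCK;
    }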
@@ -274,37 +236,54 @@ static void
 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
                        poll_table *pt)
 {
-       struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(pt, struct kvm_kernel_irqfd, pt);
        add_wait_queue(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
+static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 {
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-       int i, n_entries;
+       int n_entries;
 
        n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
        write_seqcount_begin(&irqfd->irq_entry_sc);
 
-       irqfd->irq_entry.type = 0;
-
        e = entries;
-       for (i = 0; i < n_entries; ++i, ++e) {
-               /* Only fast-path MSI. */
-               if (e->type == KVM_IRQ_ROUTING_MSI)
-                       irqfd->irq_entry = *e;
-       }
+       if (n_entries == 1)
+               irqfd->irq_entry = *e;
+       else
+               irqfd->irq_entry.type = 0;
 
        write_seqcount_end(&irqfd->irq_entry_sc);
 }
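Two things changed in irqfd_update(): the cache now accepts any routing type,
not just MSI, but only when the GSI maps to exactly one entry (a multi-entry
route cannot be injected atomically, so the type stays 0 and the slow path is
taken). The seqcount is what lets irqfd_wakeup() read this cache locklessly
from atomic context; condensing the writer and the reader from the two hunks
into one place:

    /* Writer (irqfd_update, under kvm->irqfds.lock): publish the cached
     * route, or invalidate it by leaving type == 0. */
    write_seqcount_begin(&irqfd->irq_entry_sc);
    irqfd->irq_entry = *e;
    write_seqcount_end(&irqfd->irq_entry_sc);

    /* Reader (irqfd_wakeup, wqh->lock held, no irqfds.lock): retry until
     * a consistent snapshot is read. */
    do {
            seq = read_seqcount_begin(&irqfd->irq_entry_sc);
            irq = irqfd->irq_entry;
    } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));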
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+                               struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+                               struct irq_bypass_consumer *cons)
+{
+}
+
+int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
+                               struct kvm *kvm, unsigned int host_irq,
+                               uint32_t guest_irq, bool set)
+{
+       return 0;
+}
+#endif
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-       struct _irqfd *irqfd, *tmp;
+       struct kvm_kernel_irqfd *irqfd, *tmp;
        struct fd f;
        struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
        int ret;
@@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
        irqfd->eventfd = eventfd;
 
        if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
-               struct _irqfd_resampler *resampler;
+               struct kvm_kernel_irqfd_resampler *resampler;
 
                resamplefd = eventfd_ctx_fdget(args->resamplefd);
                if (IS_ERR(resamplefd)) {
@@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
         * we might race against the POLLHUP
         */
        fdput(f);
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+       irqfd->consumer.token = (void *)irqfd->eventfd;
+       irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+       irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+       irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+       irqfd->consumer.start = kvm_arch_irq_bypass_start;
+       ret = irq_bypass_register_consumer(&irqfd->consumer);
+       if (ret)
+               pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+                               irqfd->consumer.token, ret);
+#endif
 
        return 0;
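Registration can fail (for instance when no irqbypass manager is available),
and the pr_info() above deliberately treats that as non-fatal: the irqfd still
works through the normal injection path. The consumer only does something once
a producer registers the same token; a hedged sketch of the producer side,
roughly what a device driver such as VFIO does for a forwarded interrupt
(names are illustrative):

    #include <linux/irqbypass.h>

    /* Illustrative producer registration: the token is the eventfd
     * context the device signals, i.e. the same token the irqfd consumer
     * set above, which is how the bypass manager pairs producer and
     * consumer. */
    static int example_register_producer(struct irq_bypass_producer *prod,
                                         struct eventfd_ctx *trigger,
                                         unsigned int host_irq)
    {
            prod->token = trigger;  /* must match irqfd->consumer.token */
            prod->irq = host_irq;   /* physical interrupt to bypass */
            return irq_bypass_register_producer(prod);
    }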
 
@@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 }
 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
 {
        struct kvm_irq_ack_notifier *kian;
+
+       hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+                                link)
+               if (kian->gsi == gsi)
+                       kian->irq_acked(kian);
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
        int gsi, idx;
 
        trace_kvm_ack_irq(irqchip, pin);
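Factoring out kvm_notify_acked_gsi() lets callers that already know the GSI
(arch code with its own notion of chip/pin, or none at all) skip the mapping
step. The list walk is RCU-protected by kvm->irq_srcu, so such a caller must
hold the SRCU read lock, exactly as the chip/pin wrapper in the next hunk
does; a hypothetical direct caller:

    /* Hypothetical caller that has already resolved the GSI. */
    int idx = srcu_read_lock(&kvm->irq_srcu);
    kvm_notify_acked_gsi(kvm, gsi);
    srcu_read_unlock(&kvm->irq_srcu, idx);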
@@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
-               hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-                                        link)
-                       if (kian->gsi == gsi)
-                               kian->irq_acked(kian);
+               kvm_notify_acked_gsi(kvm, gsi);
        srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
@@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm)
 static int
 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-       struct _irqfd *irqfd, *tmp;
+       struct kvm_kernel_irqfd *irqfd, *tmp;
        struct eventfd_ctx *eventfd;
 
        eventfd = eventfd_ctx_fdget(args->fd);
@@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 void
 kvm_irqfd_release(struct kvm *kvm)
 {
-       struct _irqfd *irqfd, *tmp;
+       struct kvm_kernel_irqfd *irqfd, *tmp;
 
        spin_lock_irq(&kvm->irqfds.lock);
 
@@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
-       struct _irqfd *irqfd;
+       struct kvm_kernel_irqfd *irqfd;
 
        spin_lock_irq(&kvm->irqfds.lock);
 
-       list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+       list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
                irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+               if (irqfd->producer) {
+                       int ret = kvm_arch_update_irqfd_routing(
+                                       irqfd->kvm, irqfd->producer->irq,
+                                       irqfd->gsi, 1);
+                       WARN_ON(ret);
+               }
+#endif
+       }
+
        spin_unlock_irq(&kvm->irqfds.lock);
 }
 
@@ -771,40 +777,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
        return KVM_MMIO_BUS;
 }
 
-static int
-kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
+                               enum kvm_bus bus_idx,
+                               struct kvm_ioeventfd *args)
 {
-       enum kvm_bus              bus_idx;
-       struct _ioeventfd        *p;
-       struct eventfd_ctx       *eventfd;
-       int                       ret;
 
-       bus_idx = ioeventfd_bus_from_flags(args->flags);
-       /* must be natural-word sized, or 0 to ignore length */
-       switch (args->len) {
-       case 0:
-       case 1:
-       case 2:
-       case 4:
-       case 8:
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       /* check for range overflow */
-       if (args->addr + args->len < args->addr)
-               return -EINVAL;
-
-       /* check for extra flags that we don't understand */
-       if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
-               return -EINVAL;
-
-       /* ioeventfd with no length can't be combined with DATAMATCH */
-       if (!args->len &&
-           args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-                          KVM_IOEVENTFD_FLAG_DATAMATCH))
-               return -EINVAL;
+       struct eventfd_ctx *eventfd;
+       struct _ioeventfd *p;
+       int ret;
 
        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
@@ -843,16 +823,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
        if (ret < 0)
                goto unlock_fail;
 
-       /* When length is ignored, MMIO is also put on a separate bus, for
-        * faster lookups.
-        */
-       if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
-               ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
-                                             p->addr, 0, &p->dev);
-               if (ret < 0)
-                       goto register_fail;
-       }
-
        kvm->buses[bus_idx]->ioeventfd_count++;
        list_add_tail(&p->list, &kvm->ioeventfds);
 
@@ -860,8 +830,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
        return 0;
 
-register_fail:
-       kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 unlock_fail:
        mutex_unlock(&kvm->slots_lock);
 
@@ -873,14 +841,13 @@ fail:
 }
 
 static int
-kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
+                          struct kvm_ioeventfd *args)
 {
-       enum kvm_bus              bus_idx;
        struct _ioeventfd        *p, *tmp;
        struct eventfd_ctx       *eventfd;
        int                       ret = -ENOENT;
 
-       bus_idx = ioeventfd_bus_from_flags(args->flags);
        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);
@@ -901,10 +868,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
                        continue;
 
                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-               if (!p->length) {
-                       kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
-                                                 &p->dev);
-               }
                kvm->buses[bus_idx]->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
@@ -918,6 +881,69 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
        return ret;
 }
 
+static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+       enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
+       int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+
+       if (!args->len && bus_idx == KVM_MMIO_BUS)
+               kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+
+       return ret;
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+       enum kvm_bus              bus_idx;
+       int ret;
+
+       bus_idx = ioeventfd_bus_from_flags(args->flags);
+       /* must be natural-word sized, or 0 to ignore length */
+       switch (args->len) {
+       case 0:
+       case 1:
+       case 2:
+       case 4:
+       case 8:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* check for range overflow */
+       if (args->addr + args->len < args->addr)
+               return -EINVAL;
+
+       /* check for extra flags that we don't understand */
+       if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+               return -EINVAL;
+
+       /* ioeventfd with no length can't be combined with DATAMATCH */
+       if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
+               return -EINVAL;
+
+       ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
+       if (ret)
+               goto fail;
+
+       /* When length is ignored, MMIO is also put on a separate bus, for
+        * faster lookups.
+        */
+       if (!args->len && bus_idx == KVM_MMIO_BUS) {
+               ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+               if (ret < 0)
+                       goto fast_fail;
+       }
+
+       return 0;
+
+fast_fail:
+       kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+fail:
+       return ret;
+}
+
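The net effect of the reordered helpers: kvm_assign_ioeventfd() now does all
argument checking once, registers on the natural bus, and then mirrors a
zero-length MMIO ioeventfd onto KVM_FAST_MMIO_BUS, with
kvm_deassign_ioeventfd() tearing down both registrations. Userspace is
unaffected; a hedged sketch of registering such a wildcard ioeventfd through
the existing KVM_IOEVENTFD ioctl (the address and helper name are
illustrative):

    #include <linux/kvm.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>

    /* Illustrative: len == 0 means "match any access width", the case
     * that now also lands on KVM_FAST_MMIO_BUS for faster lookup. */
    static int register_fast_mmio_doorbell(int vm_fd, __u64 doorbell_gpa)
    {
            int efd = eventfd(0, EFD_CLOEXEC);
            struct kvm_ioeventfd ioe = {
                    .addr  = doorbell_gpa,
                    .len   = 0,   /* length ignored: fast MMIO path */
                    .fd    = efd,
                    .flags = 0,   /* DATAMATCH is rejected when len == 0 */
            };

            if (efd < 0)
                    return -1;
            return ioctl(vm_fd, KVM_IOEVENTFD, &ioe);
    }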
 int
 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {