Merge "Add VM preemption latency measuring tool vm-trace"
author Don Dugger <donald.d.dugger@intel.com>
Tue, 5 Jan 2016 22:58:35 +0000 (22:58 +0000)
committer Gerrit Code Review <gerrit@172.30.200.206>
Tue, 5 Jan 2016 22:58:35 +0000 (22:58 +0000)
kernel/arch/x86/configs/opnfv.config
kernel/drivers/vfio/pci/vfio_pci_intrs.c
kernel/kernel/time/hrtimer.c
kernel/kernel/time/tick-sched.c
qemu/hw/i386/kvm/clock.c
qemu/migration/savevm.c
qemu/target-i386/kvm.c
qemu/target-i386/kvm_i386.h

diff --git a/kernel/arch/x86/configs/opnfv.config b/kernel/arch/x86/configs/opnfv.config
index 573bcc9..c73eaf2 100644
@@ -734,15 +734,15 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE_DEMUX is not set
-# CONFIG_NET_IP_TUNNEL is not set
+CONFIG_NET_IP_TUNNEL=y
 CONFIG_IP_MROUTE=y
 # CONFIG_IP_MROUTE_MULTIPLE_TABLES is not set
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_NET_UDP_TUNNEL is not set
+CONFIG_NET_UDP_TUNNEL=y
 # CONFIG_NET_FOU is not set
-# CONFIG_GENEVE is not set
+CONFIG_GENEVE=y
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -972,11 +972,15 @@ CONFIG_NET_SCH_FIFO=y
 # CONFIG_DCB is not set
 CONFIG_DNS_RESOLVER=y
 # CONFIG_BATMAN_ADV is not set
-# CONFIG_OPENVSWITCH is not set
+CONFIG_OPENVSWITCH=m
+CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_OPENVSWITCH_GENEVE=m
 # CONFIG_VSOCKETS is not set
 # CONFIG_NETLINK_MMAP is not set
 # CONFIG_NETLINK_DIAG is not set
-# CONFIG_MPLS is not set
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+# CONFIG_MPLS_ROUTING is not set
 # CONFIG_HSR is not set
 # CONFIG_NET_SWITCHDEV is not set
 CONFIG_RPS=y
@@ -1352,7 +1356,7 @@ CONFIG_NET_CORE=y
 # CONFIG_NET_TEAM is not set
 # CONFIG_MACVLAN is not set
 # CONFIG_IPVLAN is not set
-# CONFIG_VXLAN is not set
+CONFIG_VXLAN=y
 CONFIG_NETCONSOLE=y
 CONFIG_NETPOLL=y
 CONFIG_NET_POLL_CONTROLLER=y
@@ -1448,9 +1452,11 @@ CONFIG_IGB=y
 CONFIG_IGBVF=y
 CONFIG_IXGB=y
 CONFIG_IXGBE=y
+CONFIG_IXGBE_VXLAN=y
 # CONFIG_IXGBE_HWMON is not set
-# CONFIG_IXGBEVF is not set
+CONFIG_IXGBEVF=y
 CONFIG_I40E=y
+CONFIG_I40E_VXLAN=y
 # CONFIG_I40EVF is not set
 # CONFIG_FM10K is not set
 CONFIG_NET_VENDOR_I825XX=y
diff --git a/kernel/drivers/vfio/pci/vfio_pci_intrs.c b/kernel/drivers/vfio/pci/vfio_pci_intrs.c
index 1f577b4..a21d8e1 100644
@@ -352,7 +352,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
                pci_write_msi_msg(irq, &msg);
        }
 
-       ret = request_irq(irq, vfio_msihandler, 0,
+       ret = request_irq(irq, vfio_msihandler, IRQF_NO_THREAD,
                          vdev->ctx[vector].name, trigger);
        if (ret) {
                kfree(vdev->ctx[vector].name);
diff --git a/kernel/kernel/time/hrtimer.c b/kernel/kernel/time/hrtimer.c
index 2c6be16..5d19339 100644
@@ -583,6 +583,12 @@ static int hrtimer_reprogram(struct hrtimer *timer,
        if (hrtimer_callback_running(timer))
                return 0;
 
+        if (base->cpu_base != cpu_base)
+               return 0;
+
+       if (cpu_base->in_hrtirq)
+               return 0;
+
        /*
         * CLOCK_REALTIME timer might be requested with an absolute
         * expiry time which is less than base->offset. Nothing wrong
@@ -613,12 +619,11 @@ static int hrtimer_reprogram(struct hrtimer *timer,
        if (cpu_base->hang_detected)
                return 0;
 
+       cpu_base->expires_next = expires;
        /*
         * Clockevents returns -ETIME, when the event was in the past.
         */
-       res = tick_program_event(expires, 0);
-       if (!IS_ERR_VALUE(res))
-               cpu_base->expires_next = expires;
+       res = tick_program_event(expires, 1);
        return res;
 }
 
diff --git a/kernel/kernel/time/tick-sched.c b/kernel/kernel/time/tick-sched.c
index b3841ba..f61dbf2 100644
@@ -576,6 +576,20 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 }
 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 
+/*
+ * Re-arm ts->sched_timer after the tick was stopped.
+ *
+ * The timer is cancelled, its expiry is reset to ts->last_tick and then
+ * forwarded by tick_period relative to @now.  In NOHZ_MODE_HIGHRES the timer
+ * is started again with hrtimer_start_expires(); in every other mode the new
+ * expiry is handed to tick_program_event() instead.
+ */
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+{
+        struct hrtimer *tick = &ts->sched_timer;
+
+        /* Start from a known state: nothing queued, expiry at the last tick. */
+        hrtimer_cancel(tick);
+        hrtimer_set_expires(tick, ts->last_tick);
+
+        /* Move the expiry forward so that it lies in the future. */
+        hrtimer_forward(tick, now, tick_period);
+
+        if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+                hrtimer_start_expires(tick, HRTIMER_MODE_ABS_PINNED);
+                return;
+        }
+
+        /* Not in high-resolution mode: program the expiry directly. */
+        tick_program_event(hrtimer_get_expires(tick), 1);
+}
+
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                                         ktime_t now, int cpu)
 {
@@ -704,22 +718,16 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                        goto out;
                }
 
-               if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-                       hrtimer_start(&ts->sched_timer, expires,
-                                     HRTIMER_MODE_ABS_PINNED);
-                       /* Check, if the timer was already in the past */
-                       if (hrtimer_active(&ts->sched_timer))
-                               goto out;
-               } else if (!tick_program_event(expires, 0))
-                               goto out;
-               /*
-                * We are past the event already. So we crossed a
-                * jiffie boundary. Update jiffies and raise the
-                * softirq.
-                */
-               tick_do_update_jiffies64(ktime_get());
+                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+                        hrtimer_start(&ts->sched_timer, expires,
+                                      HRTIMER_MODE_ABS_PINNED);
+                else
+                        tick_program_event(expires, 1);
+       } else {
+               /* Tick is stopped, but required now. Enforce it */
+               tick_nohz_restart(ts, now);
+
        }
-       raise_softirq_irqoff(TIMER_SOFTIRQ);
 out:
        ts->next_jiffies = next_jiffies;
        ts->last_jiffies = last_jiffies;
@@ -880,32 +888,6 @@ ktime_t tick_nohz_get_sleep_length(void)
        return ts->sleep_length;
 }
 
-static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
-{
-       hrtimer_cancel(&ts->sched_timer);
-       hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
-
-       while (1) {
-               /* Forward the time to expire in the future */
-               hrtimer_forward(&ts->sched_timer, now, tick_period);
-
-               if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-                       hrtimer_start_expires(&ts->sched_timer,
-                                             HRTIMER_MODE_ABS_PINNED);
-                       /* Check, if the timer was already in the past */
-                       if (hrtimer_active(&ts->sched_timer))
-                               break;
-               } else {
-                       if (!tick_program_event(
-                               hrtimer_get_expires(&ts->sched_timer), 0))
-                               break;
-               }
-               /* Reread time and update jiffies */
-               now = ktime_get();
-               tick_do_update_jiffies64(now);
-       }
-}
-
 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
        /* Update jiffies first */
diff --git a/qemu/hw/i386/kvm/clock.c b/qemu/hw/i386/kvm/clock.c
index efdf165..0593a3f 100644
@@ -17,7 +17,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
-#include "sysemu/cpus.h"
+#include "kvm_i386.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -125,21 +125,7 @@ static void kvmclock_vm_state_change(void *opaque, int running,
             return;
         }
 
-        cpu_synchronize_all_states();
-        /* In theory, the cpu_synchronize_all_states() call above wouldn't
-         * affect the rest of the code, as the VCPU state inside CPUState
-         * is supposed to always match the VCPU state on the kernel side.
-         *
-         * In practice, calling cpu_synchronize_state() too soon will load the
-         * kernel-side APIC state into X86CPU.apic_state too early, APIC state
-         * won't be reloaded later because CPUState.vcpu_dirty==true, and
-         * outdated APIC state may be migrated to another host.
-         *
-         * The real fix would be to make sure outdated APIC state is read
-         * from the kernel again when necessary. While this is not fixed, we
-         * need the cpu_clean_all_dirty() call below.
-         */
-        cpu_clean_all_dirty();
+        kvm_synchronize_all_tsc();
 
         ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
         if (ret < 0) {
diff --git a/qemu/migration/savevm.c b/qemu/migration/savevm.c
index 6071215..a42874b 100644
@@ -945,8 +945,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
         qemu_savevm_state_complete(f);
         ret = qemu_file_get_error(f);
     }
+    qemu_savevm_state_cancel();
     if (ret != 0) {
-        qemu_savevm_state_cancel();
         error_setg_errno(errp, -ret, "Error while writing VM state");
     }
     return ret;
diff --git a/qemu/target-i386/kvm.c b/qemu/target-i386/kvm.c
index 066d03d..721c580 100644
@@ -96,6 +96,51 @@ bool kvm_allows_irq0_override(void)
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
 }
 
+/*
+ * Fetch MSR_IA32_TSC for @cs with KVM_GET_MSRS and store it in env->tsc.
+ *
+ * Nothing is read when env->tsc_valid is already set.  After a successful
+ * read, env->tsc_valid is set to !runstate_is_running(), so the stored value
+ * is only marked valid while the run state is not "running".
+ *
+ * Returns 0 on success or when the stored value was already valid, and a
+ * negative value when the ioctl fails or does not return the requested MSR.
+ * On failure neither env->tsc nor env->tsc_valid is modified, so a later
+ * call tries again.
+ */
+static int kvm_get_tsc(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+    /* Zero everything that is not named so no stack garbage is passed on. */
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[1];
+    } msr_data = {
+        .info.nmsrs = 1,
+        .entries[0].index = MSR_IA32_TSC,
+    };
+    int ret;
+
+    if (env->tsc_valid) {
+        return 0;
+    }
+
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data);
+    if (ret < 0) {
+        return ret;
+    }
+    if (ret != 1) {
+        /*
+         * KVM_GET_MSRS returns the number of MSRs it read; anything other
+         * than the one requested means entries[0].data was not filled in.
+         */
+        return -1;
+    }
+
+    env->tsc = msr_data.entries[0].data;
+    /* Only mark the value valid once it has actually been read. */
+    env->tsc_valid = !runstate_is_running();
+    return 0;
+}
+
+/*
+ * Callback handed to run_on_cpu() by kvm_synchronize_all_tsc().
+ * @arg is the CPUState to refresh; the result of kvm_get_tsc() is dropped
+ * because the callback has no way to return it.
+ */
+static inline void do_kvm_synchronize_tsc(void *arg)
+{
+    (void)kvm_get_tsc(arg);
+}
+
+/*
+ * Run do_kvm_synchronize_tsc() on every CPU visited by CPU_FOREACH, using
+ * run_on_cpu() with the CPU itself as the callback argument.  Does nothing
+ * when kvm_enabled() is false.
+ */
+void kvm_synchronize_all_tsc(void)
+{
+    CPUState *cs;
+
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    CPU_FOREACH(cs) {
+        run_on_cpu(cs, do_kvm_synchronize_tsc, cs);
+    }
+}
+
 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
 {
     struct kvm_cpuid2 *cpuid;
diff --git a/qemu/target-i386/kvm_i386.h b/qemu/target-i386/kvm_i386.h
index e557e94..c1b312b 100644
@@ -15,6 +15,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);