X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=qemu%2Ftarget-i386%2Fkvm.c;h=799fdfa682a2a6aba20d6760b36dbbeea2752464;hb=a14b48d18a9ed03ec191cf16b162206998a895ce;hp=721c580edd661b1aa39a28fb4ade456d4e49bc39;hpb=4f32d7e65d2df10accf0a410fd62eabd12dd6f43;p=kvmfornfv.git diff --git a/qemu/target-i386/kvm.c b/qemu/target-i386/kvm.c index 721c580ed..799fdfa68 100644 --- a/qemu/target-i386/kvm.c +++ b/qemu/target-i386/kvm.c @@ -12,7 +12,8 @@ * */ -#include +#include "qemu/osdep.h" +#include "qapi/error.h" #include #include #include @@ -25,16 +26,21 @@ #include "sysemu/kvm_int.h" #include "kvm_i386.h" #include "cpu.h" +#include "hyperv.h" + #include "exec/gdbstub.h" #include "qemu/host-utils.h" #include "qemu/config-file.h" +#include "qemu/error-report.h" #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" #include "hw/i386/apic-msidef.h" + #include "exec/ioport.h" -#include +#include "standard-headers/asm-x86/hyperv.h" #include "hw/pci/pci.h" +#include "hw/pci/msi.h" #include "migration/migration.h" #include "exec/memattrs.h" @@ -67,6 +73,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { static bool has_msr_star; static bool has_msr_hsave_pa; +static bool has_msr_tsc_aux; static bool has_msr_tsc_adjust; static bool has_msr_tsc_deadline; static bool has_msr_feature_control; @@ -80,12 +87,27 @@ static int lm_capable_kernel; static bool has_msr_hv_hypercall; static bool has_msr_hv_vapic; static bool has_msr_hv_tsc; +static bool has_msr_hv_crash; +static bool has_msr_hv_reset; +static bool has_msr_hv_vpindex; +static bool has_msr_hv_runtime; +static bool has_msr_hv_synic; +static bool has_msr_hv_stimer; static bool has_msr_mtrr; static bool has_msr_xss; static bool has_msr_architectural_pmu; static uint32_t num_architectural_pmu_counters; +static int has_xsave; +static int has_xcrs; +static int has_pit_state2; + +int kvm_has_pit_state2(void) +{ + return has_pit_state2; +} + bool kvm_has_smm(void) { return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); @@ -119,6 +141,7 @@ static int kvm_get_tsc(CPUState *cs) return ret; } + assert(ret == 1); env->tsc = msr_data.entries[0].data; return 0; } @@ -502,7 +525,43 @@ static bool hyperv_enabled(X86CPU *cpu) return kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 && (hyperv_hypercall_available(cpu) || cpu->hyperv_time || - cpu->hyperv_relaxed_timing); + cpu->hyperv_relaxed_timing || + cpu->hyperv_crash || + cpu->hyperv_reset || + cpu->hyperv_vpindex || + cpu->hyperv_runtime || + cpu->hyperv_synic || + cpu->hyperv_stimer); +} + +static int kvm_arch_set_tsc_khz(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int r; + + if (!env->tsc_khz) { + return 0; + } + + r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL) ? + kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz) : + -ENOTSUP; + if (r < 0) { + /* When KVM_SET_TSC_KHZ fails, it's an error only if the current + * TSC frequency doesn't match the one we want. + */ + int cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (cur_freq <= 0 || cur_freq != env->tsc_khz) { + error_report("warning: TSC frequency mismatch between " + "VM and host, and TSC scaling unavailable"); + return r; + } + } + + return 0; } static Error *invtsc_mig_blocker; @@ -532,7 +591,18 @@ int kvm_arch_init_vcpu(CPUState *cs) if (hyperv_enabled(cpu)) { c = &cpuid_data.entries[cpuid_i++]; c->function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; - memcpy(signature, "Microsoft Hv", 12); + if (!cpu->hyperv_vendor_id) { + memcpy(signature, "Microsoft Hv", 12); + } else { + size_t len = strlen(cpu->hyperv_vendor_id); + + if (len > 12) { + error_report("hv-vendor-id truncated to 12 characters"); + len = 12; + } + memset(signature, 0, 12); + memcpy(signature, cpu->hyperv_vendor_id, len); + } c->eax = HYPERV_CPUID_MIN; c->ebx = signature[0]; c->ecx = signature[1]; @@ -568,6 +638,41 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= 0x200; has_msr_hv_tsc = true; } + if (cpu->hyperv_crash && has_msr_hv_crash) { + c->edx |= HV_X64_GUEST_CRASH_MSR_AVAILABLE; + } + c->edx |= HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE; + if (cpu->hyperv_reset && has_msr_hv_reset) { + c->eax |= HV_X64_MSR_RESET_AVAILABLE; + } + if (cpu->hyperv_vpindex && has_msr_hv_vpindex) { + c->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE; + } + if (cpu->hyperv_runtime && has_msr_hv_runtime) { + c->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE; + } + if (cpu->hyperv_synic) { + int sint; + + if (!has_msr_hv_synic || + kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { + fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); + return -ENOSYS; + } + + c->eax |= HV_X64_MSR_SYNIC_AVAILABLE; + env->msr_hv_synic_version = HV_SYNIC_VERSION_1; + for (sint = 0; sint < ARRAY_SIZE(env->msr_hv_synic_sint); sint++) { + env->msr_hv_synic_sint[sint] = HV_SYNIC_SINT_MASKED; + } + } + if (cpu->hyperv_stimer) { + if (!has_msr_hv_stimer) { + fprintf(stderr, "Hyper-V timers aren't supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_X64_MSR_SYNTIMER_AVAILABLE; + } c = &cpuid_data.entries[cpuid_i++]; c->function = HYPERV_CPUID_ENLIGHTMENT_INFO; if (cpu->hyperv_relaxed_timing) { @@ -732,7 +837,7 @@ int kvm_arch_init_vcpu(CPUState *cs) && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA) && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) { - uint64_t mcg_cap; + uint64_t mcg_cap, unsupported_caps; int banks; int ret; @@ -742,18 +847,24 @@ int kvm_arch_init_vcpu(CPUState *cs) return ret; } - if (banks > MCE_BANKS_DEF) { - banks = MCE_BANKS_DEF; + if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) { + error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)", + (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks); + return -ENOTSUP; + } + + unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK); + if (unsupported_caps) { + error_report("warning: Unsupported MCG_CAP bits: 0x%" PRIx64, + unsupported_caps); } - mcg_cap &= MCE_CAP_DEF; - mcg_cap |= banks; - ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &mcg_cap); + + env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK; + ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap); if (ret < 0) { fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret)); return ret; } - - env->mcg_cap = mcg_cap; } qemu_add_vm_change_state_handler(cpu_update_state, env); @@ -781,22 +892,35 @@ int kvm_arch_init_vcpu(CPUState *cs) return r; } - r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL); - if (r && env->tsc_khz) { - r = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz); - if (r < 0) { - fprintf(stderr, "KVM_SET_TSC_KHZ failed\n"); - return r; + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; + } + + /* vcpu's TSC frequency is either specified by user, or following + * the value used by KVM if the former is not present. In the + * latter case, we query it from KVM and record in env->tsc_khz, + * so that vcpu's TSC frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (r > 0) { + env->tsc_khz = r; } } - if (kvm_has_xsave()) { + if (has_xsave) { env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); } if (env->features[FEAT_1_EDX] & CPUID_MTRR) { has_msr_mtrr = true; } + if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { + has_msr_tsc_aux = false; + } return 0; } @@ -864,6 +988,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_hsave_pa = true; continue; } + if (kvm_msr_list->indices[i] == MSR_TSC_AUX) { + has_msr_tsc_aux = true; + continue; + } if (kvm_msr_list->indices[i] == MSR_TSC_ADJUST) { has_msr_tsc_adjust = true; continue; @@ -888,6 +1016,30 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_xss = true; continue; } + if (kvm_msr_list->indices[i] == HV_X64_MSR_CRASH_CTL) { + has_msr_hv_crash = true; + continue; + } + if (kvm_msr_list->indices[i] == HV_X64_MSR_RESET) { + has_msr_hv_reset = true; + continue; + } + if (kvm_msr_list->indices[i] == HV_X64_MSR_VP_INDEX) { + has_msr_hv_vpindex = true; + continue; + } + if (kvm_msr_list->indices[i] == HV_X64_MSR_VP_RUNTIME) { + has_msr_hv_runtime = true; + continue; + } + if (kvm_msr_list->indices[i] == HV_X64_MSR_SCONTROL) { + has_msr_hv_synic = true; + continue; + } + if (kvm_msr_list->indices[i] == HV_X64_MSR_STIMER0_CONFIG) { + has_msr_hv_stimer = true; + continue; + } } } @@ -938,6 +1090,18 @@ int kvm_arch_init(MachineState *ms, KVMState *s) int ret; struct utsname utsname; +#ifdef KVM_CAP_XSAVE + has_xsave = kvm_check_extension(s, KVM_CAP_XSAVE); +#endif + +#ifdef KVM_CAP_XCRS + has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); +#endif + +#ifdef KVM_CAP_PIT_STATE2 + has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); +#endif + ret = kvm_get_supported_msrs(s); if (ret < 0) { return ret; @@ -1027,7 +1191,7 @@ static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) lhs->l = (flags >> DESC_L_SHIFT) & 1; lhs->g = (flags & DESC_G_MASK) != 0; lhs->avl = (flags & DESC_AVL_MASK) != 0; - lhs->unusable = 0; + lhs->unusable = !lhs->present; lhs->padding = 0; } @@ -1036,14 +1200,18 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) lhs->selector = rhs->selector; lhs->base = rhs->base; lhs->limit = rhs->limit; - lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | - (rhs->present * DESC_P_MASK) | - (rhs->dpl << DESC_DPL_SHIFT) | - (rhs->db << DESC_B_SHIFT) | - (rhs->s * DESC_S_MASK) | - (rhs->l << DESC_L_SHIFT) | - (rhs->g * DESC_G_MASK) | - (rhs->avl * DESC_AVL_MASK); + if (rhs->unusable) { + lhs->flags = 0; + } else { + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | + (rhs->present * DESC_P_MASK) | + (rhs->dpl << DESC_DPL_SHIFT) | + (rhs->db << DESC_B_SHIFT) | + (rhs->s * DESC_S_MASK) | + (rhs->l << DESC_L_SHIFT) | + (rhs->g * DESC_G_MASK) | + (rhs->avl * DESC_AVL_MASK); + } } static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) @@ -1115,8 +1283,8 @@ static int kvm_put_fpu(X86CPU *cpu) } memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); for (i = 0; i < CPU_NB_REGS; i++) { - stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0)); - stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1)); + stq_p(&fpu.xmm[i][0], env->xmm_regs[i].ZMM_Q(0)); + stq_p(&fpu.xmm[i][8], env->xmm_regs[i].ZMM_Q(1)); } fpu.mxcsr = env->mxcsr; @@ -1137,6 +1305,7 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_OPMASK 272 #define XSAVE_ZMM_Hi256 288 #define XSAVE_Hi16_ZMM 416 +#define XSAVE_PKRU 672 static int kvm_put_xsave(X86CPU *cpu) { @@ -1146,7 +1315,7 @@ static int kvm_put_xsave(X86CPU *cpu) uint8_t *xmm, *ymmh, *zmmh; int i, r; - if (!kvm_has_xsave()) { + if (!has_xsave) { return kvm_put_fpu(cpu); } @@ -1177,19 +1346,20 @@ static int kvm_put_xsave(X86CPU *cpu) ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE]; zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256]; for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) { - stq_p(xmm, env->xmm_regs[i].XMM_Q(0)); - stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1)); - stq_p(ymmh, env->xmm_regs[i].XMM_Q(2)); - stq_p(ymmh+8, env->xmm_regs[i].XMM_Q(3)); - stq_p(zmmh, env->xmm_regs[i].XMM_Q(4)); - stq_p(zmmh+8, env->xmm_regs[i].XMM_Q(5)); - stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6)); - stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7)); + stq_p(xmm, env->xmm_regs[i].ZMM_Q(0)); + stq_p(xmm+8, env->xmm_regs[i].ZMM_Q(1)); + stq_p(ymmh, env->xmm_regs[i].ZMM_Q(2)); + stq_p(ymmh+8, env->xmm_regs[i].ZMM_Q(3)); + stq_p(zmmh, env->xmm_regs[i].ZMM_Q(4)); + stq_p(zmmh+8, env->xmm_regs[i].ZMM_Q(5)); + stq_p(zmmh+16, env->xmm_regs[i].ZMM_Q(6)); + stq_p(zmmh+24, env->xmm_regs[i].ZMM_Q(7)); } #ifdef TARGET_X86_64 memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16], 16 * sizeof env->xmm_regs[16]); + memcpy(&xsave->region[XSAVE_PKRU], &env->pkru, sizeof env->pkru); #endif r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; @@ -1200,7 +1370,7 @@ static int kvm_put_xcrs(X86CPU *cpu) CPUX86State *env = &cpu->env; struct kvm_xcrs xcrs = {}; - if (!kvm_has_xcrs()) { + if (!has_xcrs) { return 0; } @@ -1277,6 +1447,7 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) struct kvm_msr_entry entries[1]; } msr_data; struct kvm_msr_entry *msrs = msr_data.entries; + int ret; if (!has_msr_tsc_deadline) { return 0; @@ -1288,7 +1459,13 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) .nmsrs = 1, }; - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + + assert(ret == 1); + return 0; } /* @@ -1303,6 +1480,11 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) struct kvm_msrs info; struct kvm_msr_entry entry; } msr_data; + int ret; + + if (!has_msr_feature_control) { + return 0; + } kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, cpu->env.msr_ia32_feature_control); @@ -1311,7 +1493,13 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) .nmsrs = 1, }; - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + + assert(ret == 1); + return 0; } static int kvm_put_msrs(X86CPU *cpu, int level) @@ -1323,6 +1511,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) } msr_data; struct kvm_msr_entry *msrs = msr_data.entries; int n = 0, i; + int ret; kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); @@ -1334,6 +1523,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); } + if (has_msr_tsc_aux) { + kvm_msr_entry_set(&msrs[n++], MSR_TSC_AUX, env->tsc_aux); + } if (has_msr_tsc_adjust) { kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust); } @@ -1420,6 +1612,50 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC, env->msr_hv_tsc); } + if (has_msr_hv_crash) { + int j; + + for (j = 0; j < HV_X64_MSR_CRASH_PARAMS; j++) + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_CRASH_P0 + j, + env->msr_hv_crash_params[j]); + + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_CRASH_CTL, + HV_X64_MSR_CRASH_CTL_NOTIFY); + } + if (has_msr_hv_runtime) { + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_VP_RUNTIME, + env->msr_hv_runtime); + } + if (cpu->hyperv_synic) { + int j; + + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_SCONTROL, + env->msr_hv_synic_control); + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_SVERSION, + env->msr_hv_synic_version); + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_SIEFP, + env->msr_hv_synic_evt_page); + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_SIMP, + env->msr_hv_synic_msg_page); + + for (j = 0; j < ARRAY_SIZE(env->msr_hv_synic_sint); j++) { + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_SINT0 + j, + env->msr_hv_synic_sint[j]); + } + } + if (has_msr_hv_stimer) { + int j; + + for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_config); j++) { + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_STIMER0_CONFIG + j*2, + env->msr_hv_stimer_config[j]); + } + + for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_count); j++) { + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_STIMER0_COUNT + j*2, + env->msr_hv_stimer_count[j]); + } + } if (has_msr_mtrr) { kvm_msr_entry_set(&msrs[n++], MSR_MTRRdefType, env->mtrr_deftype); kvm_msr_entry_set(&msrs[n++], @@ -1469,8 +1705,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) .nmsrs = n, }; - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + assert(ret == n); + return 0; } @@ -1496,8 +1737,8 @@ static int kvm_get_fpu(X86CPU *cpu) } memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); for (i = 0; i < CPU_NB_REGS; i++) { - env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]); - env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]); + env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.xmm[i][0]); + env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.xmm[i][8]); } env->mxcsr = fpu.mxcsr; @@ -1512,7 +1753,7 @@ static int kvm_get_xsave(X86CPU *cpu) const uint8_t *xmm, *ymmh, *zmmh; uint16_t cwd, swd, twd; - if (!kvm_has_xsave()) { + if (!has_xsave) { return kvm_get_fpu(cpu); } @@ -1548,19 +1789,20 @@ static int kvm_get_xsave(X86CPU *cpu) ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE]; zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256]; for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) { - env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm); - env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8); - env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh); - env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8); - env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh); - env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8); - env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16); - env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24); + env->xmm_regs[i].ZMM_Q(0) = ldq_p(xmm); + env->xmm_regs[i].ZMM_Q(1) = ldq_p(xmm+8); + env->xmm_regs[i].ZMM_Q(2) = ldq_p(ymmh); + env->xmm_regs[i].ZMM_Q(3) = ldq_p(ymmh+8); + env->xmm_regs[i].ZMM_Q(4) = ldq_p(zmmh); + env->xmm_regs[i].ZMM_Q(5) = ldq_p(zmmh+8); + env->xmm_regs[i].ZMM_Q(6) = ldq_p(zmmh+16); + env->xmm_regs[i].ZMM_Q(7) = ldq_p(zmmh+24); } #ifdef TARGET_X86_64 memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM], 16 * sizeof env->xmm_regs[16]); + memcpy(&env->pkru, &xsave->region[XSAVE_PKRU], sizeof env->pkru); #endif return 0; } @@ -1571,7 +1813,7 @@ static int kvm_get_xcrs(X86CPU *cpu) int i, ret; struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) { + if (!has_xcrs) { return 0; } @@ -1643,13 +1885,16 @@ static int kvm_get_sregs(X86CPU *cpu) HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + hflags = env->hflags & HFLAG_COPY_MASK; + hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); - hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << - (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + + if (env->cr[4] & CR4_OSFXSR_MASK) { + hflags |= HF_OSFXSR_MASK; + } if (env->efer & MSR_EFER_LMA) { hflags |= HF_LMA_MASK; @@ -1670,7 +1915,7 @@ static int kvm_get_sregs(X86CPU *cpu) env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT; } } - env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; + env->hflags = hflags; return 0; } @@ -1696,6 +1941,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; } + if (has_msr_tsc_aux) { + msrs[n++].index = MSR_TSC_AUX; + } if (has_msr_tsc_adjust) { msrs[n++].index = MSR_TSC_ADJUST; } @@ -1775,6 +2023,35 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_hv_tsc) { msrs[n++].index = HV_X64_MSR_REFERENCE_TSC; } + if (has_msr_hv_crash) { + int j; + + for (j = 0; j < HV_X64_MSR_CRASH_PARAMS; j++) { + msrs[n++].index = HV_X64_MSR_CRASH_P0 + j; + } + } + if (has_msr_hv_runtime) { + msrs[n++].index = HV_X64_MSR_VP_RUNTIME; + } + if (cpu->hyperv_synic) { + uint32_t msr; + + msrs[n++].index = HV_X64_MSR_SCONTROL; + msrs[n++].index = HV_X64_MSR_SVERSION; + msrs[n++].index = HV_X64_MSR_SIEFP; + msrs[n++].index = HV_X64_MSR_SIMP; + for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) { + msrs[n++].index = msr; + } + } + if (has_msr_hv_stimer) { + uint32_t msr; + + for (msr = HV_X64_MSR_STIMER0_CONFIG; msr <= HV_X64_MSR_STIMER3_COUNT; + msr++) { + msrs[n++].index = msr; + } + } if (has_msr_mtrr) { msrs[n++].index = MSR_MTRRdefType; msrs[n++].index = MSR_MTRRfix64K_00000; @@ -1803,6 +2080,7 @@ static int kvm_get_msrs(X86CPU *cpu) return ret; } + assert(ret == n); for (i = 0; i < ret; i++) { uint32_t index = msrs[i].index; switch (index) { @@ -1838,6 +2116,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_TSC: env->tsc = msrs[i].data; break; + case MSR_TSC_AUX: + env->tsc_aux = msrs[i].data; + break; case MSR_TSC_ADJUST: env->tsc_adjust = msrs[i].data; break; @@ -1922,6 +2203,41 @@ static int kvm_get_msrs(X86CPU *cpu) case HV_X64_MSR_REFERENCE_TSC: env->msr_hv_tsc = msrs[i].data; break; + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data; + break; + case HV_X64_MSR_VP_RUNTIME: + env->msr_hv_runtime = msrs[i].data; + break; + case HV_X64_MSR_SCONTROL: + env->msr_hv_synic_control = msrs[i].data; + break; + case HV_X64_MSR_SVERSION: + env->msr_hv_synic_version = msrs[i].data; + break; + case HV_X64_MSR_SIEFP: + env->msr_hv_synic_evt_page = msrs[i].data; + break; + case HV_X64_MSR_SIMP: + env->msr_hv_synic_msg_page = msrs[i].data; + break; + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + env->msr_hv_synic_sint[index - HV_X64_MSR_SINT0] = msrs[i].data; + break; + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: + env->msr_hv_stimer_config[(index - HV_X64_MSR_STIMER0_CONFIG)/2] = + msrs[i].data; + break; + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: + env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] = + msrs[i].data; + break; case MSR_MTRRdefType: env->mtrr_deftype = msrs[i].data; break; @@ -2221,13 +2537,22 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) { + if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_msr_feature_control(x86_cpu); if (ret < 0) { return ret; } } + if (level == KVM_PUT_FULL_STATE) { + /* We don't check for kvm_arch_set_tsc_khz() errors here, + * because TSC frequency mismatch shouldn't abort migration, + * unless the user explicitly asked for a more strict TSC + * setting (e.g. using an explicit "tsc-freq" option). + */ + kvm_arch_set_tsc_khz(cpu); + } + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret; @@ -2294,41 +2619,44 @@ int kvm_arch_get_registers(CPUState *cs) ret = kvm_getput_regs(cpu, 0); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_xsave(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_xcrs(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_sregs(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_msrs(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_mp_state(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_apic(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_vcpu_events(cpu); if (ret < 0) { - return ret; + goto out; } ret = kvm_get_debugregs(cpu); if (ret < 0) { - return ret; + goto out; } - return 0; + ret = 0; + out: + cpu_sync_bndcs_hflags(&cpu->env); + return ret; } void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) @@ -2363,7 +2691,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_irqchip_in_kernel()) { + if (!kvm_pic_in_kernel()) { qemu_mutex_lock_iothread(); } @@ -2381,7 +2709,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_irqchip_in_kernel()) { + if (!kvm_pic_in_kernel()) { /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && (cpu->interrupt_request & CPU_INTERRUPT_HARD) && @@ -2780,6 +3108,13 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ret = kvm_handle_debug(cpu, &run->debug.arch); qemu_mutex_unlock_iothread(); break; + case KVM_EXIT_HYPERV: + ret = kvm_hv_handle_exit(cpu, &run->hyperv); + break; + case KVM_EXIT_IOAPIC_EOI: + ioapic_eoi_broadcast(run->eoi.vector); + ret = 0; + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; @@ -2814,6 +3149,39 @@ void kvm_arch_init_irq_routing(KVMState *s) */ kvm_msi_via_irqfd_allowed = true; kvm_gsi_routing_allowed = true; + + if (kvm_irqchip_is_split()) { + int i; + + /* If the ioapic is in QEMU and the lapics are in KVM, reserve + MSI routes for signaling interrupts to the local apics. */ + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + struct MSIMessage msg = { 0x0, 0x0 }; + if (kvm_irqchip_add_msi_route(s, msg, NULL) < 0) { + error_report("Could not enable split IRQ mode."); + exit(1); + } + } + } +} + +int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) +{ + int ret; + if (machine_kernel_irqchip_split(ms)) { + ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); + if (ret) { + error_report("Could not enable split irqchip mode: %s\n", + strerror(-ret)); + exit(1); + } else { + DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n"); + kvm_split_irqchip = true; + return 1; + } + } else { + return 0; + } } /* Classic KVM device assignment interface. Will remain x86 only. */ @@ -2957,7 +3325,7 @@ int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id) } int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, - uint64_t address, uint32_t data) + uint64_t address, uint32_t data, PCIDevice *dev) { return 0; }