These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / arch / x86 / kvm / mmu.c
index 554e877..8eb8a93 100644 (file)
@@ -223,15 +223,15 @@ static unsigned int get_mmio_spte_generation(u64 spte)
        return gen;
 }
 
-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu)
 {
-       return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
+       return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK;
 }
 
-static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
+static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
                           unsigned access)
 {
-       unsigned int gen = kvm_current_mmio_generation(kvm);
+       unsigned int gen = kvm_current_mmio_generation(vcpu);
        u64 mask = generation_mmio_spte_mask(gen);
 
        access &= ACC_WRITE_MASK | ACC_USER_MASK;
@@ -258,22 +258,22 @@ static unsigned get_mmio_spte_access(u64 spte)
        return (spte & ~mask) & ~PAGE_MASK;
 }
 
-static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
+static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
                          pfn_t pfn, unsigned access)
 {
        if (unlikely(is_noslot_pfn(pfn))) {
-               mark_mmio_spte(kvm, sptep, gfn, access);
+               mark_mmio_spte(vcpu, sptep, gfn, access);
                return true;
        }
 
        return false;
 }
 
-static bool check_mmio_spte(struct kvm *kvm, u64 spte)
+static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 {
        unsigned int kvm_gen, spte_gen;
 
-       kvm_gen = kvm_current_mmio_generation(kvm);
+       kvm_gen = kvm_current_mmio_generation(vcpu);
        spte_gen = get_mmio_spte_generation(spte);
 
        trace_check_mmio_spte(spte, kvm_gen, spte_gen);
@@ -781,30 +781,36 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
        return &slot->arch.lpage_info[level - 2][idx];
 }
 
-static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *slot;
        struct kvm_lpage_info *linfo;
+       gfn_t gfn;
        int i;
 
-       slot = gfn_to_memslot(kvm, gfn);
-       for (i = PT_DIRECTORY_LEVEL;
-            i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+       gfn = sp->gfn;
+       slots = kvm_memslots_for_spte_role(kvm, sp->role);
+       slot = __gfn_to_memslot(slots, gfn);
+       for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
                linfo = lpage_info_slot(gfn, slot, i);
                linfo->write_count += 1;
        }
        kvm->arch.indirect_shadow_pages++;
 }
 
-static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
+static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *slot;
        struct kvm_lpage_info *linfo;
+       gfn_t gfn;
        int i;
 
-       slot = gfn_to_memslot(kvm, gfn);
-       for (i = PT_DIRECTORY_LEVEL;
-            i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+       gfn = sp->gfn;
+       slots = kvm_memslots_for_spte_role(kvm, sp->role);
+       slot = __gfn_to_memslot(slots, gfn);
+       for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
                linfo = lpage_info_slot(gfn, slot, i);
                linfo->write_count -= 1;
                WARN_ON(linfo->write_count < 0);
@@ -812,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
        kvm->arch.indirect_shadow_pages--;
 }
 
-static int has_wrprotected_page(struct kvm *kvm,
-                               gfn_t gfn,
-                               int level)
+static int __has_wrprotected_page(gfn_t gfn, int level,
+                                 struct kvm_memory_slot *slot)
 {
-       struct kvm_memory_slot *slot;
        struct kvm_lpage_info *linfo;
 
-       slot = gfn_to_memslot(kvm, gfn);
        if (slot) {
                linfo = lpage_info_slot(gfn, slot, level);
                return linfo->write_count;
@@ -828,6 +831,14 @@ static int has_wrprotected_page(struct kvm *kvm,
        return 1;
 }
 
+static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+{
+       struct kvm_memory_slot *slot;
+
+       slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+       return __has_wrprotected_page(gfn, level, slot);
+}
+
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
        unsigned long page_size;
@@ -835,8 +846,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 
        page_size = kvm_host_page_size(kvm, gfn);
 
-       for (i = PT_PAGE_TABLE_LEVEL;
-            i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
+       for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
                if (page_size >= KVM_HPAGE_SIZE(i))
                        ret = i;
                else
@@ -846,28 +856,43 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
        return ret;
 }
 
+static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
+                                         bool no_dirty_log)
+{
+       if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+               return false;
+       if (no_dirty_log && slot->dirty_bitmap)
+               return false;
+
+       return true;
+}
+
 static struct kvm_memory_slot *
 gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
                            bool no_dirty_log)
 {
        struct kvm_memory_slot *slot;
 
-       slot = gfn_to_memslot(vcpu->kvm, gfn);
-       if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
-             (no_dirty_log && slot->dirty_bitmap))
+       slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+       if (!memslot_valid_for_gpte(slot, no_dirty_log))
                slot = NULL;
 
        return slot;
 }
 
-static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
-{
-       return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
-}
-
-static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
+                        bool *force_pt_level)
 {
        int host_level, level, max_level;
+       struct kvm_memory_slot *slot;
+
+       if (unlikely(*force_pt_level))
+               return PT_PAGE_TABLE_LEVEL;
+
+       slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn);
+       *force_pt_level = !memslot_valid_for_gpte(slot, true);
+       if (unlikely(*force_pt_level))
+               return PT_PAGE_TABLE_LEVEL;
 
        host_level = host_mapping_level(vcpu->kvm, large_gfn);
 
@@ -877,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
        max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
 
        for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
-               if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
+               if (__has_wrprotected_page(large_gfn, level, slot))
                        break;
 
        return level - 1;
@@ -1019,12 +1044,14 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 /*
  * Take gfn and return the reverse mapping to it.
  */
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp)
 {
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *slot;
 
-       slot = gfn_to_memslot(kvm, gfn);
-       return __gfn_to_rmap(gfn, level, slot);
+       slots = kvm_memslots_for_spte_role(kvm, sp->role);
+       slot = __gfn_to_memslot(slots, gfn);
+       return __gfn_to_rmap(gfn, sp->role.level, slot);
 }
 
 static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -1042,7 +1069,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
        sp = page_header(__pa(spte));
        kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
-       rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
+       rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
        return pte_list_add(vcpu, spte, rmapp);
 }
 
@@ -1054,7 +1081,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 
        sp = page_header(__pa(spte));
        gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
-       rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
+       rmapp = gfn_to_rmap(kvm, gfn, sp);
        pte_list_remove(spte, rmapp);
 }
 
@@ -1119,6 +1146,11 @@ static u64 *rmap_get_next(struct rmap_iterator *iter)
        return NULL;
 }
 
+#define for_each_rmap_spte(_rmap_, _iter_, _spte_)                         \
+          for (_spte_ = rmap_get_first(*_rmap_, _iter_);                   \
+               _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;});  \
+                       _spte_ = rmap_get_next(_iter_))
+
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
        if (mmu_spte_clear_track_bits(sptep))
@@ -1182,12 +1214,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
        struct rmap_iterator iter;
        bool flush = false;
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-               BUG_ON(!(*sptep & PT_PRESENT_MASK));
-
+       for_each_rmap_spte(rmapp, &iter, sptep)
                flush |= spte_write_protect(kvm, sptep, pt_protect);
-               sptep = rmap_get_next(&iter);
-       }
 
        return flush;
 }
@@ -1209,12 +1237,8 @@ static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
        struct rmap_iterator iter;
        bool flush = false;
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-               BUG_ON(!(*sptep & PT_PRESENT_MASK));
-
+       for_each_rmap_spte(rmapp, &iter, sptep)
                flush |= spte_clear_dirty(kvm, sptep);
-               sptep = rmap_get_next(&iter);
-       }
 
        return flush;
 }
@@ -1236,12 +1260,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
        struct rmap_iterator iter;
        bool flush = false;
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-               BUG_ON(!(*sptep & PT_PRESENT_MASK));
-
+       for_each_rmap_spte(rmapp, &iter, sptep)
                flush |= spte_set_dirty(kvm, sptep);
-               sptep = rmap_get_next(&iter);
-       }
 
        return flush;
 }
@@ -1319,42 +1339,45 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
+static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
        struct kvm_memory_slot *slot;
        unsigned long *rmapp;
        int i;
        bool write_protected = false;
 
-       slot = gfn_to_memslot(kvm, gfn);
+       slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-       for (i = PT_PAGE_TABLE_LEVEL;
-            i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+       for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
                rmapp = __gfn_to_rmap(gfn, i, slot);
-               write_protected |= __rmap_write_protect(kvm, rmapp, true);
+               write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true);
        }
 
        return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                          struct kvm_memory_slot *slot, gfn_t gfn, int level,
-                          unsigned long data)
+static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 {
        u64 *sptep;
        struct rmap_iterator iter;
-       int need_tlb_flush = 0;
+       bool flush = false;
 
        while ((sptep = rmap_get_first(*rmapp, &iter))) {
                BUG_ON(!(*sptep & PT_PRESENT_MASK));
-               rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n",
-                            sptep, *sptep, gfn, level);
+               rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
 
                drop_spte(kvm, sptep);
-               need_tlb_flush = 1;
+               flush = true;
        }
 
-       return need_tlb_flush;
+       return flush;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                          struct kvm_memory_slot *slot, gfn_t gfn, int level,
+                          unsigned long data)
+{
+       return kvm_zap_rmapp(kvm, rmapp);
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
@@ -1371,8 +1394,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
        WARN_ON(pte_huge(*ptep));
        new_pfn = pte_pfn(*ptep);
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-               BUG_ON(!is_shadow_present_pte(*sptep));
+restart:
+       for_each_rmap_spte(rmapp, &iter, sptep) {
                rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
                             sptep, *sptep, gfn, level);
 
@@ -1380,7 +1403,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
                if (pte_write(*ptep)) {
                        drop_spte(kvm, sptep);
-                       sptep = rmap_get_first(*rmapp, &iter);
+                       goto restart;
                } else {
                        new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
                        new_spte |= (u64)new_pfn << PAGE_SHIFT;
@@ -1391,7 +1414,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
                        mmu_spte_clear_track_bits(sptep);
                        mmu_spte_set(sptep, new_spte);
-                       sptep = rmap_get_next(&iter);
                }
        }
 
@@ -1401,6 +1423,74 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
        return 0;
 }
 
+struct slot_rmap_walk_iterator {
+       /* input fields. */
+       struct kvm_memory_slot *slot;
+       gfn_t start_gfn;
+       gfn_t end_gfn;
+       int start_level;
+       int end_level;
+
+       /* output fields. */
+       gfn_t gfn;
+       unsigned long *rmap;
+       int level;
+
+       /* private field. */
+       unsigned long *end_rmap;
+};
+
+static void
+rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level)
+{
+       iterator->level = level;
+       iterator->gfn = iterator->start_gfn;
+       iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot);
+       iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level,
+                                          iterator->slot);
+}
+
+static void
+slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator,
+                   struct kvm_memory_slot *slot, int start_level,
+                   int end_level, gfn_t start_gfn, gfn_t end_gfn)
+{
+       iterator->slot = slot;
+       iterator->start_level = start_level;
+       iterator->end_level = end_level;
+       iterator->start_gfn = start_gfn;
+       iterator->end_gfn = end_gfn;
+
+       rmap_walk_init_level(iterator, iterator->start_level);
+}
+
+static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator)
+{
+       return !!iterator->rmap;
+}
+
+static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
+{
+       if (++iterator->rmap <= iterator->end_rmap) {
+               iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level));
+               return;
+       }
+
+       if (++iterator->level > iterator->end_level) {
+               iterator->rmap = NULL;
+               return;
+       }
+
+       rmap_walk_init_level(iterator, iterator->level);
+}
+
+#define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_,   \
+          _start_gfn, _end_gfn, _iter_)                                \
+       for (slot_rmap_walk_init(_iter_, _slot_, _start_level_,         \
+                                _end_level_, _start_gfn, _end_gfn);    \
+            slot_rmap_walk_okay(_iter_);                               \
+            slot_rmap_walk_next(_iter_))
+
 static int kvm_handle_hva_range(struct kvm *kvm,
                                unsigned long start,
                                unsigned long end,
@@ -1412,48 +1502,36 @@ static int kvm_handle_hva_range(struct kvm *kvm,
                                               int level,
                                               unsigned long data))
 {
-       int j;
-       int ret = 0;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
+       struct slot_rmap_walk_iterator iterator;
+       int ret = 0;
+       int i;
 
-       slots = kvm_memslots(kvm);
-
-       kvm_for_each_memslot(memslot, slots) {
-               unsigned long hva_start, hva_end;
-               gfn_t gfn_start, gfn_end;
-
-               hva_start = max(start, memslot->userspace_addr);
-               hva_end = min(end, memslot->userspace_addr +
-                                       (memslot->npages << PAGE_SHIFT));
-               if (hva_start >= hva_end)
-                       continue;
-               /*
-                * {gfn(page) | page intersects with [hva_start, hva_end)} =
-                * {gfn_start, gfn_start+1, ..., gfn_end-1}.
-                */
-               gfn_start = hva_to_gfn_memslot(hva_start, memslot);
-               gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
-
-               for (j = PT_PAGE_TABLE_LEVEL;
-                    j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
-                       unsigned long idx, idx_end;
-                       unsigned long *rmapp;
-                       gfn_t gfn = gfn_start;
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               slots = __kvm_memslots(kvm, i);
+               kvm_for_each_memslot(memslot, slots) {
+                       unsigned long hva_start, hva_end;
+                       gfn_t gfn_start, gfn_end;
 
+                       hva_start = max(start, memslot->userspace_addr);
+                       hva_end = min(end, memslot->userspace_addr +
+                                     (memslot->npages << PAGE_SHIFT));
+                       if (hva_start >= hva_end)
+                               continue;
                        /*
-                        * {idx(page_j) | page_j intersects with
-                        *  [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+                        * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                        * {gfn_start, gfn_start+1, ..., gfn_end-1}.
                         */
-                       idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
-                       idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
-
-                       rmapp = __gfn_to_rmap(gfn_start, j, memslot);
-
-                       for (; idx <= idx_end;
-                              ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j)))
-                               ret |= handler(kvm, rmapp++, memslot,
-                                              gfn, j, data);
+                       gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+                       gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+                       for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL,
+                                                PT_MAX_HUGEPAGE_LEVEL,
+                                                gfn_start, gfn_end - 1,
+                                                &iterator)
+                               ret |= handler(kvm, iterator.rmap, memslot,
+                                              iterator.gfn, iterator.level, data);
                }
        }
 
@@ -1495,16 +1573,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
        BUG_ON(!shadow_accessed_mask);
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;
-            sptep = rmap_get_next(&iter)) {
-               BUG_ON(!is_shadow_present_pte(*sptep));
-
+       for_each_rmap_spte(rmapp, &iter, sptep)
                if (*sptep & shadow_accessed_mask) {
                        young = 1;
                        clear_bit((ffs(shadow_accessed_mask) - 1),
                                 (unsigned long *)sptep);
                }
-       }
+
        trace_kvm_age_page(gfn, level, slot, young);
        return young;
 }
@@ -1525,15 +1600,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
        if (!shadow_accessed_mask)
                goto out;
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;
-            sptep = rmap_get_next(&iter)) {
-               BUG_ON(!is_shadow_present_pte(*sptep));
-
+       for_each_rmap_spte(rmapp, &iter, sptep)
                if (*sptep & shadow_accessed_mask) {
                        young = 1;
                        break;
                }
-       }
 out:
        return young;
 }
@@ -1547,7 +1618,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
        sp = page_header(__pa(spte));
 
-       rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
+       rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
 
        kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
        kvm_flush_remote_tlbs(vcpu->kvm);
@@ -1967,7 +2038,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
                bool protected = false;
 
                for_each_sp(pages, sp, parents, i)
-                       protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+                       protected |= rmap_write_protect(vcpu, sp->gfn);
 
                if (protected)
                        kvm_flush_remote_tlbs(vcpu->kvm);
@@ -2065,12 +2136,12 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        hlist_add_head(&sp->hash_link,
                &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
        if (!direct) {
-               if (rmap_write_protect(vcpu->kvm, gfn))
+               if (rmap_write_protect(vcpu, gfn))
                        kvm_flush_remote_tlbs(vcpu->kvm);
                if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                        kvm_sync_pages(vcpu, gfn);
 
-               account_shadowed(vcpu->kvm, gfn);
+               account_shadowed(vcpu->kvm, sp);
        }
        sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        init_shadow_page_table(sp);
@@ -2251,7 +2322,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
        kvm_mmu_unlink_parents(kvm, sp);
 
        if (!sp->role.invalid && !sp->role.direct)
-               unaccount_shadowed(kvm, sp->gfn);
+               unaccount_shadowed(kvm, sp);
 
        if (sp->unsync)
                kvm_unlink_unsync_page(kvm, sp);
@@ -2363,111 +2434,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
-/*
- * The function is based on mtrr_type_lookup() in
- * arch/x86/kernel/cpu/mtrr/generic.c
- */
-static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
-                        u64 start, u64 end)
-{
-       int i;
-       u64 base, mask;
-       u8 prev_match, curr_match;
-       int num_var_ranges = KVM_NR_VAR_MTRR;
-
-       if (!mtrr_state->enabled)
-               return 0xFF;
-
-       /* Make end inclusive end, instead of exclusive */
-       end--;
-
-       /* Look in fixed ranges. Just return the type as per start */
-       if (mtrr_state->have_fixed && (start < 0x100000)) {
-               int idx;
-
-               if (start < 0x80000) {
-                       idx = 0;
-                       idx += (start >> 16);
-                       return mtrr_state->fixed_ranges[idx];
-               } else if (start < 0xC0000) {
-                       idx = 1 * 8;
-                       idx += ((start - 0x80000) >> 14);
-                       return mtrr_state->fixed_ranges[idx];
-               } else if (start < 0x1000000) {
-                       idx = 3 * 8;
-                       idx += ((start - 0xC0000) >> 12);
-                       return mtrr_state->fixed_ranges[idx];
-               }
-       }
-
-       /*
-        * Look in variable ranges
-        * Look of multiple ranges matching this address and pick type
-        * as per MTRR precedence
-        */
-       if (!(mtrr_state->enabled & 2))
-               return mtrr_state->def_type;
-
-       prev_match = 0xFF;
-       for (i = 0; i < num_var_ranges; ++i) {
-               unsigned short start_state, end_state;
-
-               if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
-                       continue;
-
-               base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
-                      (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
-               mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
-                      (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
-
-               start_state = ((start & mask) == (base & mask));
-               end_state = ((end & mask) == (base & mask));
-               if (start_state != end_state)
-                       return 0xFE;
-
-               if ((start & mask) != (base & mask))
-                       continue;
-
-               curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
-               if (prev_match == 0xFF) {
-                       prev_match = curr_match;
-                       continue;
-               }
-
-               if (prev_match == MTRR_TYPE_UNCACHABLE ||
-                   curr_match == MTRR_TYPE_UNCACHABLE)
-                       return MTRR_TYPE_UNCACHABLE;
-
-               if ((prev_match == MTRR_TYPE_WRBACK &&
-                    curr_match == MTRR_TYPE_WRTHROUGH) ||
-                   (prev_match == MTRR_TYPE_WRTHROUGH &&
-                    curr_match == MTRR_TYPE_WRBACK)) {
-                       prev_match = MTRR_TYPE_WRTHROUGH;
-                       curr_match = MTRR_TYPE_WRTHROUGH;
-               }
-
-               if (prev_match != curr_match)
-                       return MTRR_TYPE_UNCACHABLE;
-       }
-
-       if (prev_match != 0xFF)
-               return prev_match;
-
-       return mtrr_state->def_type;
-}
-
-u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
-       u8 mtrr;
-
-       mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
-                            (gfn << PAGE_SHIFT) + PAGE_SIZE);
-       if (mtrr == 0xfe || mtrr == 0xff)
-               mtrr = MTRR_TYPE_WRBACK;
-       return mtrr;
-}
-EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
-
 static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
        trace_kvm_mmu_unsync_page(sp);
@@ -2510,6 +2476,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
        return 0;
 }
 
+static bool kvm_is_mmio_pfn(pfn_t pfn)
+{
+       if (pfn_valid(pfn))
+               return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+
+       return true;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                    unsigned pte_access, int level,
                    gfn_t gfn, pfn_t pfn, bool speculative,
@@ -2518,7 +2492,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        u64 spte;
        int ret = 0;
 
-       if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access))
+       if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
                return 0;
 
        spte = PT_PRESENT_MASK;
@@ -2537,7 +2511,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                spte |= PT_PAGE_SIZE_MASK;
        if (tdp_enabled)
                spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-                       kvm_is_reserved_pfn(pfn));
+                       kvm_is_mmio_pfn(pfn));
 
        if (host_writable)
                spte |= SPTE_HOST_WRITEABLE;
@@ -2555,7 +2529,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                 * be fixed if guest refault.
                 */
                if (level > PT_PAGE_TABLE_LEVEL &&
-                   has_wrprotected_page(vcpu->kvm, gfn, level))
+                   has_wrprotected_page(vcpu, gfn, level))
                        goto done;
 
                spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
@@ -2579,7 +2553,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        }
 
        if (pte_access & ACC_WRITE_MASK) {
-               mark_page_dirty(vcpu->kvm, gfn);
+               kvm_vcpu_mark_page_dirty(vcpu, gfn);
                spte |= shadow_dirty_mask;
        }
 
@@ -2669,15 +2643,17 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
                                    u64 *start, u64 *end)
 {
        struct page *pages[PTE_PREFETCH_NUM];
+       struct kvm_memory_slot *slot;
        unsigned access = sp->role.access;
        int i, ret;
        gfn_t gfn;
 
        gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
-       if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK))
+       slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK);
+       if (!slot)
                return -1;
 
-       ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start);
+       ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start);
        if (ret <= 0)
                return -1;
 
@@ -2795,7 +2771,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
                return 1;
 
        if (pfn == KVM_PFN_ERR_HWPOISON) {
-               kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
+               kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
                return 0;
        }
 
@@ -2818,7 +2794,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
        if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
            level == PT_PAGE_TABLE_LEVEL &&
            PageTransCompound(pfn_to_page(pfn)) &&
-           !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
+           !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
                unsigned long mask;
                /*
                 * mmu_notifier_retry was successful and we hold the
@@ -2910,7 +2886,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
         * Compare with set_spte where instead shadow_dirty_mask is set.
         */
        if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
-               mark_page_dirty(vcpu->kvm, gfn);
+               kvm_vcpu_mark_page_dirty(vcpu, gfn);
 
        return true;
 }
@@ -3006,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 {
        int r;
        int level;
-       int force_pt_level;
+       bool force_pt_level = false;
        pfn_t pfn;
        unsigned long mmu_seq;
        bool map_writable, write = error_code & PFERR_WRITE_MASK;
 
-       force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+       level = mapping_level(vcpu, gfn, &force_pt_level);
        if (likely(!force_pt_level)) {
-               level = mapping_level(vcpu, gfn);
                /*
                 * This path builds a PAE pagetable - so we can map
                 * 2mb pages at maximum. Therefore check if the level
@@ -3023,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
                        level = PT_DIRECTORY_LEVEL;
 
                gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-       } else
-               level = PT_PAGE_TABLE_LEVEL;
+       }
 
        if (fast_page_fault(vcpu, v, level, error_code))
                return 0;
@@ -3312,6 +3286,25 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
        return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }
 
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
+{
+       int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
+
+       return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+               ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
+}
+
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+       return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}
+
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
+{
+       return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
+}
+
 static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
        if (direct)
@@ -3320,37 +3313,69 @@ static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
        return vcpu_match_mmio_gva(vcpu, addr);
 }
 
-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
        struct kvm_shadow_walk_iterator iterator;
-       u64 spte = 0ull;
+       u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+       int root, leaf;
+       bool reserved = false;
 
        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-               return spte;
+               goto exit;
 
        walk_shadow_page_lockless_begin(vcpu);
-       for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+
+       for (shadow_walk_init(&iterator, vcpu, addr),
+                leaf = root = iterator.level;
+            shadow_walk_okay(&iterator);
+            __shadow_walk_next(&iterator, spte)) {
+               spte = mmu_spte_get_lockless(iterator.sptep);
+
+               sptes[leaf - 1] = spte;
+               leaf--;
+
                if (!is_shadow_present_pte(spte))
                        break;
+
+               reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+                                                   iterator.level);
+       }
+
        walk_shadow_page_lockless_end(vcpu);
 
-       return spte;
+       if (reserved) {
+               pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+                      __func__, addr);
+               while (root > leaf) {
+                       pr_err("------ spte 0x%llx level %d.\n",
+                              sptes[root - 1], root);
+                       root--;
+               }
+       }
+exit:
+       *sptep = spte;
+       return reserved;
 }
 
-int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
        u64 spte;
+       bool reserved;
 
        if (quickly_check_mmio_pf(vcpu, addr, direct))
                return RET_MMIO_PF_EMULATE;
 
-       spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+       reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+       if (WARN_ON(reserved))
+               return RET_MMIO_PF_BUG;
 
        if (is_mmio_spte(spte)) {
                gfn_t gfn = get_mmio_spte_gfn(spte);
                unsigned access = get_mmio_spte_access(spte);
 
-               if (!check_mmio_spte(vcpu->kvm, spte))
+               if (!check_mmio_spte(vcpu, spte))
                        return RET_MMIO_PF_INVALID;
 
                if (direct)
@@ -3367,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
         */
        return RET_MMIO_PF_RETRY;
 }
-EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
-
-static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
-                                 u32 error_code, bool direct)
-{
-       int ret;
-
-       ret = handle_mmio_page_fault_common(vcpu, addr, direct);
-       WARN_ON(ret == RET_MMIO_PF_BUG);
-       return ret;
-}
+EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
 
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                                u32 error_code, bool prefault)
@@ -3388,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
        pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 
        if (unlikely(error_code & PFERR_RSVD_MASK)) {
-               r = handle_mmio_page_fault(vcpu, gva, error_code, true);
+               r = handle_mmio_page_fault(vcpu, gva, true);
 
                if (likely(r != RET_MMIO_PF_INVALID))
                        return r;
@@ -3415,12 +3430,12 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
        arch.direct_map = vcpu->arch.mmu.direct_map;
        arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
 
-       return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
+       return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 {
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+       if (unlikely(!lapic_in_kernel(vcpu) ||
                     kvm_event_needs_reinjection(vcpu)))
                return false;
 
@@ -3430,10 +3445,12 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
                         gva_t gva, pfn_t *pfn, bool write, bool *writable)
 {
+       struct kvm_memory_slot *slot;
        bool async;
 
-       *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
-
+       slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+       async = false;
+       *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
        if (!async)
                return false; /* *pfn has correct page already */
 
@@ -3447,18 +3464,27 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
                        return true;
        }
 
-       *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
-
+       *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
        return false;
 }
 
+static bool
+check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+{
+       int page_num = KVM_PAGES_PER_HPAGE(level);
+
+       gfn &= ~(page_num - 1);
+
+       return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
+}
+
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
                          bool prefault)
 {
        pfn_t pfn;
        int r;
        int level;
-       int force_pt_level;
+       bool force_pt_level;
        gfn_t gfn = gpa >> PAGE_SHIFT;
        unsigned long mmu_seq;
        int write = error_code & PFERR_WRITE_MASK;
@@ -3467,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
        MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
        if (unlikely(error_code & PFERR_RSVD_MASK)) {
-               r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
+               r = handle_mmio_page_fault(vcpu, gpa, true);
 
                if (likely(r != RET_MMIO_PF_INVALID))
                        return r;
@@ -3477,12 +3503,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
        if (r)
                return r;
 
-       force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+       force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
+                                                          PT_DIRECTORY_LEVEL);
+       level = mapping_level(vcpu, gfn, &force_pt_level);
        if (likely(!force_pt_level)) {
-               level = mapping_level(vcpu, gfn);
+               if (level > PT_DIRECTORY_LEVEL &&
+                   !check_hugepage_cache_consistency(vcpu, gfn, level))
+                       level = PT_DIRECTORY_LEVEL;
                gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-       } else
-               level = PT_PAGE_TABLE_LEVEL;
+       }
 
        if (fast_page_fault(vcpu, gpa, level, error_code))
                return 0;
@@ -3545,7 +3574,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
        vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
-static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
+static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
                           unsigned access, int *nr_present)
 {
        if (unlikely(is_mmio_spte(*sptep))) {
@@ -3555,7 +3584,7 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
                }
 
                (*nr_present)++;
-               mark_mmio_spte(kvm, sptep, gfn, access);
+               mark_mmio_spte(vcpu, sptep, gfn, access);
                return true;
        }
 
@@ -3584,111 +3613,197 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-                                 struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+                       struct rsvd_bits_validate *rsvd_check,
+                       int maxphyaddr, int level, bool nx, bool gbpages,
+                       bool pse, bool amd)
 {
-       int maxphyaddr = cpuid_maxphyaddr(vcpu);
        u64 exb_bit_rsvd = 0;
        u64 gbpages_bit_rsvd = 0;
        u64 nonleaf_bit8_rsvd = 0;
 
-       context->bad_mt_xwr = 0;
+       rsvd_check->bad_mt_xwr = 0;
 
-       if (!context->nx)
+       if (!nx)
                exb_bit_rsvd = rsvd_bits(63, 63);
-       if (!guest_cpuid_has_gbpages(vcpu))
+       if (!gbpages)
                gbpages_bit_rsvd = rsvd_bits(7, 7);
 
        /*
         * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
         * leaf entries) on AMD CPUs only.
         */
-       if (guest_cpuid_is_amd(vcpu))
+       if (amd)
                nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
-       switch (context->root_level) {
+       switch (level) {
        case PT32_ROOT_LEVEL:
                /* no rsvd bits for 2 level 4K page table entries */
-               context->rsvd_bits_mask[0][1] = 0;
-               context->rsvd_bits_mask[0][0] = 0;
-               context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+               rsvd_check->rsvd_bits_mask[0][1] = 0;
+               rsvd_check->rsvd_bits_mask[0][0] = 0;
+               rsvd_check->rsvd_bits_mask[1][0] =
+                       rsvd_check->rsvd_bits_mask[0][0];
 
-               if (!is_pse(vcpu)) {
-                       context->rsvd_bits_mask[1][1] = 0;
+               if (!pse) {
+                       rsvd_check->rsvd_bits_mask[1][1] = 0;
                        break;
                }
 
                if (is_cpuid_PSE36())
                        /* 36bits PSE 4MB page */
-                       context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+                       rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
                else
                        /* 32 bits PSE 4MB page */
-                       context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+                       rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
                break;
        case PT32E_ROOT_LEVEL:
-               context->rsvd_bits_mask[0][2] =
+               rsvd_check->rsvd_bits_mask[0][2] =
                        rsvd_bits(maxphyaddr, 63) |
                        rsvd_bits(5, 8) | rsvd_bits(1, 2);      /* PDPTE */
-               context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 62);      /* PDE */
-               context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 62);      /* PTE */
-               context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 62) |
                        rsvd_bits(13, 20);              /* large page */
-               context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+               rsvd_check->rsvd_bits_mask[1][0] =
+                       rsvd_check->rsvd_bits_mask[0][0];
                break;
        case PT64_ROOT_LEVEL:
-               context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-                       nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-                       nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+                       nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
+                       rsvd_bits(maxphyaddr, 51);
+               rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+                       nonleaf_bit8_rsvd | gbpages_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-               context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+                       rsvd_bits(maxphyaddr, 51);
+               rsvd_check->rsvd_bits_mask[1][3] =
+                       rsvd_check->rsvd_bits_mask[0][3];
+               rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
                        gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
                        rsvd_bits(13, 29);
-               context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51) |
                        rsvd_bits(13, 20);              /* large page */
-               context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+               rsvd_check->rsvd_bits_mask[1][0] =
+                       rsvd_check->rsvd_bits_mask[0][0];
                break;
        }
 }
 
-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-               struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+                                 struct kvm_mmu *context)
 {
-       int maxphyaddr = cpuid_maxphyaddr(vcpu);
-       int pte;
+       __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+                               cpuid_maxphyaddr(vcpu), context->root_level,
+                               context->nx, guest_cpuid_has_gbpages(vcpu),
+                               is_pse(vcpu), guest_cpuid_is_amd(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+                           int maxphyaddr, bool execonly)
+{
+       u64 bad_mt_xwr;
 
-       context->rsvd_bits_mask[0][3] =
+       rsvd_check->rsvd_bits_mask[0][3] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
-       context->rsvd_bits_mask[0][2] =
+       rsvd_check->rsvd_bits_mask[0][2] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-       context->rsvd_bits_mask[0][1] =
+       rsvd_check->rsvd_bits_mask[0][1] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-       context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+       rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
 
        /* large page */
-       context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-       context->rsvd_bits_mask[1][2] =
+       rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+       rsvd_check->rsvd_bits_mask[1][2] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
-       context->rsvd_bits_mask[1][1] =
+       rsvd_check->rsvd_bits_mask[1][1] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
-       context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
-
-       for (pte = 0; pte < 64; pte++) {
-               int rwx_bits = pte & 7;
-               int mt = pte >> 3;
-               if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
-                               rwx_bits == 0x2 || rwx_bits == 0x6 ||
-                               (rwx_bits == 0x4 && !execonly))
-                       context->bad_mt_xwr |= (1ull << pte);
+       rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
+
+       bad_mt_xwr = 0xFFull << (2 * 8);        /* bits 3..5 must not be 2 */
+       bad_mt_xwr |= 0xFFull << (3 * 8);       /* bits 3..5 must not be 3 */
+       bad_mt_xwr |= 0xFFull << (7 * 8);       /* bits 3..5 must not be 7 */
+       bad_mt_xwr |= REPEAT_BYTE(1ull << 2);   /* bits 0..2 must not be 010 */
+       bad_mt_xwr |= REPEAT_BYTE(1ull << 6);   /* bits 0..2 must not be 110 */
+       if (!execonly) {
+               /* bits 0..2 must not be 100 unless VMX capabilities allow it */
+               bad_mt_xwr |= REPEAT_BYTE(1ull << 4);
        }
+       rsvd_check->bad_mt_xwr = bad_mt_xwr;
+}
+
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+               struct kvm_mmu *context, bool execonly)
+{
+       __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+                                   cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * the page table on host is the shadow page table for the page
+ * table in guest or amd nested guest, its mmu features completely
+ * follow the features in guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+       bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+
+       /*
+        * Passing "true" to the last argument is okay; it adds a check
+        * on bit 8 of the SPTEs which KVM doesn't use anyway.
+        */
+       __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+                               boot_cpu_data.x86_phys_bits,
+                               context->shadow_root_level, uses_nx,
+                               guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
+                               true);
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+static inline bool boot_cpu_is_amd(void)
+{
+       WARN_ON_ONCE(!tdp_enabled);
+       return shadow_x_mask == 0;
+}
+
+/*
+ * the direct page table on host, use as much mmu features as
+ * possible, however, kvm currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+                               struct kvm_mmu *context)
+{
+       if (boot_cpu_is_amd())
+               __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+                                       boot_cpu_data.x86_phys_bits,
+                                       context->shadow_root_level, false,
+                                       cpu_has_gbpages, true, true);
+       else
+               __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+                                           boot_cpu_data.x86_phys_bits,
+                                           false);
+
+}
+
+/*
+ * as the comments in reset_shadow_zero_bits_mask() except it
+ * is the shadow page table for intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+                               struct kvm_mmu *context, bool execonly)
+{
+       __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+                                   boot_cpu_data.x86_phys_bits, execonly);
 }
 
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
@@ -3833,6 +3948,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
        struct kvm_mmu *context = &vcpu->arch.mmu;
 
        context->base_role.word = 0;
+       context->base_role.smm = is_smm(vcpu);
        context->page_fault = tdp_page_fault;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
@@ -3868,6 +3984,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
        update_permission_bitmask(vcpu, context, false);
        update_last_pte_bitmap(vcpu, context);
+       reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3894,6 +4011,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
                = smep && !is_write_protection(vcpu);
        context->base_role.smap_andnot_wp
                = smap && !is_write_protection(vcpu);
+       context->base_role.smm = is_smm(vcpu);
+       reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -3917,6 +4036,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 
        update_permission_bitmask(vcpu, context, true);
        reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+       reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
@@ -4065,7 +4185,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
                /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
                *gpa &= ~(gpa_t)7;
                *bytes = 8;
-               r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
+               r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
                if (r)
                        gentry = 0;
                new = (const u8 *)&gentry;
@@ -4177,6 +4297,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        mask.nxe = 1;
        mask.smep_andnot_wp = 1;
        mask.smap_andnot_wp = 1;
+       mask.smm = 1;
 
        /*
         * If we don't have indirect shadow pages, it means no page is
@@ -4375,36 +4496,115 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
        init_kvm_mmu(vcpu);
 }
 
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-                                     struct kvm_memory_slot *memslot)
+/* The return value indicates if tlb flush on all vcpus is needed. */
+typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap);
+
+/* The caller should hold mmu-lock before calling this function. */
+static bool
+slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                       slot_level_handler fn, int start_level, int end_level,
+                       gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
 {
-       gfn_t last_gfn;
-       int i;
+       struct slot_rmap_walk_iterator iterator;
        bool flush = false;
 
-       last_gfn = memslot->base_gfn + memslot->npages - 1;
+       for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
+                       end_gfn, &iterator) {
+               if (iterator.rmap)
+                       flush |= fn(kvm, iterator.rmap);
 
-       spin_lock(&kvm->mmu_lock);
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       if (flush && lock_flush_tlb) {
+                               kvm_flush_remote_tlbs(kvm);
+                               flush = false;
+                       }
+                       cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       if (flush && lock_flush_tlb) {
+               kvm_flush_remote_tlbs(kvm);
+               flush = false;
+       }
 
-       for (i = PT_PAGE_TABLE_LEVEL;
-            i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-               unsigned long *rmapp;
-               unsigned long last_index, index;
+       return flush;
+}
+
+static bool
+slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                 slot_level_handler fn, int start_level, int end_level,
+                 bool lock_flush_tlb)
+{
+       return slot_handle_level_range(kvm, memslot, fn, start_level,
+                       end_level, memslot->base_gfn,
+                       memslot->base_gfn + memslot->npages - 1,
+                       lock_flush_tlb);
+}
+
+static bool
+slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                     slot_level_handler fn, bool lock_flush_tlb)
+{
+       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
+                                PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+}
+
+static bool
+slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                       slot_level_handler fn, bool lock_flush_tlb)
+{
+       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
+                                PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+}
+
+static bool
+slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                slot_level_handler fn, bool lock_flush_tlb)
+{
+       return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
+                                PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
+}
 
-               rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
-               last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int i;
 
-               for (index = 0; index <= last_index; ++index, ++rmapp) {
-                       if (*rmapp)
-                               flush |= __rmap_write_protect(kvm, rmapp,
-                                               false);
+       spin_lock(&kvm->mmu_lock);
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               slots = __kvm_memslots(kvm, i);
+               kvm_for_each_memslot(memslot, slots) {
+                       gfn_t start, end;
+
+                       start = max(gfn_start, memslot->base_gfn);
+                       end = min(gfn_end, memslot->base_gfn + memslot->npages);
+                       if (start >= end)
+                               continue;
 
-                       if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-                               cond_resched_lock(&kvm->mmu_lock);
+                       slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
+                                               PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
+                                               start, end - 1, true);
                }
        }
 
        spin_unlock(&kvm->mmu_lock);
+}
+
+static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp)
+{
+       return __rmap_write_protect(kvm, rmapp, false);
+}
+
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+                                     struct kvm_memory_slot *memslot)
+{
+       bool flush;
+
+       spin_lock(&kvm->mmu_lock);
+       flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
+                                     false);
+       spin_unlock(&kvm->mmu_lock);
 
        /*
         * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
@@ -4437,9 +4637,8 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
        pfn_t pfn;
        struct kvm_mmu_page *sp;
 
-       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-               BUG_ON(!(*sptep & PT_PRESENT_MASK));
-
+restart:
+       for_each_rmap_spte(rmapp, &iter, sptep) {
                sp = page_header(__pa(sptep));
                pfn = spte_to_pfn(*sptep);
 
@@ -4454,71 +4653,31 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
                        !kvm_is_reserved_pfn(pfn) &&
                        PageTransCompound(pfn_to_page(pfn))) {
                        drop_spte(kvm, sptep);
-                       sptep = rmap_get_first(*rmapp, &iter);
                        need_tlb_flush = 1;
-               } else
-                       sptep = rmap_get_next(&iter);
+                       goto restart;
+               }
        }
 
        return need_tlb_flush;
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                       struct kvm_memory_slot *memslot)
+                                  const struct kvm_memory_slot *memslot)
 {
-       bool flush = false;
-       unsigned long *rmapp;
-       unsigned long last_index, index;
-
+       /* FIXME: const-ify all uses of struct kvm_memory_slot.  */
        spin_lock(&kvm->mmu_lock);
-
-       rmapp = memslot->arch.rmap[0];
-       last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
-                               memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
-
-       for (index = 0; index <= last_index; ++index, ++rmapp) {
-               if (*rmapp)
-                       flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
-
-               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-                       if (flush) {
-                               kvm_flush_remote_tlbs(kvm);
-                               flush = false;
-                       }
-                       cond_resched_lock(&kvm->mmu_lock);
-               }
-       }
-
-       if (flush)
-               kvm_flush_remote_tlbs(kvm);
-
+       slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
+                        kvm_mmu_zap_collapsible_spte, true);
        spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot)
 {
-       gfn_t last_gfn;
-       unsigned long *rmapp;
-       unsigned long last_index, index;
-       bool flush = false;
-
-       last_gfn = memslot->base_gfn + memslot->npages - 1;
+       bool flush;
 
        spin_lock(&kvm->mmu_lock);
-
-       rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1];
-       last_index = gfn_to_index(last_gfn, memslot->base_gfn,
-                       PT_PAGE_TABLE_LEVEL);
-
-       for (index = 0; index <= last_index; ++index, ++rmapp) {
-               if (*rmapp)
-                       flush |= __rmap_clear_dirty(kvm, rmapp);
-
-               if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-                       cond_resched_lock(&kvm->mmu_lock);
-       }
-
+       flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
        spin_unlock(&kvm->mmu_lock);
 
        lockdep_assert_held(&kvm->slots_lock);
@@ -4537,31 +4696,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
 {
-       gfn_t last_gfn;
-       int i;
-       bool flush = false;
-
-       last_gfn = memslot->base_gfn + memslot->npages - 1;
+       bool flush;
 
        spin_lock(&kvm->mmu_lock);
-
-       for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */
-            i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-               unsigned long *rmapp;
-               unsigned long last_index, index;
-
-               rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
-               last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
-
-               for (index = 0; index <= last_index; ++index, ++rmapp) {
-                       if (*rmapp)
-                               flush |= __rmap_write_protect(kvm, rmapp,
-                                               false);
-
-                       if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-                               cond_resched_lock(&kvm->mmu_lock);
-               }
-       }
+       flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
+                                       false);
        spin_unlock(&kvm->mmu_lock);
 
        /* see kvm_mmu_slot_remove_write_access */
@@ -4575,31 +4714,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
 void kvm_mmu_slot_set_dirty(struct kvm *kvm,
                            struct kvm_memory_slot *memslot)
 {
-       gfn_t last_gfn;
-       int i;
-       bool flush = false;
-
-       last_gfn = memslot->base_gfn + memslot->npages - 1;
+       bool flush;
 
        spin_lock(&kvm->mmu_lock);
-
-       for (i = PT_PAGE_TABLE_LEVEL;
-            i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-               unsigned long *rmapp;
-               unsigned long last_index, index;
-
-               rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
-               last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
-
-               for (index = 0; index <= last_index; ++index, ++rmapp) {
-                       if (*rmapp)
-                               flush |= __rmap_set_dirty(kvm, rmapp);
-
-                       if (need_resched() || spin_needbreak(&kvm->mmu_lock))
-                               cond_resched_lock(&kvm->mmu_lock);
-               }
-       }
-
+       flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
        spin_unlock(&kvm->mmu_lock);
 
        lockdep_assert_held(&kvm->slots_lock);
@@ -4696,13 +4814,13 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
        return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
 {
        /*
         * The very rare case: if the generation-number is round,
         * zap all shadow pages.
         */
-       if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
+       if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
                printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
                kvm_mmu_invalidate_zap_all_pages(kvm);
        }
@@ -4824,41 +4942,22 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
        unsigned int  nr_pages = 0;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
+       int i;
 
-       slots = kvm_memslots(kvm);
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               slots = __kvm_memslots(kvm, i);
 
-       kvm_for_each_memslot(memslot, slots)
-               nr_pages += memslot->npages;
+               kvm_for_each_memslot(memslot, slots)
+                       nr_pages += memslot->npages;
+       }
 
        nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
        nr_mmu_pages = max(nr_mmu_pages,
-                       (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+                          (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
 
        return nr_mmu_pages;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
-{
-       struct kvm_shadow_walk_iterator iterator;
-       u64 spte;
-       int nr_sptes = 0;
-
-       if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-               return nr_sptes;
-
-       walk_shadow_page_lockless_begin(vcpu);
-       for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
-               sptes[iterator.level-1] = spte;
-               nr_sptes++;
-               if (!is_shadow_present_pte(spte))
-                       break;
-       }
-       walk_shadow_page_lockless_end(vcpu);
-
-       return nr_sptes;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
        kvm_mmu_unload(vcpu);