These changes are the raw update of the kernel sources to linux-4.4.6-rt14.
[kvmfornfv.git] kernel/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index c6d601c..9170051 100644
@@ -12,6 +12,7 @@
 #include <linux/kvm_host.h>
 #include <linux/hugetlb.h>
 #include <linux/module.h>
+#include <linux/log2.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -31,7 +32,7 @@ static void *real_vmalloc_addr(void *x)
         * So don't worry about THP collapse/split. Called
         * only in realmode, hence won't need irq_save/restore.
         */
-       p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+       p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
        if (!p || !pte_present(*p))
                return NULL;
        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
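
real_vmalloc_addr() walks the kernel page table in real mode to turn a
vmalloc address into its linear-map equivalent; the last line above splices
the physical frame from the PTE onto the in-page offset of the original
address. A minimal sketch of that recombination, assuming the usual 4K page
geometry (PAGE_SHIFT = 12):

    /* Sketch only: recombine frame number and page offset (4K pages). */
    static unsigned long frame_plus_offset(unsigned long pfn,
                                           unsigned long addr)
    {
            return (pfn << 12) | (addr & 0xfffUL);
    }
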
@@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
+/* Update the changed page order field of an rmap entry */
+void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
+{
+       unsigned long order;
+
+       if (!psize)
+               return;
+       order = ilog2(psize);
+       order <<= KVMPPC_RMAP_CHG_SHIFT;
+       if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
+               *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
+}
+EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
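
kvmppc_update_rmap_change() records the size of a dirtied page in the rmap
word as a log2 order and only ever grows the field, so a later dirty-log
harvest knows the largest page that was modified through this entry. A
sketch of the layout, assuming the upstream 4.4 constants (they are not
visible in this hunk):

    /*
     * Assumed definitions (4.4-era asm/kvm_host.h):
     *   #define KVMPPC_RMAP_CHG_SHIFT  48
     *   #define KVMPPC_RMAP_CHG_ORDER  (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
     * e.g. a dirty 64K page stores ilog2(0x10000) = 16 in bits 48-53.
     */
    static unsigned long rmap_chg_order(unsigned long rmap)    /* sketch */
    {
            return (rmap & KVMPPC_RMAP_CHG_ORDER) >> KVMPPC_RMAP_CHG_SHIFT;
    }
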
+
+/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
+static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
+                                     unsigned long hpte_gr)
+{
+       struct kvm_memory_slot *memslot;
+       unsigned long *rmap;
+       unsigned long gfn;
+
+       gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
+       memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+       if (!memslot)
+               return NULL;
+
+       rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
+       return rmap;
+}
+
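
revmap_for_hpte() centralizes a lookup both new hcalls below need: decode
the guest frame number from the HPTE, find its memslot, and return a
real-mode-usable pointer into that memslot's rmap array, or NULL if the
page belongs to no slot. The expected call pattern, as used below:

    /* Typical caller (sketch); v and gr are sampled under HPTE_V_HVLOCK. */
    rmap = revmap_for_hpte(kvm, v, gr);
    if (rmap) {
            lock_rmap(rmap);
            /* ... update reference/change bits in *rmap ... */
            unlock_rmap(rmap);
    }
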
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                                struct revmap_entry *rev,
                                unsigned long hpte_v, unsigned long hpte_r)
 {
        struct revmap_entry *next, *prev;
-       unsigned long gfn, ptel, head;
-       struct kvm_memory_slot *memslot;
+       unsigned long ptel, head;
        unsigned long *rmap;
        unsigned long rcbits;
 
        rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
        ptel = rev->guest_rpte |= rcbits;
-       gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
-       memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-       if (!memslot)
+       rmap = revmap_for_hpte(kvm, hpte_v, ptel);
+       if (!rmap)
                return;
-
-       rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
        lock_rmap(rmap);
 
        head = *rmap & KVMPPC_RMAP_INDEX;
@@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
        }
        *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+       if (rcbits & HPTE_R_C)
+               kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
        unlock_rmap(rmap);
 }
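
For reference, the chain unlinked here is a circular doubly linked list of
HPTE indices threaded through the revmap entries, with the head index kept
in the low bits of the rmap word. A simplified model (a sketch of this
file's usage, not the full locking protocol):

    /* Simplified model of the reverse-map chain. */
    struct revmap_entry {
            unsigned long guest_rpte;       /* guest view of the HPTE */
            unsigned int forw, back;        /* circular list of HPTE indices */
    };
    /*
     * *rmap & KVMPPC_RMAP_INDEX is the head of the list; unlinking
     * pte_index splices next->back = rev->back and prev->forw = rev->forw,
     * then moves the head (or clears the present bit if the list empties).
     */
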
 
@@ -191,10 +221,12 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
         * retry via mmu_notifier_retry.
         */
        if (realmode)
-               ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+               ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
+                                                  &hpage_shift);
        else {
                local_irq_save(irq_flags);
-               ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+               ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
+                                                &hpage_shift);
        }
        if (ptep) {
                pte_t pte;
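
The added NULL fills the new third parameter of the page-table walker, an
out-flag that distinguishes transparent huge pages from hugetlb mappings;
callers that only need the page shift pass NULL. For context, the 4.4-era
prototype is assumed to be:

    pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
                                       bool *is_thp, unsigned *shift);
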
@@ -440,6 +472,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
        note_hpte_modification(kvm, rev);
        unlock_hpte(hpte, 0);
 
+       if (v & HPTE_V_ABSENT)
+               v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
        hpret[0] = v;
        hpret[1] = r;
        return H_SUCCESS;
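
HPTE_V_ABSENT is host-internal state: it marks entries KVM has invalidated
in hardware (for instance while the backing page is paged out) but still
considers present on the guest's behalf. The two added lines keep that
state from leaking into H_REMOVE's return value:

    /*
     * Guest-visible contract (sketch):
     *   host view of v                 value returned in hpret[0]
     *   HPTE_V_VALID                -> HPTE_V_VALID
     *   HPTE_V_ABSENT (paged out)   -> HPTE_V_VALID
     * so the guest always sees the HPTE it installed as a valid entry.
     */
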
@@ -661,6 +695,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
        return H_SUCCESS;
 }
 
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+                       unsigned long pte_index)
+{
+       struct kvm *kvm = vcpu->kvm;
+       __be64 *hpte;
+       unsigned long v, r, gr;
+       struct revmap_entry *rev;
+       unsigned long *rmap;
+       long ret = H_NOT_FOUND;
+
+       if (pte_index >= kvm->arch.hpt_npte)
+               return H_PARAMETER;
+
+       rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+       hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+       while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+               cpu_relax();
+       v = be64_to_cpu(hpte[0]);
+       r = be64_to_cpu(hpte[1]);
+       if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+               goto out;
+
+       gr = rev->guest_rpte;
+       if (rev->guest_rpte & HPTE_R_R) {
+               rev->guest_rpte &= ~HPTE_R_R;
+               note_hpte_modification(kvm, rev);
+       }
+       if (v & HPTE_V_VALID) {
+               gr |= r & (HPTE_R_R | HPTE_R_C);
+               if (r & HPTE_R_R) {
+                       kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
+                       rmap = revmap_for_hpte(kvm, v, gr);
+                       if (rmap) {
+                               lock_rmap(rmap);
+                               *rmap |= KVMPPC_RMAP_REFERENCED;
+                               unlock_rmap(rmap);
+                       }
+               }
+       }
+       vcpu->arch.gpr[4] = gr;
+       ret = H_SUCCESS;
+ out:
+       unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+       return ret;
+}
+
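
kvmppc_h_clear_ref() implements the PAPR H_CLEAR_REF hcall: it returns the
page's accumulated R/C state in r4 and clears the referenced bit, folding R
into the rmap entry so page-aging still sees it. A hypothetical guest-side
use (a sketch: H_CLEAR_REF is hcall 0x14 in PAPR, plpar_hcall() returns r4
in retbuf[0], and mark_page_young() is an invented helper):

    unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
    long rc = plpar_hcall(H_CLEAR_REF, retbuf, 0 /* flags */, pte_index);
    if (rc == H_SUCCESS && (retbuf[0] & HPTE_R_R))
            mark_page_young(pte_index);     /* page was recently referenced */
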
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+                       unsigned long pte_index)
+{
+       struct kvm *kvm = vcpu->kvm;
+       __be64 *hpte;
+       unsigned long v, r, gr;
+       struct revmap_entry *rev;
+       unsigned long *rmap;
+       long ret = H_NOT_FOUND;
+
+       if (pte_index >= kvm->arch.hpt_npte)
+               return H_PARAMETER;
+
+       rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+       hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+       while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+               cpu_relax();
+       v = be64_to_cpu(hpte[0]);
+       r = be64_to_cpu(hpte[1]);
+       if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+               goto out;
+
+       gr = rev->guest_rpte;
+       if (gr & HPTE_R_C) {
+               rev->guest_rpte &= ~HPTE_R_C;
+               note_hpte_modification(kvm, rev);
+       }
+       if (v & HPTE_V_VALID) {
+               /* need to make it temporarily absent so C is stable */
+               hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
+               kvmppc_invalidate_hpte(kvm, hpte, pte_index);
+               r = be64_to_cpu(hpte[1]);
+               gr |= r & (HPTE_R_R | HPTE_R_C);
+               if (r & HPTE_R_C) {
+                       unsigned long psize = hpte_page_size(v, r);
+                       hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
+                       eieio();
+                       rmap = revmap_for_hpte(kvm, v, gr);
+                       if (rmap) {
+                               lock_rmap(rmap);
+                               *rmap |= KVMPPC_RMAP_CHANGED;
+                               kvmppc_update_rmap_change(rmap, psize);
+                               unlock_rmap(rmap);
+                       }
+               }
+       }
+       vcpu->arch.gpr[4] = gr;
+       ret = H_SUCCESS;
+ out:
+       unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+       return ret;
+}
+
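
kvmppc_h_clear_mod() is the dirty-bit analogue, and the "temporarily
absent" step is the subtle part: hardware may set HPTE_R_C through a valid
HPTE at any moment, so C can only be sampled and cleared once the entry is
out of the TLB. The ordering, sketched from the code above:

    /*
     *  1. hpte[0] |= HPTE_V_ABSENT     - entry stays present to KVM
     *  2. kvmppc_invalidate_hpte()     - clears HPTE_V_VALID and tlbies,
     *                                    so hardware can no longer set C
     *  3. re-read r, store r & ~HPTE_R_C, eieio() to order the update
     *  4. fold the change into the rmap (KVMPPC_RMAP_CHANGED, page order)
     *     so dirty-page logging does not lose the write
     *  5. unlock_hpte() writes back the original v, re-validating the entry
     * The guest receives the accumulated R/C bits in r4.
     */
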
 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
                        unsigned long pte_index)
 {