[PATCH 2/3] KVM: PPC: BOOK3S: HV: Use unlock variant with memory barrier
Aneesh Kumar K.V
2014-10-20 14:28:59 UTC
We switch to the unlock variant with a memory barrier in the error
paths and in code paths that so far had an implicit dependency on
previous functions calling lwsync/ptesync. In most of these cases we
don't strictly need an explicit barrier, but using the barrier variant
makes sure we don't introduce mistakes with later code movement. We
also document why the non-barrier variant is OK in the
performance-critical paths.
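
For reference, a sketch of the two helpers this patch switches
between, assuming they follow the usual PowerPC release-barrier
pattern (patch 1/3 of this series introduces them; the exact
definitions are not reproduced here):

    /* Sketch only: assumed shape of the lock-release helpers. */
    static inline void __unlock_hpte(__be64 *hptep, unsigned long hpte_v)
    {
            /* Non-barrier variant: a plain store drops HPTE_V_HVLOCK. */
            hpte_v &= ~HPTE_V_HVLOCK;
            hptep[0] = cpu_to_be64(hpte_v);
    }

    static inline void unlock_hpte(__be64 *hptep, unsigned long hpte_v)
    {
            /*
             * Barrier variant: PPC_RELEASE_BARRIER (lwsync) orders all
             * earlier stores before the store that releases the lock.
             */
            hpte_v &= ~HPTE_V_HVLOCK;
            asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
            hptep[0] = cpu_to_be64(hpte_v);
    }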

Signed-off-by: Aneesh Kumar K.V <***@linux.vnet.ibm.com>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 10 +++++-----
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 15 ++++++++++-----
2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 5ea4b2b6a157..c97690ffb5f6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -774,7 +774,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return ret;

out_unlock:
- __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+ unlock_hpte(hptep, be64_to_cpu(hptep[0]));
preempt_enable();
goto out_put;
}
@@ -903,8 +903,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
note_hpte_modification(kvm, &rev[i]);
}
}
+ unlock_hpte(hptep, be64_to_cpu(hptep[0]));
unlock_rmap(rmapp);
- __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
return 0;
}
@@ -992,7 +992,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
ret = 1;
}
- __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+ unlock_hpte(hptep, be64_to_cpu(hptep[0]));
} while ((i = j) != head);

unlock_rmap(rmapp);
@@ -1115,7 +1115,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)

/* Now check and modify the HPTE */
if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
- __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+ unlock_hpte(hptep, be64_to_cpu(hptep[0]));
continue;
}
/* need to make it temporarily absent so C is stable */
@@ -1137,7 +1137,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
}
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
- __unlock_hpte(hptep, v);
+ unlock_hpte(hptep, v);
} while ((i = j) != head);

unlock_rmap(rmapp);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 769a5d4c0430..78e689b066f1 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -292,6 +292,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
pte = be64_to_cpu(hpte[0]);
if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
break;
+ /*
+ * Data dependency will avoid re-ordering
+ */
__unlock_hpte(hpte, pte);
hpte += 2;
}
@@ -310,7 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
cpu_relax();
pte = be64_to_cpu(hpte[0]);
if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
- __unlock_hpte(hpte, pte);
+ unlock_hpte(hpte, pte);
return H_PTEG_FULL;
}
}
@@ -481,7 +484,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (pte & avpn) != 0)) {
- __unlock_hpte(hpte, pte);
+ unlock_hpte(hpte, pte);
return H_NOT_FOUND;
}

@@ -617,7 +620,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
- __unlock_hpte(hp, 0);
+ unlock_hpte(hp, 0);
}
}

@@ -643,7 +646,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
pte = be64_to_cpu(hpte[0]);
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
- __unlock_hpte(hpte, pte);
+ unlock_hpte(hpte, pte);
return H_NOT_FOUND;
}

@@ -834,7 +837,9 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
hpte_base_page_size(v, r) == (1ul << pshift))
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
-
+ /*
+ * Data dependency will avoid re-ordering
+ */
__unlock_hpte(&hpte[i], v);
}
--
1.9.1
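
The two "data dependency" comments added above rest on the following
reasoning: in both loops the value passed to __unlock_hpte() is
computed from a load of the same HPTE word, so the unlock store has a
data dependency on that load and the CPU cannot issue the store before
the load completes, even without a barrier. Note that this only orders
the unlock against that one load; it does not order earlier unrelated
stores, which is why the barrier-free variant is reserved for paths
that modified nothing. A minimal illustration of the pattern (not the
exact kernel code):

    /* Scan loop: nothing was written, so only the load must be ordered. */
    pte = be64_to_cpu(hpte[0]);             /* load the locked HPTE word */
    if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
            break;                          /* found a free slot */
    __unlock_hpte(hpte, pte);               /* store is derived from pte */
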
Aneesh Kumar K.V
2014-10-20 14:29:00 UTC
Minor cleanup: rename the local variable n in kvmppc_h_bulk_remove()
to collected_hpte, so that the name says what the batching loop is
counting.

Signed-off-by: Aneesh Kumar K.V <***@linux.vnet.ibm.com>
---
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 78e689b066f1..2922f8d127ff 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -523,7 +523,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
unsigned long *args = &vcpu->arch.gpr[4];
__be64 *hp, *hptes[4];
unsigned long tlbrb[4];
- long int i, j, k, n, found, indexes[4];
+ long int i, j, k, collected_hpte, found, indexes[4];
unsigned long flags, req, pte_index, rcbits;
int global;
long int ret = H_SUCCESS;
@@ -532,7 +532,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)

global = global_invalidates(kvm, 0);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
- n = 0;
+ collected_hpte = 0;
for (; i < 4; ++i) {
j = i * 2;
pte_index = args[j];
@@ -554,7 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
/* to avoid deadlock, don't spin except for first */
if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
- if (n)
+ if (collected_hpte)
break;
while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
cpu_relax();
@@ -596,22 +596,23 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)

/* leave it locked */
hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
- tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
- be64_to_cpu(hp[1]), pte_index);
- indexes[n] = j;
- hptes[n] = hp;
- revs[n] = rev;
- ++n;
+ tlbrb[collected_hpte] = compute_tlbie_rb(be64_to_cpu(hp[0]),
+ be64_to_cpu(hp[1]),
+ pte_index);
+ indexes[collected_hpte] = j;
+ hptes[collected_hpte] = hp;
+ revs[collected_hpte] = rev;
+ ++collected_hpte;
}

- if (!n)
+ if (!collected_hpte)
break;

/* Now that we've collected a batch, do the tlbies */
- do_tlbies(kvm, tlbrb, n, global, true);
+ do_tlbies(kvm, tlbrb, collected_hpte, global, true);

/* Read PTE low words after tlbie to get final R/C values */
- for (k = 0; k < n; ++k) {
+ for (k = 0; k < collected_hpte; ++k) {
j = indexes[k];
pte_index = args[j] & ((1ul << 56) - 1);
hp = hptes[k];
--
1.9.1
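
For context, the loop being cleaned up batches up to four HPTE
removals and uses a trylock scheme to avoid deadlock: only the first
entry of a batch may spin; any later entry that is found locked ends
the batch, which is then flushed with a single round of tlbies. A
condensed sketch of that shape (hpte_for_index() is a made-up
placeholder; the real code also validates flags and collects the
rmap/RC bits):

    collected_hpte = 0;
    for (; i < 4; ++i) {
            hp = hpte_for_index(kvm, args[i * 2]);  /* placeholder */
            if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
                    if (collected_hpte)
                            break;          /* flush what we have so far */
                    while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
                            cpu_relax();    /* only the first entry spins */
            }
            /* ... invalidate the HPTE, record its tlbie operand ... */
            ++collected_hpte;
    }
    do_tlbies(kvm, tlbrb, collected_hpte, global, true);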