Re: [PATCH 10/16] KVM: x86/tdp_mmu: Support TDX private mapping for TDP MMU

From: Edgecombe, Rick P
Date: Thu May 16 2024 - 22:36:07 EST


Here is a diff of an attempt to merge all the feedback so far. It's on top of
the dev branch from this series.

On Thu, 2024-05-16 at 12:42 -0700, Isaku Yamahata wrote:
> - rename role.is_private => role.is_mirrored_pt

Agreed.

>
> - sp->gfn: gfn without shared bit.
>
> - fault->address: without gfn_shared_mask
>   Actually it doesn't matter much.  We can use gpa with gfn_shared_mask.

I left fault->addr with the shared bit. It's no longer used for TDX except in
the tracepoint, where I think keeping the shared bit makes sense.

>
> - Update struct tdp_iter
>   struct tdp_iter
>     gfn: gfn without shared bit
>
>     /* Add new members */
>
>     /* Indicates which PT to walk. */
>     bool mirrored_pt;
>
>     // This is used tdp_iter_refresh_sptep()
>     // shared gfn_mask if mirrored_pt
>     // 0 if !mirrored_pt
>     gfn_shared_mask
>
> - Pass mirrored_pt and gfn_shared_mask to
>   tdp_iter_start(..., mirrored_pt, gfn_shared_mask)
>
>   and update tdp_iter_refresh_sptep()
>   static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
>         ...
>         iter->sptep = iter->pt_path[iter->level - 1] +
>                 SPTE_INDEX((iter->gfn << PAGE_SHIFT) | iter->gfn_shared_mask,
> iter->level);

I tried something else. The iterators still hold GFNs with the shared bit, but
the addition of the shared bit is wrapped in tdp_mmu_for_each_pte(), so
kvm_tdp_mmu_map() and similar don't have to handle the shared bit. They just
pass in a root, and tdp_mmu_for_each_pte() knows how to adjust the GFN, like this:

#define tdp_mmu_for_each_pte(_iter, _kvm, _root, _start, _end) \
for_each_tdp_pte(_iter, _root, \
kvm_gfn_for_root(_kvm, _root, _start), \
kvm_gfn_for_root(_kvm, _root, _end))

I also changed the callers to use the new enum to specify roots. This way they
can pass something with a nice name instead of true/false for bool private.

Keeping a gfn_shared_mask inside the iterator didn't seem any clearer to me, and
it was a bit more cumbersome. But please compare the two approaches.

>
>   Change for_each_tdp_pte_min_level() accordingly.
>   Also the iterator to call this.
>   
>   #define for_each_tdp_pte_min_level(kvm, iter, root, min_level, start,
> end)      \
>           for (tdp_iter_start(&iter, root, min_level,
> start,                      \
>                mirrored_root, mirrored_root ? kvm_gfn_shared_mask(kvm) :
> 0);      \
>                iter.valid && iter.gfn < kvm_gfn_for_root(kvm, root,
> end);         \
>                tdp_iter_next(&iter))

I liked it a lot because the callers don't need to manually call
kvm_gfn_for_root() anymore. But when I tried it, it required adding a kvm
argument at a lot of the iterator call sites. I ended up removing it, but I'm
not sure that was the right call.

>
> - trace point: update to include mirrored_pt. Or leave it as is for now.
>
> - pr_err() that logs gfn in handle_changed_spte()
>   Update to include mirrored_pt. Or leave it as is for now.

I left it as is, since fault->addr still has the shared bit.

>
> - Update spte handler (handle_changed_spte(), handle_removed_pt()...),
>   use iter->mirror_pt or pass down mirror_pt.

You mean just rename it, or something else?


Anyway below is a first cut based on the discussion.

A few other things:
1. kvm_is_private_gpa() is moved into Intel code (as gpa_on_private_root() in
vmx/common.h). kvm_gfn_shared_mask() remains for only two operations in common
code:
- kvm_gfn_for_root() <- required for zapping/mapping
- Stripping the bit when setting fault.gfn <- possible to remove if we strip
the bit from cr2_or_gpa instead
2. I also played with changing KVM_PRIVATE_ROOTS to KVM_MIRROR_ROOTS.
Unfortunately there is still some confusion between private and mirrored. For
example, you walk a mirror root (which is what is actually happening), but as
you do so you have to allocate private page tables and call out to x86_ops that
are named private. So those concepts are effectively linked and used a bit
interchangeably.

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e35a446baaad..64af6fd7cf85 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -351,7 +351,7 @@ union kvm_mmu_page_role {
unsigned ad_disabled:1;
unsigned guest_mode:1;
unsigned passthrough:1;
- unsigned is_private:1;
+ unsigned mirrored_pt:1;
unsigned :4;

/*
@@ -364,14 +364,14 @@ union kvm_mmu_page_role {
};
};

-static inline bool kvm_mmu_page_role_is_private(union kvm_mmu_page_role role)
+static inline bool kvm_mmu_page_role_is_mirrored(union kvm_mmu_page_role role)
{
- return !!role.is_private;
+ return !!role.mirrored_pt;
}

-static inline void kvm_mmu_page_role_set_private(union kvm_mmu_page_role *role)
+static inline void kvm_mmu_page_role_set_mirrored(union kvm_mmu_page_role
*role)
{
- role->is_private = 1;
+ role->mirrored_pt = 1;
}

/*
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a578ea09dfb3..0c08b4f9093c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -338,21 +338,26 @@ static inline gfn_t kvm_gfn_shared_mask(const struct kvm
*kvm)
return kvm->arch.gfn_shared_mask;
}

-static inline gfn_t kvm_gfn_to_shared(const struct kvm *kvm, gfn_t gfn)
-{
- return gfn | kvm_gfn_shared_mask(kvm);
-}
-
static inline gfn_t kvm_gfn_to_private(const struct kvm *kvm, gfn_t gfn)
{
return gfn & ~kvm_gfn_shared_mask(kvm);
}

-static inline bool kvm_is_private_gpa(const struct kvm *kvm, gpa_t gpa)
-{
- gfn_t mask = kvm_gfn_shared_mask(kvm);

- return mask && !(gpa_to_gfn(gpa) & mask);
+/* The VM keeps a mirrored copy of the private memory */
+static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
+{
+ return kvm->arch.vm_type == KVM_X86_TDX_VM;
+}
+
+static inline bool kvm_has_private_root(const struct kvm *kvm)
+{
+ return kvm->arch.vm_type == KVM_X86_TDX_VM;
+}
+
+static inline bool kvm_zap_leafs_only(const struct kvm *kvm)
+{
+ return kvm->arch.vm_type == KVM_X86_TDX_VM;
}

#endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3d291c5d2d50..c6a0af5aefce 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -686,7 +686,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu,
bool maybe_indirect)
1 + PT64_ROOT_MAX_LEVEL +
PTE_PREFETCH_NUM);
if (r)
return r;
- if (kvm_gfn_shared_mask(vcpu->kvm)) {
+ if (kvm_has_mirrored_tdp(vcpu->kvm)) {
r = kvm_mmu_topup_memory_cache(&vcpu-
>arch.mmu_private_spt_cache,
PT64_ROOT_MAX_LEVEL);
if (r)
@@ -3702,7 +3702,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
int r;

if (tdp_mmu_enabled) {
- if (kvm_gfn_shared_mask(vcpu->kvm))
+ if (kvm_has_private_root(vcpu->kvm))
kvm_tdp_mmu_alloc_root(vcpu, true);
kvm_tdp_mmu_alloc_root(vcpu, false);
return 0;
@@ -6539,17 +6539,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start,
gfn_t gfn_end)

flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);

- if (tdp_mmu_enabled) {
- /*
- * kvm_zap_gfn_range() is used when MTRR or PAT memory
- * type was changed. TDX can't handle zapping the private
- * mapping, but it's ok because KVM doesn't support either of
- * those features for TDX. In case a new caller appears, BUG
- * the VM if it's called for solutions with private aliases.
- */
- KVM_BUG_ON(kvm_gfn_shared_mask(kvm), kvm);
+ if (tdp_mmu_enabled)
flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
- }

if (flush)
kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end -
gfn_start);
@@ -6996,10 +6987,38 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
kvm_mmu_zap_all(kvm);
}

+static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot
*slot)
+{
+ if (KVM_BUG_ON(!tdp_mmu_enabled, kvm))
+ return;
+
+ write_lock(&kvm->mmu_lock);
+
+ /*
+ * Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst
+ * case scenario we'll have unused shadow pages lying around until they
+ * are recycled due to age or when the VM is destroyed.
+ */
+ struct kvm_gfn_range range = {
+ .slot = slot,
+ .start = slot->base_gfn,
+ .end = slot->base_gfn + slot->npages,
+ .may_block = true,
+ };
+
+ if (kvm_tdp_mmu_unmap_gfn_range(kvm, &range, false))
+ kvm_flush_remote_tlbs(kvm);
+
+ write_unlock(&kvm->mmu_lock);
+}
+
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
- kvm_mmu_zap_all_fast(kvm);
+ if (kvm_zap_leafs_only(kvm))
+ kvm_mmu_zap_memslot_leafs(kvm, slot);
+ else
+ kvm_mmu_zap_all_fast(kvm);
}

void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 3a7fe9261e23..2b1b2a980b03 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -159,9 +159,9 @@ static inline int kvm_mmu_page_as_id(struct kvm_mmu_page
*sp)
return kvm_mmu_role_as_id(sp->role);
}

-static inline bool is_private_sp(const struct kvm_mmu_page *sp)
+static inline bool is_mirrored_sp(const struct kvm_mmu_page *sp)
{
- return kvm_mmu_page_role_is_private(sp->role);
+ return kvm_mmu_page_role_is_mirrored(sp->role);
}

static inline void *kvm_mmu_private_spt(struct kvm_mmu_page *sp)
@@ -186,7 +186,7 @@ static inline gfn_t kvm_gfn_for_root(struct kvm *kvm, struct
kvm_mmu_page *root,
gfn_t gfn_for_root = kvm_gfn_to_private(kvm, gfn);

/* Set shared bit if not private */
- gfn_for_root |= -(gfn_t)!is_private_sp(root) & kvm_gfn_shared_mask(kvm);
+ gfn_for_root |= -(gfn_t)!is_mirrored_sp(root) &
kvm_gfn_shared_mask(kvm);
return gfn_for_root;
}

diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 5eae8eac2da0..d0d13a4317e8 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -74,9 +74,6 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned
int access)
u64 spte = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;

- WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value &&
- !kvm_gfn_shared_mask(vcpu->kvm));
-
access &= shadow_mmio_access_mask;
spte |= vcpu->kvm->arch.shadow_mmio_value | access;
spte |= gpa | shadow_nonpresent_or_rsvd_mask;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index d0df691ced5c..17d3f1593a24 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -265,9 +265,9 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
return spte_to_child_sp(root);
}

-static inline bool is_private_sptep(u64 *sptep)
+static inline bool is_mirrored_sptep(u64 *sptep)
{
- return is_private_sp(sptep_to_sp(sptep));
+ return is_mirrored_sp(sptep_to_sp(sptep));
}

static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 42ccafc7deff..7f13016e210b 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -97,15 +97,15 @@ static bool tdp_mmu_root_match(struct kvm_mmu_page *root,
{
if (WARN_ON_ONCE(types == BUGGY_KVM_ROOTS))
return false;
- if (WARN_ON_ONCE(!(types & (KVM_SHARED_ROOTS | KVM_PRIVATE_ROOTS))))
+ if (WARN_ON_ONCE(!(types & (KVM_SHARED_ROOTS | KVM_MIRROR_ROOTS))))
return false;

if ((types & KVM_VALID_ROOTS) && root->role.invalid)
return false;

- if ((types & KVM_SHARED_ROOTS) && !is_private_sp(root))
+ if ((types & KVM_SHARED_ROOTS) && !is_mirrored_sp(root))
return true;
- if ((types & KVM_PRIVATE_ROOTS) && is_private_sp(root))
+ if ((types & KVM_MIRROR_ROOTS) && is_mirrored_sp(root))
return true;

return false;
@@ -252,7 +252,7 @@ void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool
private)
struct kvm_mmu_page *root;

if (private)
- kvm_mmu_page_role_set_private(&role);
+ kvm_mmu_page_role_set_mirrored(&role);

/*
* Check for an existing root before acquiring the pages lock to avoid
@@ -446,7 +446,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t
pt, bool shared)
shared);
}

- if (is_private_sp(sp) &&
+ if (is_mirrored_sp(sp) &&
WARN_ON(static_call(kvm_x86_free_private_spt)(kvm, sp->gfn, sp-
>role.level,

kvm_mmu_private_spt(sp)))) {
/*
@@ -580,7 +580,7 @@ static void handle_changed_spte(struct kvm *kvm, int as_id,
gfn_t gfn,
u64 old_spte, u64 new_spte,
union kvm_mmu_page_role role, bool shared)
{
- bool is_private = kvm_mmu_page_role_is_private(role);
+ bool is_mirrored = kvm_mmu_page_role_is_mirrored(role);
int level = role.level;
bool was_present = is_shadow_present_pte(old_spte);
bool is_present = is_shadow_present_pte(new_spte);
@@ -665,12 +665,12 @@ static void handle_changed_spte(struct kvm *kvm, int
as_id, gfn_t gfn,
*/
if (was_present && !was_leaf &&
(is_leaf || !is_present || WARN_ON_ONCE(pfn_changed))) {
- KVM_BUG_ON(is_private !=
is_private_sptep(spte_to_child_pt(old_spte, level)),
+ KVM_BUG_ON(is_mirrored !=
is_mirrored_sptep(spte_to_child_pt(old_spte, level)),
kvm);
handle_removed_pt(kvm, spte_to_child_pt(old_spte, level),
shared);
}

- if (is_private && !is_present)
+ if (is_mirrored && !is_present)
handle_removed_private_spte(kvm, gfn, old_spte, new_spte,
role.level);

if (was_leaf && is_accessed_spte(old_spte) &&
@@ -690,7 +690,7 @@ static inline int __tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *it
*/
WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));

- if (is_private_sptep(iter->sptep) && !is_removed_spte(new_spte)) {
+ if (is_mirrored_sptep(iter->sptep) && !is_removed_spte(new_spte)) {
int ret;

if (is_shadow_present_pte(new_spte)) {
@@ -840,7 +840,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id,
tdp_ptep_t sptep,
WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));

old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
- if (is_private_sptep(sptep) && !is_removed_spte(new_spte) &&
+ if (is_mirrored_sptep(sptep) && !is_removed_spte(new_spte) &&
is_shadow_present_pte(new_spte)) {
/* Because write spin lock is held, no race. It should success.
*/
KVM_BUG_ON(__set_private_spte_present(kvm, sptep, gfn, old_spte,
@@ -872,11 +872,10 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm,
struct tdp_iter *iter,
continue; \
else

-#define tdp_mmu_for_each_pte(_iter, _mmu, _private, _start, _end) \
- for_each_tdp_pte(_iter, \
- root_to_sp((_private) ? _mmu->private_root_hpa : \
- _mmu->root.hpa), \
- _start, _end)
+#define tdp_mmu_for_each_pte(_iter, _kvm, _root, _start, _end) \
+ for_each_tdp_pte(_iter, _root, \
+ kvm_gfn_for_root(_kvm, _root, _start), \
+ kvm_gfn_for_root(_kvm, _root, _end))

/*
* Yield if the MMU lock is contended or this thread needs to return control
@@ -1307,12 +1306,11 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm,
struct tdp_iter *iter,
*/
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
- struct kvm_mmu *mmu = vcpu->arch.mmu;
struct kvm *kvm = vcpu->kvm;
+ enum kvm_tdp_mmu_root_types root_type = tdp_mmu_get_root_type(kvm,
fault);
+ struct kvm_mmu_page *root;
struct tdp_iter iter;
struct kvm_mmu_page *sp;
- gfn_t raw_gfn;
- bool is_private = fault->is_private && kvm_gfn_shared_mask(kvm);
int ret = RET_PF_RETRY;

kvm_mmu_hugepage_adjust(vcpu, fault);
@@ -1321,9 +1319,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct
kvm_page_fault *fault)

rcu_read_lock();

- raw_gfn = gpa_to_gfn(fault->addr);
-
- tdp_mmu_for_each_pte(iter, mmu, is_private, raw_gfn, raw_gfn + 1) {
+ root = tdp_mmu_get_root(vcpu, root_type);
+ tdp_mmu_for_each_pte(iter, kvm, root, fault->gfn, fault->gfn + 1) {
int r;

if (fault->nx_huge_page_workaround_enabled)
@@ -1349,7 +1346,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct
kvm_page_fault *fault)
* needs to be split.
*/
sp = tdp_mmu_alloc_sp(vcpu);
- if (kvm_is_private_gpa(kvm, raw_gfn << PAGE_SHIFT))
+ if (root_type == KVM_MIRROR_ROOTS)
kvm_mmu_alloc_private_spt(vcpu, sp);
tdp_mmu_init_child_sp(sp, &iter);

@@ -1360,7 +1357,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct
kvm_page_fault *fault)
* TODO: large page support.
* Doesn't support large page for TDX now
*/
- KVM_BUG_ON(is_private_sptep(iter.sptep), vcpu->kvm);
+ KVM_BUG_ON(is_mirrored_sptep(iter.sptep), vcpu->kvm);
r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
} else {
r = tdp_mmu_link_sp(kvm, &iter, sp, true);
@@ -1405,7 +1402,7 @@ static enum kvm_tdp_mmu_root_types
kvm_process_to_root_types(struct kvm *kvm,
WARN_ON_ONCE(process == BUGGY_KVM_INVALIDATION);

/* Always process shared for cases where private is not on a separate
root */
- if (!kvm_gfn_shared_mask(kvm)) {
+ if (!kvm_has_private_root(kvm)) {
process |= KVM_PROCESS_SHARED;
process &= ~KVM_PROCESS_PRIVATE;
}
@@ -2022,14 +2019,14 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
* Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
*/
static int __kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
- bool is_private)
+ enum kvm_tdp_mmu_root_types root_type)
{
+ struct kvm_mmu_page *root = tdp_mmu_get_root(vcpu, root_type);
struct tdp_iter iter;
- struct kvm_mmu *mmu = vcpu->arch.mmu;
gfn_t gfn = addr >> PAGE_SHIFT;
int leaf = -1;

- tdp_mmu_for_each_pte(iter, mmu, is_private, gfn, gfn + 1) {
+ tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
leaf = iter.level;
sptes[leaf] = iter.old_spte;
}
@@ -2042,7 +2039,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr,
u64 *sptes,
{
*root_level = vcpu->arch.mmu->root_role.level;

- return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, false);
+ return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, KVM_SHARED_ROOTS);
}

int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu *vcpu, u64 gpa,
@@ -2054,7 +2051,7 @@ int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu
*vcpu, u64 gpa,
lockdep_assert_held(&vcpu->kvm->mmu_lock);

rcu_read_lock();
- leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, true);
+ leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, KVM_MIRROR_ROOTS);
rcu_read_unlock();
if (leaf < 0)
return -ENOENT;
@@ -2082,15 +2079,12 @@ EXPORT_SYMBOL_GPL(kvm_tdp_mmu_get_walk_private_pfn);
u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
u64 *spte)
{
+ struct kvm_mmu_page *root = tdp_mmu_get_root(vcpu, KVM_SHARED_ROOTS);
struct tdp_iter iter;
- struct kvm_mmu *mmu = vcpu->arch.mmu;
gfn_t gfn = addr >> PAGE_SHIFT;
tdp_ptep_t sptep = NULL;

- /* fast page fault for private GPA isn't supported. */
- WARN_ON_ONCE(kvm_is_private_gpa(vcpu->kvm, addr));
-
- tdp_mmu_for_each_pte(iter, mmu, false, gfn, gfn + 1) {
+ tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
*spte = iter.old_spte;
sptep = iter.sptep;
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index b8a967426fac..40f5f9753131 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -22,15 +22,30 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct
kvm_mmu_page *root);
enum kvm_tdp_mmu_root_types {
BUGGY_KVM_ROOTS = BUGGY_KVM_INVALIDATION,
KVM_SHARED_ROOTS = KVM_PROCESS_SHARED,
- KVM_PRIVATE_ROOTS = KVM_PROCESS_PRIVATE,
+ KVM_MIRROR_ROOTS = KVM_PROCESS_PRIVATE,
KVM_VALID_ROOTS = BIT(2),
- KVM_ANY_VALID_ROOTS = KVM_SHARED_ROOTS | KVM_PRIVATE_ROOTS |
KVM_VALID_ROOTS,
- KVM_ANY_ROOTS = KVM_SHARED_ROOTS | KVM_PRIVATE_ROOTS,
+ KVM_ANY_VALID_ROOTS = KVM_SHARED_ROOTS | KVM_MIRROR_ROOTS |
KVM_VALID_ROOTS,
+ KVM_ANY_ROOTS = KVM_SHARED_ROOTS | KVM_MIRROR_ROOTS,
};

static_assert(!(KVM_SHARED_ROOTS & KVM_VALID_ROOTS));
-static_assert(!(KVM_PRIVATE_ROOTS & KVM_VALID_ROOTS));
-static_assert(KVM_PRIVATE_ROOTS == (KVM_SHARED_ROOTS << 1));
+static_assert(!(KVM_MIRROR_ROOTS & KVM_VALID_ROOTS));
+static_assert(KVM_MIRROR_ROOTS == (KVM_SHARED_ROOTS << 1));
+
+static inline enum kvm_tdp_mmu_root_types tdp_mmu_get_root_type(struct kvm
*kvm,
+ struct
kvm_page_fault *fault)
+{
+ if (fault->is_private && kvm_has_mirrored_tdp(kvm))
+ return KVM_MIRROR_ROOTS;
+ return KVM_SHARED_ROOTS;
+}
+
+static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu, enum
kvm_tdp_mmu_root_types type)
+{
+ if (type == KVM_MIRROR_ROOTS)
+ return root_to_sp(vcpu->arch.mmu->private_root_hpa);
+ return root_to_sp(vcpu->arch.mmu->root.hpa);
+}

bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool
flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index 7fdc67835e06..b4e324fe55c5 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -69,6 +69,14 @@ static inline void
vmx_handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
vcpu->arch.at_instruction_boundary = true;
}

+
+static inline bool gpa_on_private_root(const struct kvm *kvm, gpa_t gpa)
+{
+ gfn_t mask = kvm_gfn_shared_mask(kvm);
+
+ return kvm_has_private_root(kvm) && !(gpa_to_gfn(gpa) & mask);
+}
+
static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
unsigned long exit_qualification)
{
@@ -90,7 +98,7 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu
*vcpu, gpa_t gpa,
error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;

- if (kvm_is_private_gpa(vcpu->kvm, gpa))
+ if (gpa_on_private_root(vcpu->kvm, gpa))
error_code |= PFERR_PRIVATE_ACCESS;

return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index bfb939826276..d7626f80b7f7 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1772,7 +1772,7 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
{
unsigned long exit_qual;

- if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
+ if (gpa_on_private_root(vcpu->kvm, tdexit_gpa(vcpu))) {
/*
* Always treat SEPT violations as write faults. Ignore the
* EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
@@ -2967,8 +2967,8 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu,
struct kvm_tdx_cmd *c
if (!PAGE_ALIGNED(region.source_addr) || !PAGE_ALIGNED(region.gpa) ||
!region.nr_pages ||
region.gpa + (region.nr_pages << PAGE_SHIFT) <= region.gpa ||
- !kvm_is_private_gpa(kvm, region.gpa) ||
- !kvm_is_private_gpa(kvm, region.gpa + (region.nr_pages <<
PAGE_SHIFT)))
+ !gpa_on_private_root(kvm, region.gpa) ||
+ !gpa_on_private_root(kvm, region.gpa + (region.nr_pages <<
PAGE_SHIFT)))
return -EINVAL;

mutex_lock(&kvm->slots_lock);