On Tue, 30 Apr 2024 19:14:44 +0100,
Colton Lewis <coltonlewis@xxxxxxxxxx> wrote:
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31b3a25680d0..a4d94d9abbe4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2653,6 +2653,22 @@
[KVM,ARM] Allow use of GICv4 for direct injection of
LPIs.
+ kvm-arm.wfe_trap_policy=
+ [KVM,ARM] Control when to set WFE instruction trap for
+ KVM VMs.
+
+ trap: set WFE instruction trap
+
+ notrap: clear WFE instruction trap
+
+ kvm-arm.wfi_trap_policy=
+ [KVM,ARM] Control when to set WFI instruction trap for
+ KVM VMs.
+
+ trap: set WFI instruction trap
+
+ notrap: clear WFI instruction trap
+
Please make it clear that neither traps are guaranteed. The
architecture *allows* an implementation to trap when no events (resp.
interrupts) are pending, but nothing more. An implementation is
perfectly allowed to ignore these bits.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b812569..315ee7bfc1cb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -67,6 +67,13 @@ enum kvm_mode {
KVM_MODE_NV,
KVM_MODE_NONE,
};
+
+enum kvm_wfx_trap_policy {
+ KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */
+ KVM_WFX_NOTRAP,
+ KVM_WFX_TRAP,
+};
Since this is only ever used in arm.c, it really doesn't need to be
exposed anywhere else.
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a25265aca432..5ec52333e042 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -46,6 +46,8 @@
#include <kvm/arm_psci.h>
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
+static enum kvm_wfx_trap_policy kvm_wfi_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK;
+static enum kvm_wfx_trap_policy kvm_wfe_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK;
It would be worth declaring those as __read_mostly.
+static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
+{
+ if (likely(kvm_wfi_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
+ return single_task_running() &&
+ (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
+ vcpu->kvm->arch.vgic.nassgireq);
So you are evaluating a runtime condition (scheduler queue length,
number of LPIs)...
+
+ return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
+}
+
+static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu)
+{
+ if (likely(kvm_wfe_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
+ return single_task_running();
+
+ return kvm_wfe_trap_policy == KVM_WFX_NOTRAP;
+}
+
+static inline void kvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
Why the inline?
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ if (has_vhe() || has_hvhe())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
+ }
+
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
+ vcpu->arch.hcr_el2 |= HCR_FWB;
+ } else {
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ vcpu->arch.hcr_el2 |= HCR_TVM;
+ }
+
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_RW;
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
+
+
+ if (kvm_vcpu_should_clear_twe(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_TWE;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWE;
+
+ if (kvm_vcpu_should_clear_twi(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_TWI;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWI;
... and from the above runtime conditions you make it a forever
decision, for a vcpu that still hasn't executed a single instruction.
What could possibly go wrong?
+static int __init early_kvm_wfx_trap_policy_cfg(char *arg, enum kvm_wfx_trap_policy *p)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "trap") == 0) {
+ *p = KVM_WFX_TRAP;
+ return 0;
+ }
+
+ if (strcmp(arg, "notrap") == 0) {
+ *p = KVM_WFX_NOTRAP;
+ return 0;
+ }
+
+ if (strcmp(arg, "default") == 0) {
+ *p = KVM_WFX_NOTRAP_SINGLE_TASK;
+ return 0;
+ }
Where is this "default" coming from? It's not documented.