[RFC PATCH v3 13/21] KVM: x86: Improve synchronization in kvm_synchronize_tsc()

From: David Woodhouse
Date: Tue May 21 2024 - 20:19:18 EST


From: David Woodhouse <dwmw@xxxxxxxxxxxx>

When synchronizing to an existing TSC (either by explicitly writing zero,
or the legacy hack where the TSC is written within one second's worth of
the previously written TSC), the last_tsc_write and last_tsc_nsec values
were being misrecorded by __kvm_synchronize_tsc(). The *unsynchronized*
value of the TSC (perhaps even zero) was bring recorded, along with the
current time at which kvm_synchronize_tsc() was called. This could cause
*subsequent* writes to fail to synchronize correctly.

Fix that by resetting {data, ns} to the previous values before passing
them to __kvm_synchronize_tsc() when synchronization is detected. Except
in the case where the TSC is unstable and *has* to be synthesised from
the host clock, in which case attempt to create a nsec/tsc pair which is
on the correct line.

Furthermore, there were *three* different TSC reads used for calculating
the "current" time, all slightly different from each other. Fix that by
using kvm_get_time_and_clockread() where possible and using the same
host_tsc value in all cases.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
arch/x86/kvm/x86.c | 32 ++++++++++++++++++++++++++++----
1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ea59694d712a..6ec43f39bdb0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -201,6 +201,10 @@ module_param(eager_page_split, bool, 0644);
static bool __read_mostly mitigate_smt_rsb;
module_param(mitigate_smt_rsb, bool, 0444);

+#ifdef CONFIG_X86_64
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
+#endif
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -2753,14 +2757,22 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
{
u64 data = user_value ? *user_value : 0;
struct kvm *kvm = vcpu->kvm;
- u64 offset, ns, elapsed;
+ u64 offset, host_tsc, ns, elapsed;
unsigned long flags;
bool matched = false;
bool synchronizing = false;

+#ifdef CONFIG_X86_64
+ if (!kvm_get_time_and_clockread(&ns, &host_tsc))
+#endif
+ {
+ ns = get_kvmclock_base_ns();
+ host_tsc = rdtsc();
+ }
+
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
- offset = kvm_compute_l1_tsc_offset(vcpu, rdtsc(), data);
- ns = get_kvmclock_base_ns();
+
+ offset = kvm_compute_l1_tsc_offset(vcpu, host_tsc, data);
elapsed = ns - kvm->arch.last_tsc_nsec;

if (vcpu->arch.virtual_tsc_khz) {
@@ -2805,12 +2817,24 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
*/
if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
+ /*
+ * If synchronizing, the "last written" TSC value/time recorded
+ * by __kvm_synchronize_tsc() should not change (i.e. should
+ * be precisely the same as the existing generation)...
+ */
+ data = kvm->arch.last_tsc_write;
+
if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
+ ns = kvm->arch.cur_tsc_nsec;
} else {
+ /*
+ * ... unless the TSC is unstable and has to be
+ * synthesised from the host clock in nanoseconds.
+ */
u64 delta = nsec_to_cycles(vcpu, elapsed);
data += delta;
- offset = kvm_compute_l1_tsc_offset(vcpu, rdtsc(), data);
+ offset = kvm_compute_l1_tsc_offset(vcpu, host_tsc, data);
}
matched = true;
}
--
2.44.0