- Fix guest vtime accounting so that ticks happening while the guest is running

can also be accounted to it. Along with a consolidation to the guest-specific context
 tracking helpers.
 
 - Provide for the host NMI handler running after a VMX VMEXIT to be able to run
 on the kernel stack correctly.
 
 - Initialize MSR_TSC_AUX when RDPID is supported and not RDTSCP (virt relevant -
   real hw supports both)
 
 - A code generation improvement to TASK_SIZE_MAX through the use of alternatives
 
 - The usual misc. and related cleanups and improvements
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmCXqpcACgkQEsHwGGHe
 VUp6Kg/+O0y2PvL6dhfYnUvTmQD7be0DOfWeFSLfBBA0c6yaHL1INbFHWDDptNuJ
 ZV50V+vyqXWV9q0AWF94fYHBs2kB0S79/En0Pwt1a3kb/xlfVTh8VAMPr36utnTY
 VWvOwHgixfPbY+8g1AoqIm/IeFuYWubXQ9CyBrLx/zkJjszfot1eooGRYKDPc2qi
 dNEqBO4IKzw24OdO+oIzW1/owLfnBF+GnXrwCb8fFC2U7luyFAJmp9c1bYnyNuCm
 BdQySOTfm8nnE2RpN4wfc8Akvu/ETKHOPSQOqHIb5glzv6lVfRKXu3CgpYbzoCNl
 Iohb6z8xmgAG29g2VpBjNvCWyyO79y4Ckf94ibWl+qt01EdeYefcP0euK+MGi85A
 cN/MrMt7QjHHEO7ok5J9rBSeKobOtng6A4MHenSOLvjifOYoupRFijaLVxRluATW
 3NsC2IhL10u1c69Zsq6JJFJKoAytInKSigEN9VFZp+4NdE/FzDxfebC/6rSKznGi
 XoaEjOOX0JQ5TXM1gDoyzowAvt2vgndvldpwJTnPY5NP3X9fdiHhoOF9cU2yvl+x
 ZjgD1VxRWLGZKBojNfAa+0oDMZ/cTwPoeZ5Rr5p7SMr/Xw2fsUQ68KVjhOR7ZbaU
 8zEV//JtetwGSN86NhQ/V32hqiF2fni62yBZjYGZ8XM/AnDqaMQ=
 =O3BS
 -----END PGP SIGNATURE-----

Merge tag 'x86_urgent_for_v5.13_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "A bunch of things accumulated for x86 in the last two weeks:

   - Fix guest vtime accounting so that ticks happening while the guest
     is running can also be accounted to it. Along with a consolidation
     to the guest-specific context tracking helpers.

   - Provide for the host NMI handler running after a VMX VMEXIT to be
     able to run on the kernel stack correctly.

   - Initialize MSR_TSC_AUX when RDPID is supported and not RDTSCP (virt
     relevant - real hw supports both)

   - A code generation improvement to TASK_SIZE_MAX through the use of
     alternatives

   - The usual misc and related cleanups and improvements"

* tag 'x86_urgent_for_v5.13_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: x86: Consolidate guest enter/exit logic to common helpers
  context_tracking: KVM: Move guest enter/exit wrappers to KVM's domain
  context_tracking: Consolidate guest enter/exit wrappers
  sched/vtime: Move guest enter/exit vtime accounting to vtime.h
  sched/vtime: Move vtime accounting external declarations above inlines
  KVM: x86: Defer vtime accounting 'til after IRQ handling
  context_tracking: Move guest exit vtime accounting to separate helpers
  context_tracking: Move guest exit context tracking to separate helpers
  KVM/VMX: Invoke NMI non-IST entry instead of IST entry
  x86/cpu: Remove write_tsc() and write_rdtscp_aux() wrappers
  x86/cpu: Initialize MSR_TSC_AUX if RDTSCP *or* RDPID is supported
  x86/resctrl: Fix init const confusion
  x86: Delete UD0, UD1 traces
  x86/smpboot: Remove duplicate includes
  x86/cpu: Use alternative to generate the TASK_SIZE_MAX constant
This commit is contained in:
Linus Torvalds 2021-05-09 12:52:25 -07:00
commit dd3e4012dd
16 changed files with 263 additions and 233 deletions

View file

@ -71,6 +71,19 @@ static inline void exception_exit(enum ctx_state prev_ctx)
}
}
static __always_inline bool context_tracking_guest_enter(void)
{
if (context_tracking_enabled())
__context_tracking_enter(CONTEXT_GUEST);
return context_tracking_enabled_this_cpu();
}
static __always_inline void context_tracking_guest_exit(void)
{
if (context_tracking_enabled())
__context_tracking_exit(CONTEXT_GUEST);
}
/**
* ct_state() - return the current context tracking state if known
@ -92,6 +105,9 @@ static inline void user_exit_irqoff(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
static inline bool context_tracking_guest_enter(void) { return false; }
static inline void context_tracking_guest_exit(void) { }
#endif /* !CONFIG_CONTEXT_TRACKING */
#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
@ -102,80 +118,4 @@ extern void context_tracking_init(void);
static inline void context_tracking_init(void) { }
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/* must be called with irqs disabled */
static __always_inline void guest_enter_irqoff(void)
{
instrumentation_begin();
if (vtime_accounting_enabled_this_cpu())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
instrumentation_end();
if (context_tracking_enabled())
__context_tracking_enter(CONTEXT_GUEST);
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspace from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
if (!context_tracking_enabled_this_cpu()) {
instrumentation_begin();
rcu_virt_note_context_switch(smp_processor_id());
instrumentation_end();
}
}
static __always_inline void guest_exit_irqoff(void)
{
if (context_tracking_enabled())
__context_tracking_exit(CONTEXT_GUEST);
instrumentation_begin();
if (vtime_accounting_enabled_this_cpu())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
instrumentation_end();
}
#else
static __always_inline void guest_enter_irqoff(void)
{
/*
* This is running in ioctl context so its safe
* to assume that it's the stime pending cputime
* to flush.
*/
instrumentation_begin();
vtime_account_kernel(current);
current->flags |= PF_VCPU;
rcu_virt_note_context_switch(smp_processor_id());
instrumentation_end();
}
static __always_inline void guest_exit_irqoff(void)
{
instrumentation_begin();
/* Flush the guest cputime we spent on the guest */
vtime_account_kernel(current);
current->flags &= ~PF_VCPU;
instrumentation_end();
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
static inline void guest_exit(void)
{
unsigned long flags;
local_irq_save(flags);
guest_exit_irqoff();
local_irq_restore(flags);
}
#endif

View file

@ -338,6 +338,51 @@ struct kvm_vcpu {
struct kvm_dirty_ring dirty_ring;
};
/* must be called with irqs disabled */
static __always_inline void guest_enter_irqoff(void)
{
/*
* This is running in ioctl context so its safe to assume that it's the
* stime pending cputime to flush.
*/
instrumentation_begin();
vtime_account_guest_enter();
instrumentation_end();
/*
* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspace from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
if (!context_tracking_guest_enter()) {
instrumentation_begin();
rcu_virt_note_context_switch(smp_processor_id());
instrumentation_end();
}
}
static __always_inline void guest_exit_irqoff(void)
{
context_tracking_guest_exit();
instrumentation_begin();
/* Flush the guest cputime we spent on the guest */
vtime_account_guest_exit();
instrumentation_end();
}
static inline void guest_exit(void)
{
unsigned long flags;
local_irq_save(flags);
guest_exit_irqoff();
local_irq_restore(flags);
}
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
/*

View file

@ -3,12 +3,46 @@
#define _LINUX_KERNEL_VTIME_H
#include <linux/context_tracking_state.h>
#include <linux/sched.h>
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm/vtime.h>
#endif
/*
* Common vtime APIs
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
struct task_struct;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void arch_vtime_task_switch(struct task_struct *tsk);
extern void vtime_user_enter(struct task_struct *tsk);
extern void vtime_user_exit(struct task_struct *tsk);
extern void vtime_guest_enter(struct task_struct *tsk);
extern void vtime_guest_exit(struct task_struct *tsk);
extern void vtime_init_idle(struct task_struct *tsk, int cpu);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
static inline void vtime_user_enter(struct task_struct *tsk) { }
static inline void vtime_user_exit(struct task_struct *tsk) { }
static inline void vtime_guest_enter(struct task_struct *tsk) { }
static inline void vtime_guest_exit(struct task_struct *tsk) { }
static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
extern void vtime_account_softirq(struct task_struct *tsk);
extern void vtime_account_hardirq(struct task_struct *tsk);
extern void vtime_flush(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
static inline void vtime_account_softirq(struct task_struct *tsk) { }
static inline void vtime_account_hardirq(struct task_struct *tsk) { }
static inline void vtime_flush(struct task_struct *tsk) { }
#endif
/*
* vtime_accounting_enabled_this_cpu() definitions/declarations
@ -18,6 +52,18 @@ struct task_struct;
static inline bool vtime_accounting_enabled_this_cpu(void) { return true; }
extern void vtime_task_switch(struct task_struct *prev);
static __always_inline void vtime_account_guest_enter(void)
{
vtime_account_kernel(current);
current->flags |= PF_VCPU;
}
static __always_inline void vtime_account_guest_exit(void)
{
vtime_account_kernel(current);
current->flags &= ~PF_VCPU;
}
#elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
/*
@ -49,49 +95,37 @@ static inline void vtime_task_switch(struct task_struct *prev)
vtime_task_switch_generic(prev);
}
static __always_inline void vtime_account_guest_enter(void)
{
if (vtime_accounting_enabled_this_cpu())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
}
static __always_inline void vtime_account_guest_exit(void)
{
if (vtime_accounting_enabled_this_cpu())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; }
static inline bool vtime_accounting_enabled_this_cpu(void) { return false; }
static inline void vtime_task_switch(struct task_struct *prev) { }
#endif
static __always_inline void vtime_account_guest_enter(void)
{
current->flags |= PF_VCPU;
}
/*
* Common vtime APIs
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_account_kernel(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
static __always_inline void vtime_account_guest_exit(void)
{
current->flags &= ~PF_VCPU;
}
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void arch_vtime_task_switch(struct task_struct *tsk);
extern void vtime_user_enter(struct task_struct *tsk);
extern void vtime_user_exit(struct task_struct *tsk);
extern void vtime_guest_enter(struct task_struct *tsk);
extern void vtime_guest_exit(struct task_struct *tsk);
extern void vtime_init_idle(struct task_struct *tsk, int cpu);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
static inline void vtime_user_enter(struct task_struct *tsk) { }
static inline void vtime_user_exit(struct task_struct *tsk) { }
static inline void vtime_guest_enter(struct task_struct *tsk) { }
static inline void vtime_guest_exit(struct task_struct *tsk) { }
static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
extern void vtime_account_softirq(struct task_struct *tsk);
extern void vtime_account_hardirq(struct task_struct *tsk);
extern void vtime_flush(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
static inline void vtime_account_softirq(struct task_struct *tsk) { }
static inline void vtime_account_hardirq(struct task_struct *tsk) { }
static inline void vtime_flush(struct task_struct *tsk) { }
#endif