From 0ab410a93d627ae73136d1a52c096262360b7992 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:38:59 +0100 Subject: [PATCH 1/4] KVM: arm64: Narrow PMU sysreg reset values to architectural requirements A number of the PMU sysregs expose reset values that are not compliant with the architecture (set bits in the RES0 ranges, for example). This in turn has the effect that we need to pointlessly mask some register fields when using them. Let's start by making sure we don't have illegal values in the shadow registers at reset time. This affects all the registers that dedicate one bit per counter, the counters themselves, PMEVTYPERn_EL0 and PMSELR_EL0. Reported-by: Alexandre Chartre Reviewed-by: Alexandre Chartre Acked-by: Russell King (Oracle) Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210719123902.1493805-2-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 43 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6f126eb6ac1..96bdfa0e68b2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -603,6 +603,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX); + + /* No PMU available, any PMU reg may UNDEF... */ + if (!kvm_arm_support_pmu_v3()) + return; + + n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT; + n &= ARMV8_PMU_PMCR_N_MASK; + if (n) + mask |= GENMASK(n - 1, 0); + + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= mask; +} + +static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); +} + +static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK; +} + +static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK; +} + static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; @@ -944,16 +979,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } #define PMU_SYS_REG(r) \ - SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility + SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ + .reset = reset_pmevcntr, \ .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \ + .reset = reset_pmevtyper, \ .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), } static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -1595,13 +1632,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(SYS_PMSWINC_EL0), .access = access_pmswinc, .reg = PMSWINC_EL0 }, { PMU_SYS_REG(SYS_PMSELR_EL0), - .access = access_pmselr, .reg = PMSELR_EL0 }, + .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, { PMU_SYS_REG(SYS_PMCEID0_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCEID1_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCCNTR_EL0), - .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 }, + .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 }, { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), .access = access_pmu_evtyper, .reset = NULL }, { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), From f5eff40058a856c23c5ec2f31756f107a2b1ef84 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:39:00 +0100 Subject: [PATCH 2/4] KVM: arm64: Drop unnecessary masking of PMU registers We always sanitise our PMU sysreg on the write side, so there is no need to do it on the read side as well. Drop the unnecessary masking. Acked-by: Russell King (Oracle) Reviewed-by: Alexandre Chartre Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210719123902.1493805-3-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 3 +-- arch/arm64/kvm/sys_regs.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f33825c995cb..fae4e95b586c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); - reg &= kvm_pmu_valid_counter_mask(vcpu); } return reg; @@ -569,7 +568,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } else { kvm_pmu_disable_counter_mask(vcpu, mask); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 96bdfa0e68b2..f22139658e48 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -880,7 +880,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, kvm_pmu_disable_counter_mask(vcpu, val); } } else { - p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); } return true; @@ -904,7 +904,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMINTENCLR_EL1 */ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; } else { - p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } return true; @@ -926,7 +926,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMOVSCLR_EL0 */ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); } else { - p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } return true; From ca4f202d08ba7f24cc97dce14c6d20ec7a679135 Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Mon, 19 Jul 2021 13:39:01 +0100 Subject: [PATCH 3/4] KVM: arm64: Disabling disabled PMU counters wastes a lot of time In a KVM guest on arm64, performance counters interrupts have an unnecessary overhead which slows down execution when using the "perf record" command and limits the "perf record" sampling period. The problem is that when a guest VM disables counters by clearing the PMCR_EL0.E bit (bit 0), KVM will disable all counters defined in PMCR_EL0 even if they are not enabled in PMCNTENSET_EL0. KVM disables a counter by calling into the perf framework, in particular by calling perf_event_create_kernel_counter() which is a time consuming operation. So, for example, with a Neoverse N1 CPU core which has 6 event counters and one cycle counter, KVM will always disable all 7 counters even if only one is enabled. This typically happens when using the "perf record" command in a guest VM: perf will disable all event counters with PMCNTENTSET_EL0 and only uses the cycle counter. And when using the "perf record" -F option with a high profiling frequency, the overhead of KVM disabling all counters instead of one on every counter interrupt becomes very noticeable. The problem is fixed by having KVM disable only counters which are enabled in PMCNTENSET_EL0. If a counter is not enabled in PMCNTENSET_EL0 then KVM will not enable it when setting PMCR_EL0.E and it will remain disabled as long as it is not enabled in PMCNTENSET_EL0. So there is effectively no need to disable a counter when clearing PMCR_EL0.E if it is not enabled PMCNTENSET_EL0. Acked-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Alexandre Chartre [maz: moved 'mask' close to the actual user, simplifying the patch] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210712170345.660272-1-alexandre.chartre@oracle.com Link: https://lore.kernel.org/r/20210719123902.1493805-4-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fae4e95b586c..dc65b58dc68f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -563,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); int i; if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } else { - kvm_pmu_disable_counter_mask(vcpu, mask); + kvm_pmu_disable_counter_mask(vcpu, + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); From 7a3ba3095a32f9c4ec8f30d680fea5150e12c3f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:39:02 +0100 Subject: [PATCH 4/4] KVM: arm64: Remove PMSWINC_EL0 shadow register We keep an entry for the PMSWINC_EL0 register in the vcpu structure, while *never* writing anything there outside of reset. Given that the register is defined as write-only, that we always trap when this register is accessed, there is little point in saving anything anyway. Get rid of the entry, and save a mighty 8 bytes per vcpu structure. We still need to keep it exposed to userspace in order to preserve backward compatibility with previously saved VMs. Since userspace cannot expect any effect of writing to PMSWINC_EL0, treat the register as RAZ/WI for the purpose of userspace access. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210719123902.1493805-5-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/sys_regs.c | 21 ++++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..afc169630884 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -185,7 +185,6 @@ enum vcpu_sysreg { PMCNTENSET_EL0, /* Count Enable Set Register */ PMINTENSET_EL1, /* Interrupt Enable Set Register */ PMOVSSET_EL0, /* Overflow Flag Status Set Register */ - PMSWINC_EL0, /* Software Increment Register */ PMUSERENR_EL0, /* User Enable Register */ /* Pointer Authentication Registers in a strict increasing order. */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f22139658e48..a1f5101f49a3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1286,6 +1286,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, true); } +static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + int err; + u64 val; + + /* Perform the access even if we are going to ignore the value */ + err = reg_from_user(&val, uaddr, sys_reg_to_index(rd)); + if (err) + return err; + + return 0; +} + static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { @@ -1629,8 +1643,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, { PMU_SYS_REG(SYS_PMOVSCLR_EL0), .access = access_pmovs, .reg = PMOVSSET_EL0 }, + /* + * PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was + * previously (and pointlessly) advertised in the past... + */ { PMU_SYS_REG(SYS_PMSWINC_EL0), - .access = access_pmswinc, .reg = PMSWINC_EL0 }, + .get_user = get_raz_id_reg, .set_user = set_wi_reg, + .access = access_pmswinc, .reset = NULL }, { PMU_SYS_REG(SYS_PMSELR_EL0), .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, { PMU_SYS_REG(SYS_PMCEID0_EL0),