KVM/arm64 updates for 5.15

- Page ownership tracking between host EL1 and EL2
 
 - Rely on userspace page tables to create large stage-2 mappings
 
 - Fix incompatibility between pKVM and kmemleak
 
 - Fix the PMU reset state, and improve the performance of the virtual PMU
 
 - Move over to the generic KVM entry code
 
 - Address PSCI reset issues w.r.t. save/restore
 
 - Preliminary rework for the upcoming pKVM fixed feature
 
 - A bunch of MM cleanups
 
 - a vGIC fix for timer spurious interrupts
 
 - Various cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmEnfogPHG1hekBrZXJu
 ZWwub3JnAAoJECPQ0LrRPXpDF9oQAINWHN1n30gsxcErMV8gH+XAyhDq2vTjkExQ
 Qz5ddo4R5zeVkj0nkunFSK+W3xYz+W97X3I+IaiiHvk5D6dUatj37IyYlazX5iFT
 7mbjTAqY7GRxfd6um7uK+CTRCApXY49GGkCVLGA5f+6mQ0JMVXaK9AKlsXKWUQLZ
 JvLasUgKkseN6IEJWmPDNBdIeiKBTZloeZMdlM2vSm34HsuirSS5LmshdzJQzSk8
 QSEqwXZX50afzJLNlB9Qa6V1tokjZVoYIBk0vAPO83tTh9HIyGL/PFAqBeq2rnWT
 M19fFFbx5vizap4ICbpviLmZ5AOywCoBmbPBT79eMAJ53rOqHUJhU1y/3DoiVzxu
 LJZI4wmGBQZVivOWOqyEZcNtTAagPLhyrLhMzYulBLwAjfFJmUHdSOxYtx+2Ysvr
 SDIPN31FKWrvifTXTqJHDmaaXusi2CNZUOPzVSe2I14SbX+ZX2ny9DltlbRgPNuc
 hGJagI5cZc0ngd4mAIzjjNmgBS2B+dSc8dOo71dRNJRLtQLiNHcAyQNJyFme+4xI
 NpvpkvzxBAs8rG2X0YIR/Cz3W3yZoCYuQNcoPk7+F/bUTK47VocQCS+gLucHVLbT
 H4286EV5n4nZ7E01oJ6uWnDnslPvrx9Sz2fxsrWYkBDR+xrz0EprrGsftFaILprz
 Ic43uXfd
 =LuHM
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 5.15

- Page ownership tracking between host EL1 and EL2

- Rely on userspace page tables to create large stage-2 mappings

- Fix incompatibility between pKVM and kmemleak

- Fix the PMU reset state, and improve the performance of the virtual PMU

- Move over to the generic KVM entry code

- Address PSCI reset issues w.r.t. save/restore

- Preliminary rework for the upcoming pKVM fixed feature

- A bunch of MM cleanups

- a vGIC fix for timer spurious interrupts

- Various cleanups
This commit is contained in:
Paolo Bonzini 2021-09-06 06:34:11 -04:00
commit e99314a340
574 changed files with 7136 additions and 3867 deletions

View file

@ -57,7 +57,7 @@ struct blk_keyslot_manager;
* Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency.
*/
#define BLKCG_MAX_POLS 5
#define BLKCG_MAX_POLS 6
typedef void (rq_end_io_fn)(struct request *, blk_status_t);

View file

@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
#ifdef CONFIG_NET
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
#endif

View file

@ -340,8 +340,8 @@ struct bpf_insn_aux_data {
};
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
u8 alu_state; /* used in combination with alu_limit */
@ -414,6 +414,7 @@ struct bpf_verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
bool explore_alu_limits;
bool allow_ptr_leaks;
bool allow_uninit_stack;
bool allow_ptr_to_map_access;

View file

@ -2,7 +2,11 @@
#ifndef __LINUX_ENTRYKVM_H
#define __LINUX_ENTRYKVM_H
#include <linux/entry-common.h>
#include <linux/static_call_types.h>
#include <linux/tracehook.h>
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
#include <linux/tick.h>
/* Transfer to guest mode work */

View file

@ -73,6 +73,11 @@ struct ctl_table_header;
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
/* unused opcode to mark speculation barrier for mitigating
* Speculative Store Bypass
*/
#define BPF_NOSPEC 0xc0
/* As per nm, we expose JITed images as text (code) section for
* kallsyms. That way, tools like perf can find it to match
* addresses.
@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
.off = 0, \
.imm = 0 })
/* Speculation barrier */
#define BPF_ST_NOSPEC() \
((struct bpf_insn) { \
.code = BPF_ST | BPF_NOSPEC, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = 0 })
/* Internal classic blocks for direct assignment */
#define __BPF_STMT(CODE, K) \

View file

@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc);
extern void put_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param_source(struct fs_context *fc,
struct fs_parameter *param);
extern void fc_drop_locked(struct fs_context *fc);
/*
* sget() wrappers to be called from the ->get_tree() op.

View file

@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
VM_BUG_ON(offset + len > PAGE_SIZE);
memcpy(to + offset, from, len);
flush_dcache_page(page);
kunmap_local(to);
}
static inline void memzero_page(struct page *page, size_t offset, size_t len)
{
char *addr = kmap_atomic(page);
char *addr = kmap_local_page(page);
memset(addr + offset, 0, len);
kunmap_atomic(addr);
flush_dcache_page(page);
kunmap_local(addr);
}
#endif /* _LINUX_HIGHMEM_H */

View file

@ -81,6 +81,8 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device,
/* Get the device * from ishtp device instance */
struct device *ishtp_device(struct ishtp_cl_device *cl_device);
/* wait for IPC resume */
bool ishtp_wait_resume(struct ishtp_device *dev);
/* Trace interface for clients */
ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device);
/* Get device pointer of PCI device for DMA acces */

View file

@ -872,7 +872,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
void kvm_get_pfn(kvm_pfn_t pfn);
void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,

View file

@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
*/
#define for_each_mem_range(i, p_start, p_end) \
__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \
MEMBLOCK_NONE, p_start, p_end, NULL)
MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
/**
* for_each_mem_range_rev - reverse iterate through memblock areas from
@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
*/
#define for_each_mem_range_rev(i, p_start, p_end) \
__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
MEMBLOCK_NONE, p_start, p_end, NULL)
MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
/**
* for_each_reserved_mem_range - iterate over all reserved memblock areas

View file

@ -200,13 +200,13 @@ enum rt5033_reg {
#define RT5033_REGULATOR_BUCK_VOLTAGE_MIN 1000000U
#define RT5033_REGULATOR_BUCK_VOLTAGE_MAX 3000000U
#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP 100000U
#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 32
#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 21
/* RT5033 regulator LDO output voltage uV */
#define RT5033_REGULATOR_LDO_VOLTAGE_MIN 1200000U
#define RT5033_REGULATOR_LDO_VOLTAGE_MAX 3000000U
#define RT5033_REGULATOR_LDO_VOLTAGE_STEP 100000U
#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 32
#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 19
/* RT5033 regulator SAFE LDO output voltage uV */
#define RT5033_REGULATOR_SAFE_LDO_VOLTAGE 4900000U

View file

@ -632,43 +632,6 @@ static inline int PageTransCompound(struct page *page)
return PageCompound(page);
}
/*
* PageTransCompoundMap is the same as PageTransCompound, but it also
* guarantees the primary MMU has the entire compound page mapped
* through pmd_trans_huge, which in turn guarantees the secondary MMUs
* can also map the entire compound page. This allows the secondary
* MMUs to call get_user_pages() only once for each compound page and
* to immediately map the entire compound page with a single secondary
* MMU fault. If there will be a pmd split later, the secondary MMUs
* will get an update through the MMU notifier invalidation through
* split_huge_pmd().
*
* Unlike PageTransCompound, this is safe to be called only while
* split_huge_pmd() cannot run from under us, like if protected by the
* MMU notifier, otherwise it may result in page->_mapcount check false
* positives.
*
* We have to treat page cache THP differently since every subpage of it
* would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE
* mapped in the current process so comparing subpage's _mapcount to
* compound_mapcount to filter out PTE mapped case.
*/
static inline int PageTransCompoundMap(struct page *page)
{
struct page *head;
if (!PageTransCompound(page))
return 0;
if (PageAnon(page))
return atomic_read(&page->_mapcount) < 0;
head = compound_head(page);
/* File THP is PMD mapped and not PTE mapped */
return atomic_read(&page->_mapcount) ==
atomic_read(compound_mapcount_ptr(head));
}
/*
* PageTransTail returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known

View file

@ -1397,34 +1397,10 @@ static inline int p4d_clear_huge(p4d_t *p4d)
}
#endif /* !__PAGETABLE_P4D_FOLDED */
#ifndef __PAGETABLE_PUD_FOLDED
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
#else
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
return 0;
}
static inline int pud_clear_huge(pud_t *pud)
{
return 0;
}
#endif /* !__PAGETABLE_PUD_FOLDED */
#ifndef __PAGETABLE_PMD_FOLDED
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
#else
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
return 0;
}
#endif /* !__PAGETABLE_PMD_FOLDED */
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
int pud_free_pmd_page(pud_t *pud, unsigned long addr);
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);

View file

@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
return rcu_dereference_sk_user_data(sk);
}
static inline void sk_psock_set_state(struct sk_psock *psock,
enum sk_psock_state_bits bit)
{
set_bit(bit, &psock->state);
}
static inline void sk_psock_clear_state(struct sk_psock *psock,
enum sk_psock_state_bits bit)
{
clear_bit(bit, &psock->state);
}
static inline bool sk_psock_test_state(const struct sk_psock *psock,
enum sk_psock_state_bits bit)
{
return test_bit(bit, &psock->state);
}
static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
kfree_skb(skb);
}
static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
{
if (msg->skb)
sock_drop(psock->sk, msg->skb);
kfree(msg);
}
static inline void sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
spin_lock_bh(&psock->ingress_lock);
list_add_tail(&msg->list, &psock->ingress_msg);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
list_add_tail(&msg->list, &psock->ingress_msg);
else
drop_sk_msg(psock, msg);
spin_unlock_bh(&psock->ingress_lock);
}
@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
psock->psock_update_sk_prot(sk, psock, true);
}
static inline void sk_psock_set_state(struct sk_psock *psock,
enum sk_psock_state_bits bit)
{
set_bit(bit, &psock->state);
}
static inline void sk_psock_clear_state(struct sk_psock *psock,
enum sk_psock_state_bits bit)
{
clear_bit(bit, &psock->state);
}
static inline bool sk_psock_test_state(const struct sk_psock *psock,
enum sk_psock_state_bits bit)
{
return test_bit(bit, &psock->state);
}
static inline struct sk_psock *sk_psock_get(struct sock *sk)
{
struct sk_psock *psock;