From ecdc5d842bb3c166c3d549e52ba91a3955b257f2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 23 Oct 2019 13:56:36 +0200 Subject: [PATCH 1/5] s390/protvirt: introduce host side setup Add "prot_virt" command line option which controls if the kernel protected VMs support is enabled at early boot time. This has to be done early, because it needs large amounts of memory and will disable some features like STP time sync for the lpar. Extend ultravisor info definitions and expose it via uv_info struct filled in during startup. Signed-off-by: Vasily Gorbik Reviewed-by: Thomas Huth Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- .../admin-guide/kernel-parameters.txt | 5 ++ arch/s390/boot/Makefile | 2 +- arch/s390/boot/uv.c | 20 +++++++ arch/s390/include/asm/uv.h | 45 +++++++++++++++- arch/s390/kernel/Makefile | 1 + arch/s390/kernel/setup.c | 4 -- arch/s390/kernel/uv.c | 52 +++++++++++++++++++ 7 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 arch/s390/kernel/uv.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..b0beae9b9e36 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3795,6 +3795,11 @@ before loading. See Documentation/admin-guide/blockdev/ramdisk.rst. + prot_virt= [S390] enable hosting protected virtual machines + isolated from the hypervisor (if hardware supports + that). + Format: + psi= [KNL] Enable or disable pressure stall information tracking. 
Format: diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e2c47d3a1c89..30f1811540c5 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -37,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += version.o pgm_check_info.o ctype.o text_dma.o -obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o +obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 3f501159ee9f..8fde561f1d07 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -3,7 +3,13 @@ #include #include +/* will be used in arch/s390/kernel/uv.c */ +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); +#endif +#if IS_ENABLED(CONFIG_KVM) +struct uv_info __bootdata_preserved(uv_info); +#endif void uv_query_info(void) { @@ -19,7 +25,21 @@ void uv_query_info(void) if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) return; + if (IS_ENABLED(CONFIG_KVM)) { + memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list)); + uv_info.uv_base_stor_len = uvcb.uv_base_stor_len; + uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len; + uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len; + uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len; + uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; + uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); + uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; + uv_info.max_guest_cpus = uvcb.max_guest_cpus; + } + +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if 
(test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; +#endif } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 4093a2856929..c6a330740e5d 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -44,7 +44,19 @@ struct uv_cb_qui { struct uv_cb_header header; u64 reserved08; u64 inst_calls_list[4]; - u64 reserved30[15]; + u64 reserved30[2]; + u64 uv_base_stor_len; + u64 reserved48; + u64 conf_base_phys_stor_len; + u64 conf_base_virt_stor_len; + u64 conf_virt_var_stor_len; + u64 cpu_stor_len; + u32 reserved70[3]; + u32 max_num_sec_conf; + u64 max_guest_stor_addr; + u8 reserved88[158 - 136]; + u16 max_guest_cpus; + u8 reserveda0[200 - 160]; } __packed __aligned(8); struct uv_cb_share { @@ -69,6 +81,20 @@ static inline int uv_call(unsigned long r1, unsigned long r2) return cc; } +struct uv_info { + unsigned long inst_calls_list[4]; + unsigned long uv_base_stor_len; + unsigned long guest_base_stor_len; + unsigned long guest_virt_base_stor_len; + unsigned long guest_virt_var_stor_len; + unsigned long guest_cpu_stor_len; + unsigned long max_sec_stor_addr; + unsigned int max_num_sec_conf; + unsigned short max_guest_cpus; +}; + +extern struct uv_info uv_info; + #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; @@ -121,11 +147,26 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -void uv_query_info(void); #else #define is_prot_virt_guest() 0 static inline int uv_set_shared(unsigned long addr) { return 0; } static inline int uv_remove_shared(unsigned long addr) { return 0; } +#endif + +#if IS_ENABLED(CONFIG_KVM) +extern int prot_virt_host; + +static inline int is_prot_virt_host(void) +{ + return prot_virt_host; +} +#else +#define is_prot_virt_host() 0 +#endif + +#if 
defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +void uv_query_info(void); +#else static inline void uv_query_info(void) {} #endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2b1203cf7be6..22bfb8d5084e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -78,6 +78,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o obj-$(CONFIG_TRACEPOINTS) += trace.o +obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b2c2f75860e8..a2496382175e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -92,10 +92,6 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST -int __bootdata_preserved(prot_virt_guest); -#endif - int __bootdata(noexec_disabled); int __bootdata(memory_end_set); unsigned long __bootdata(memory_end); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c new file mode 100644 index 000000000000..b1f936710360 --- /dev/null +++ b/arch/s390/kernel/uv.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Ultravisor functions and initialization + * + * Copyright IBM Corp. 
2019, 2020 + */ +#define KMSG_COMPONENT "prot_virt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST +int __bootdata_preserved(prot_virt_guest); +#endif + +#if IS_ENABLED(CONFIG_KVM) +int prot_virt_host; +EXPORT_SYMBOL(prot_virt_host); +struct uv_info __bootdata_preserved(uv_info); +EXPORT_SYMBOL(uv_info); + +static int __init prot_virt_setup(char *val) +{ + bool enabled; + int rc; + + rc = kstrtobool(val, &enabled); + if (!rc && enabled) + prot_virt_host = 1; + + if (is_prot_virt_guest() && prot_virt_host) { + prot_virt_host = 0; + pr_warn("Protected virtualization not available in protected guests.\n"); + } + + if (prot_virt_host && !test_facility(158)) { + prot_virt_host = 0; + pr_warn("Protected virtualization not supported by the hardware.\n"); + } + + return rc; +} +early_param("prot_virt", prot_virt_setup); +#endif From 29d37e5b82f3e96dd648167657d5a0e0111ce877 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 23 Oct 2019 13:56:39 +0200 Subject: [PATCH 2/5] s390/protvirt: add ultravisor initialization Before being able to host protected virtual machines, donate some of the memory to the ultravisor. Besides that the ultravisor might impose addressing limitations for memory used to back protected VM storage. Treat that limit as protected virtualization host's virtual memory limit. 
Signed-off-by: Vasily Gorbik Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Reviewed-by: David Hildenbrand [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/uv.h | 15 ++++++++++++ arch/s390/kernel/setup.c | 5 ++++ arch/s390/kernel/uv.c | 48 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index c6a330740e5d..1af6ce8023cc 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -23,12 +23,14 @@ #define UVC_RC_NO_RESUME 0x0007 #define UVC_CMD_QUI 0x0001 +#define UVC_CMD_INIT_UV 0x000f #define UVC_CMD_SET_SHARED_ACCESS 0x1000 #define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 /* Bits in installed uv calls */ enum uv_cmds_inst { BIT_UVC_CMD_QUI = 0, + BIT_UVC_CMD_INIT_UV = 1, BIT_UVC_CMD_SET_SHARED_ACCESS = 8, BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9, }; @@ -59,6 +61,14 @@ struct uv_cb_qui { u8 reserveda0[200 - 160]; } __packed __aligned(8); +struct uv_cb_init { + struct uv_cb_header header; + u64 reserved08[2]; + u64 stor_origin; + u64 stor_len; + u64 reserved28[4]; +} __packed __aligned(8); + struct uv_cb_share { struct uv_cb_header header; u64 reserved08[3]; @@ -160,8 +170,13 @@ static inline int is_prot_virt_host(void) { return prot_virt_host; } + +void setup_uv(void); +void adjust_to_uv_max(unsigned long *vmax); #else #define is_prot_virt_host() 0 +static inline void setup_uv(void) {} +static inline void adjust_to_uv_max(unsigned long *vmax) {} #endif #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a2496382175e..1423090a2259 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -560,6 +560,9 @@ static void __init setup_memory_end(void) vmax = _REGION1_SIZE; /* 4-level kernel page table */ } + if (is_prot_virt_host()) + 
adjust_to_uv_max(&vmax); + /* module area is at the end of the kernel address space. */ MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; @@ -1134,6 +1137,8 @@ void __init setup_arch(char **cmdline_p) */ memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); + if (is_prot_virt_host()) + setup_uv(); setup_memory_end(); setup_memory(); dma_contiguous_reserve(memory_end); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index b1f936710360..1ddc42154ef6 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -49,4 +49,52 @@ static int __init prot_virt_setup(char *val) return rc; } early_param("prot_virt", prot_virt_setup); + +static int __init uv_init(unsigned long stor_base, unsigned long stor_len) +{ + struct uv_cb_init uvcb = { + .header.cmd = UVC_CMD_INIT_UV, + .header.len = sizeof(uvcb), + .stor_origin = stor_base, + .stor_len = stor_len, + }; + + if (uv_call(0, (uint64_t)&uvcb)) { + pr_err("Ultravisor init failed with rc: 0x%x rrc: 0x%x\n", + uvcb.header.rc, uvcb.header.rrc); + return -1; + } + return 0; +} + +void __init setup_uv(void) +{ + unsigned long uv_stor_base; + + uv_stor_base = (unsigned long)memblock_alloc_try_nid( + uv_info.uv_base_stor_len, SZ_1M, SZ_2G, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); + if (!uv_stor_base) { + pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n", + uv_info.uv_base_stor_len); + goto fail; + } + + if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) { + memblock_free(uv_stor_base, uv_info.uv_base_stor_len); + goto fail; + } + + pr_info("Reserving %luMB as ultravisor base storage\n", + uv_info.uv_base_stor_len >> 20); + return; +fail: + pr_info("Disabling support for protected virtualization\n"); + prot_virt_host = 0; +} + +void adjust_to_uv_max(unsigned long *vmax) +{ + *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); +} #endif From 214d9bbcd3a67230b932f6cea83c078ab34d9e70 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 21 Jan 2020 09:48:44 +0100 
Subject: [PATCH 3/5] s390/mm: provide memory management functions for protected KVM guests This provides the basic ultravisor calls and page table handling to cope with secure guests: - provide arch_make_page_accessible - make pages accessible after unmapping of secure guests - provide the ultravisor commands convert to/from secure - provide the ultravisor commands pin/unpin shared - provide callbacks to make pages secure (inaccessible) - we check for the expected pin count to only make pages secure if the host is not accessing them - we fence hugetlbfs for secure pages - add missing radix-tree include into gmap.h The basic idea is that a page can have 3 states: secure, normal or shared. The hypervisor can call into a firmware function called ultravisor that allows changing the state of a page: convert from/to secure. The convert from secure will encrypt the page and make it available to the host and host I/O. The convert to secure will remove the host capability to access this page. The design is that on convert to secure we will wait until writeback and page refs are indicating no host usage. At the same time the convert from secure (export to host) will be called in common code when the refcount or the writeback bit is already set. This avoids races between convert from and to secure. Then there is also the concept of shared pages. Those are a kind of secure page that the host can still access. We need to be notified when the guest "unshares" such a page, basically doing a convert to secure by then. There is a call "pin shared page" that we use instead of convert from secure when possible. We do use PG_arch_1 as an optimization to minimize the convert from secure/pin shared. Several comments have been added in the code to explain the logic in the relevant places. 
Co-developed-by: Ulrich Weigand Signed-off-by: Ulrich Weigand Signed-off-by: Claudio Imbrenda Acked-by: David Hildenbrand Acked-by: Cornelia Huck Reviewed-by: Christian Borntraeger [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/gmap.h | 4 + arch/s390/include/asm/mmu.h | 2 + arch/s390/include/asm/mmu_context.h | 1 + arch/s390/include/asm/page.h | 5 + arch/s390/include/asm/pgtable.h | 35 ++++- arch/s390/include/asm/uv.h | 31 ++++ arch/s390/kernel/uv.c | 227 ++++++++++++++++++++++++++++ 7 files changed, 300 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 37f96b6f0e61..3c4926aa78f4 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -9,6 +9,7 @@ #ifndef _ASM_S390_GMAP_H #define _ASM_S390_GMAP_H +#include #include /* Generic bits for GMAP notification on DAT table entry changes. */ @@ -31,6 +32,7 @@ * @table: pointer to the page directory * @asce: address space control element for gmap page table * @pfault_enabled: defines if pfaults are applicable for the guest + * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures * @pt_list: list of all page tables used in the shadow guest address space @@ -54,6 +56,8 @@ struct gmap { unsigned long asce_end; void *private; bool pfault_enabled; + /* only set for protected virtual machines */ + unsigned long guest_handle; /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bcfb6371086f..e21b618ad432 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -16,6 +16,8 @@ typedef struct { unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; + /* The mmu context belongs to a secure guest. 
*/ + atomic_t is_protected; /* * The following bitfields need a down_write on the mm * semaphore when they are written to. As they are only diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8d04e6f3f796..afa836014076 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); + atomic_set(&mm->context.is_protected, 0); mm->context.gmap_asce = 0; mm->context.flush_mm = 0; mm->context.compat_mm = test_thread_flag(TIF_31BIT); diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 85e944f04c70..4ebcf891ff3c 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -153,6 +153,11 @@ static inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE +#if IS_ENABLED(CONFIG_PGSTE) +int arch_make_page_accessible(struct page *page); +#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +#endif + #endif /* !__ASSEMBLY__ */ #define __PAGE_OFFSET 0x0UL diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 137a3920ca36..cc7a1adacb94 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -19,6 +19,7 @@ #include #include #include +#include extern pgd_t swapper_pg_dir[]; extern void paging_init(void); @@ -520,6 +521,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_is_protected(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(atomic_read(&mm->context.is_protected))) + return 1; +#endif + return 0; +} + static inline int mm_alloc_pgste(struct mm_struct *mm) { #ifdef CONFIG_PGSTE @@ -1061,7 +1071,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct 
mm_struct *mm, unsigned long addr, pte_t *ptep) { - return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_t res; + + res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + if (mm_is_protected(mm) && pte_present(res)) + uv_convert_from_secure(pte_val(res) & PAGE_MASK); + return res; } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION @@ -1073,7 +1088,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_t res; + + res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); + if (mm_is_protected(vma->vm_mm) && pte_present(res)) + uv_convert_from_secure(pte_val(res) & PAGE_MASK); + return res; } /* @@ -1088,12 +1108,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { + pte_t res; + if (full) { - pte_t pte = *ptep; + res = *ptep; *ptep = __pte(_PAGE_INVALID); - return pte; + } else { + res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } - return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + if (mm_is_protected(mm) && pte_present(res)) + uv_convert_from_secure(pte_val(res) & PAGE_MASK); + return res; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 1af6ce8023cc..d089a960b3e2 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -15,6 +15,7 @@ #include #include #include +#include #define UVC_RC_EXECUTED 0x0001 #define UVC_RC_INV_CMD 0x0002 @@ -24,6 +25,10 @@ #define UVC_CMD_QUI 0x0001 #define UVC_CMD_INIT_UV 0x000f +#define UVC_CMD_CONV_TO_SEC_STOR 0x0200 +#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201 +#define UVC_CMD_PIN_PAGE_SHARED 0x0341 +#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342 #define UVC_CMD_SET_SHARED_ACCESS 0x1000 #define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 @@ -31,8 
+36,12 @@ enum uv_cmds_inst { BIT_UVC_CMD_QUI = 0, BIT_UVC_CMD_INIT_UV = 1, + BIT_UVC_CMD_CONV_TO_SEC_STOR = 6, + BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7, BIT_UVC_CMD_SET_SHARED_ACCESS = 8, BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9, + BIT_UVC_CMD_PIN_PAGE_SHARED = 21, + BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, }; struct uv_cb_header { @@ -69,6 +78,19 @@ struct uv_cb_init { u64 reserved28[4]; } __packed __aligned(8); +struct uv_cb_cts { + struct uv_cb_header header; + u64 reserved08[2]; + u64 guest_handle; + u64 gaddr; +} __packed __aligned(8); + +struct uv_cb_cfs { + struct uv_cb_header header; + u64 reserved08[2]; + u64 paddr; +} __packed __aligned(8); + struct uv_cb_share { struct uv_cb_header header; u64 reserved08[3]; @@ -171,12 +193,21 @@ static inline int is_prot_virt_host(void) return prot_virt_host; } +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); + void setup_uv(void); void adjust_to_uv_max(unsigned long *vmax); #else #define is_prot_virt_host() 0 static inline void setup_uv(void) {} static inline void adjust_to_uv_max(unsigned long *vmax) {} + +static inline int uv_convert_from_secure(unsigned long paddr) +{ + return 0; +} #endif #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 1ddc42154ef6..4539003dac9d 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -97,4 +99,229 @@ void adjust_to_uv_max(unsigned long *vmax) { *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); } + +/* + * Requests the Ultravisor to pin the page in the shared state. This will + * cause an intercept when the guest attempts to unshare the pinned page. 
+ */ +static int uv_pin_shared(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_PIN_PAGE_SHARED, + .header.len = sizeof(uvcb), + .paddr = paddr, + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + +/* + * Requests the Ultravisor to encrypt a guest page and make it + * accessible to the host for paging (export). + * + * @paddr: Absolute host address of page to be exported + */ +int uv_convert_from_secure(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + +/* + * Calculate the expected ref_count for a page that would otherwise have no + * further pins. This was cribbed from similar functions in other places in + * the kernel, but with some slight modifications. We know that a secure + * page can not be a huge page for example. + */ +static int expected_page_refs(struct page *page) +{ + int res; + + res = page_mapcount(page); + if (PageSwapCache(page)) { + res++; + } else if (page_mapping(page)) { + res++; + if (page_has_private(page)) + res++; + } + return res; +} + +static int make_secure_pte(pte_t *ptep, unsigned long addr, + struct page *exp_page, struct uv_cb_header *uvcb) +{ + pte_t entry = READ_ONCE(*ptep); + struct page *page; + int expected, rc = 0; + + if (!pte_present(entry)) + return -ENXIO; + if (pte_val(entry) & _PAGE_INVALID) + return -ENXIO; + + page = pte_page(entry); + if (page != exp_page) + return -ENXIO; + if (PageWriteback(page)) + return -EAGAIN; + expected = expected_page_refs(page); + if (!page_ref_freeze(page, expected)) + return -EBUSY; + set_bit(PG_arch_1, &page->flags); + rc = uv_call(0, (u64)uvcb); + page_ref_unfreeze(page, expected); + /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */ + if (rc) + rc = uvcb->rc == 0x10a ? 
-ENXIO : -EINVAL; + return rc; +} + +/* + * Requests the Ultravisor to make a page accessible to a guest. + * If it's brought in the first time, it will be cleared. If + * it has been exported before, it will be decrypted and integrity + * checked. + */ +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +{ + struct vm_area_struct *vma; + bool local_drain = false; + spinlock_t *ptelock; + unsigned long uaddr; + struct page *page; + pte_t *ptep; + int rc; + +again: + rc = -EFAULT; + down_read(&gmap->mm->mmap_sem); + + uaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(uaddr)) + goto out; + vma = find_vma(gmap->mm, uaddr); + if (!vma) + goto out; + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. If + * userspace is playing dirty tricky with mapping huge pages later + * on this will result in a segmentation fault. + */ + if (is_vm_hugetlb_page(vma)) + goto out; + + rc = -ENXIO; + page = follow_page(vma, uaddr, FOLL_WRITE); + if (IS_ERR_OR_NULL(page)) + goto out; + + lock_page(page); + ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); + rc = make_secure_pte(ptep, uaddr, page, uvcb); + pte_unmap_unlock(ptep, ptelock); + unlock_page(page); +out: + up_read(&gmap->mm->mmap_sem); + + if (rc == -EAGAIN) { + wait_on_page_writeback(page); + } else if (rc == -EBUSY) { + /* + * If we have tried a local drain and the page refcount + * still does not match our expected safe value, try with a + * system wide drain. This is needed if the pagevecs holding + * the page are on a different CPU. + */ + if (local_drain) { + lru_add_drain_all(); + /* We give up here, and let the caller try again */ + return -EAGAIN; + } + /* + * We are here if the page refcount does not match the + * expected safe value. The main culprits are usually + * pagevecs. 
With lru_add_drain() we drain the pagevecs + * on the local CPU so that hopefully the refcount will + * reach the expected safe value. + */ + lru_add_drain(); + local_drain = true; + /* And now we try again immediately after draining */ + goto again; + } else if (rc == -ENXIO) { + if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) + return -EFAULT; + return -EAGAIN; + } + return rc; +} +EXPORT_SYMBOL_GPL(gmap_make_secure); + +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = gmap->guest_handle, + .gaddr = gaddr, + }; + + return gmap_make_secure(gmap, gaddr, &uvcb); +} +EXPORT_SYMBOL_GPL(gmap_convert_to_secure); + +/* + * To be called with the page locked or with an extra reference! This will + * prevent gmap_make_secure from touching the page concurrently. Having 2 + * parallel make_page_accessible is fine, as the UV calls will become a + * no-op if the page is already exported. + */ +int arch_make_page_accessible(struct page *page) +{ + int rc = 0; + + /* Hugepage cannot be protected, so nothing to do */ + if (PageHuge(page)) + return 0; + + /* + * PG_arch_1 is used in 3 places: + * 1. for kernel page tables during early boot + * 2. for storage keys of huge pages and KVM + * 3. As an indication that this page might be secure. This can + * overindicate, e.g. we set the bit before calling + * convert_to_secure. + * As secure pages are never huge, all 3 variants can co-exists. 
+ */ + if (!test_bit(PG_arch_1, &page->flags)) + return 0; + + rc = uv_pin_shared(page_to_phys(page)); + if (!rc) { + clear_bit(PG_arch_1, &page->flags); + return 0; + } + + rc = uv_convert_from_secure(page_to_phys(page)); + if (!rc) { + clear_bit(PG_arch_1, &page->flags); + return 0; + } + + return rc; +} +EXPORT_SYMBOL_GPL(arch_make_page_accessible); + #endif From 084ea4d611a3d00ee3930400b262240e10895900 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 21 Jan 2020 09:43:10 +0100 Subject: [PATCH 4/5] s390/mm: add (non)secure page access exceptions handlers Add exceptions handlers performing transparent transition of non-secure pages to secure (import) upon guest access and secure pages to non-secure (export) upon hypervisor access. Signed-off-by: Vasily Gorbik [frankja@linux.ibm.com: adding checks for failures] Signed-off-by: Janosch Frank [imbrenda@linux.ibm.com: adding a check for gmap fault] Signed-off-by: Claudio Imbrenda Acked-by: David Hildenbrand Acked-by: Cornelia Huck Reviewed-by: Christian Borntraeger [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- arch/s390/kernel/entry.h | 2 + arch/s390/kernel/pgm_check.S | 4 +- arch/s390/mm/fault.c | 78 ++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1d3927e01a5f..faca269d5f27 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -24,6 +24,8 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); +void do_secure_storage_access(struct pt_regs *regs); +void do_non_secure_storage_access(struct pt_regs *regs); void addressing_exception(struct pt_regs *regs); void data_exception(struct pt_regs *regs); diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index eee3a482195a..2c27907a5ffc 100644 --- 
a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -78,8 +78,8 @@ PGM_CHECK(do_dat_exception) /* 39 */ PGM_CHECK(do_dat_exception) /* 3a */ PGM_CHECK(do_dat_exception) /* 3b */ PGM_CHECK_DEFAULT /* 3c */ -PGM_CHECK_DEFAULT /* 3d */ -PGM_CHECK_DEFAULT /* 3e */ +PGM_CHECK(do_secure_storage_access) /* 3d */ +PGM_CHECK(do_non_secure_storage_access) /* 3e */ PGM_CHECK_DEFAULT /* 3f */ PGM_CHECK(monitor_event_exception) /* 40 */ PGM_CHECK_DEFAULT /* 41 */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7b0bb475c166..7bd86ebc882f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "../kernel/entry.h" #define __FAIL_ADDR_MASK -4096L @@ -816,3 +817,80 @@ out_extint: early_initcall(pfault_irq_init); #endif /* CONFIG_PFAULT */ + +#if IS_ENABLED(CONFIG_PGSTE) +void do_secure_storage_access(struct pt_regs *regs) +{ + unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK; + struct vm_area_struct *vma; + struct mm_struct *mm; + struct page *page; + int rc; + + switch (get_fault_type(regs)) { + case USER_FAULT: + mm = current->mm; + down_read(&mm->mmap_sem); + vma = find_vma(mm, addr); + if (!vma) { + up_read(&mm->mmap_sem); + do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + break; + } + page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); + if (IS_ERR_OR_NULL(page)) { + up_read(&mm->mmap_sem); + break; + } + if (arch_make_page_accessible(page)) + send_sig(SIGSEGV, current, 0); + put_page(page); + up_read(&mm->mmap_sem); + break; + case KERNEL_FAULT: + page = phys_to_page(addr); + if (unlikely(!try_get_page(page))) + break; + rc = arch_make_page_accessible(page); + put_page(page); + if (rc) + BUG(); + break; + case VDSO_FAULT: + /* fallthrough */ + case GMAP_FAULT: + /* fallthrough */ + default: + do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + WARN_ON_ONCE(1); + } +} +NOKPROBE_SYMBOL(do_secure_storage_access); + +void 
do_non_secure_storage_access(struct pt_regs *regs) +{ + unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK; + struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + + if (get_fault_type(regs) != GMAP_FAULT) { + do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + WARN_ON_ONCE(1); + return; + } + + if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL) + send_sig(SIGSEGV, current, 0); +} +NOKPROBE_SYMBOL(do_non_secure_storage_access); + +#else +void do_secure_storage_access(struct pt_regs *regs) +{ + default_trap_handler(regs); +} + +void do_non_secure_storage_access(struct pt_regs *regs) +{ + default_trap_handler(regs); +} +#endif From a0f60f8431999bf57cf53c3b27c47ef156b4fa17 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 13 Feb 2020 04:15:25 -0500 Subject: [PATCH 5/5] s390/protvirt: Add sysfs firmware interface for Ultravisor information That information, e.g. the maximum number of guests or installed Ultravisor facilities, is interesting for QEMU, Libvirt and administrators. Let's provide an easily parsable API to get that information. 
Signed-off-by: Janosch Frank Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kernel/uv.c | 87 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 4539003dac9d..c86d654351d1 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -325,3 +325,90 @@ int arch_make_page_accessible(struct page *page) EXPORT_SYMBOL_GPL(arch_make_page_accessible); #endif + +#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +static ssize_t uv_query_facilities(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + uv_info.inst_calls_list[0], + uv_info.inst_calls_list[1], + uv_info.inst_calls_list[2], + uv_info.inst_calls_list[3]); +} + +static struct kobj_attribute uv_query_facilities_attr = + __ATTR(facilities, 0444, uv_query_facilities, NULL); + +static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", + uv_info.max_guest_cpus); +} + +static struct kobj_attribute uv_query_max_guest_cpus_attr = + __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL); + +static ssize_t uv_query_max_guest_vms(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", + uv_info.max_num_sec_conf); +} + +static struct kobj_attribute uv_query_max_guest_vms_attr = + __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL); + +static ssize_t uv_query_max_guest_addr(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%lx\n", + uv_info.max_sec_stor_addr); +} + +static struct kobj_attribute uv_query_max_guest_addr_attr = + __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL); + +static struct attribute *uv_query_attrs[] = { + &uv_query_facilities_attr.attr, + 
&uv_query_max_guest_cpus_attr.attr, + &uv_query_max_guest_vms_attr.attr, + &uv_query_max_guest_addr_attr.attr, + NULL, +}; + +static struct attribute_group uv_query_attr_group = { + .attrs = uv_query_attrs, +}; + +static struct kset *uv_query_kset; +static struct kobject *uv_kobj; + +static int __init uv_info_init(void) +{ + int rc = -ENOMEM; + + if (!test_facility(158)) + return 0; + + uv_kobj = kobject_create_and_add("uv", firmware_kobj); + if (!uv_kobj) + return -ENOMEM; + + uv_query_kset = kset_create_and_add("query", NULL, uv_kobj); + if (!uv_query_kset) + goto out_kobj; + + rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group); + if (!rc) + return 0; + + kset_unregister(uv_query_kset); +out_kobj: + kobject_del(uv_kobj); + kobject_put(uv_kobj); + return rc; +} +device_initcall(uv_info_init); +#endif