From b62a8486de3ab1d7c2353ec422b9cca3abfcfbcd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:52 +0000 Subject: [PATCH 1/6] elfcore: Replace CONFIG_{IA64, UML} checks with a new option As arm64 is about to introduce MTE-specific phdrs in the core dump, add a common CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS option currently selectable by UML_X86 and IA64. Signed-off-by: Catalin Marinas Cc: Eric Biederman Link: https://lore.kernel.org/r/20220131165456.2160675-2-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/ia64/Kconfig | 1 + arch/x86/um/Kconfig | 1 + fs/Kconfig.binfmt | 3 +++ include/linux/elfcore.h | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..e003b2473c64 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 40d6a06e41c8..ead7e5b3a975 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,6 +8,7 @@ endmenu config UML_X86 def_bool y + select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4d5ae61580aa..68e586283764 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_BINFMT_ELF_EXTRA_PHDRS + bool + config ARCH_HAVE_ELF_PROT bool diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5..f8e206e82476 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:53 +0000 Subject: [PATCH 2/6] elf: Introduce the ARM MTE ELF segment type Memory tags will be dumped in the core file as segments with their own type. Discussions with the binutils and the generic ABI community settled on using new definitions in the PT_*PROC space (and to be documented in the processor-specific ABIs). Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). Not included in this patch since there is no upstream support but the CHERI/BSD community will also reserve: #define PT_ARM_MEMTAG_CHERI (PT_LOPROC + 0x2) #define PT_RISCV_MEMTAG_CHERI (PT_LOPROC + 0x3) Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..fe8e5b74cb39 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * From ab1e435ca7913e384ed801210418633eee43a71b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:54 +0000 Subject: [PATCH 3/6] arm64: mte: Define the number of bytes for storing the tags in a page Rather than explicitly calculating the number of bytes for a compact tag storage format corresponding to a page, just add a MTE_PAGE_TAG_STORAGE macro. With the current MTE implementation of 4 bits per tag, we store 2 tags in a byte. Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-4-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mte-def.h | 1 + arch/arm64/lib/mte.S | 4 ++-- arch/arm64/mm/mteswap.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 626d359b396e..14ee86b019c2 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -11,6 +11,7 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8) #define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index f531dcb95174..8590af3c98c0 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user) /* * Save the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_save_page_tags) multitag_transfer_size x7, x5 @@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags) /* * Restore the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_restore_page_tags) multitag_transfer_size x7, x5 diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 7c4ef56265ee..a9e50e930484 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages); void *mte_allocate_tag_storage(void) { /* tags granule is 16 bytes, 2 tags stored per byte */ - return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); + return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL); } void mte_free_tag_storage(char *storage) From 6dd8b1a0b6cb3ed93d24110e02e67ff9d006610a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:55 +0000 Subject: [PATCH 4/6] arm64: mte: Dump the MTE tags in the core file For each vma mapped with PROT_MTE (the VM_MTE flag set), generate a PT_ARM_MEMTAG_MTE segment in the core file and dump the corresponding tags. The in-file size for such segments is 128 bytes per page. For pages in a VM_MTE vma which are not present in the user page tables or don't have the PG_mte_tagged flag set (e.g. execute-only), just write zeros in the core file. An example of program headers for two vmas, one 2-page, the other 4-page long: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align ... LOAD 0x030000 0x0000ffff80034000 0x0000000000000000 0x000000 0x002000 RW 0x1000 LOAD 0x030000 0x0000ffff80036000 0x0000000000000000 0x004000 0x004000 RW 0x1000 ... LOPROC+0x1 0x05b000 0x0000ffff80034000 0x0000000000000000 0x000100 0x002000 0 LOPROC+0x1 0x05b100 0x0000ffff80036000 0x0000000000000000 0x000200 0x004000 0 Signed-off-by: Catalin Marinas Acked-by: Luis Machado Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220131165456.2160675-5-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/elfcore.c | 123 ++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 arch/arm64/kernel/elfcore.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cbcd42decb2a..b55c11796fad 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88b3e2a21408..986837d7ec82 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c new file mode 100644 index 000000000000..3455ee4acc04 --- /dev/null +++ b/arch/arm64/kernel/elfcore.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +#include +#include + +#define for_each_mte_vma(tsk, vma) \ + if (system_supports_mte()) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + if (vma->vm_flags & VM_MTE) + +static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; +} + +/* Derived from dump_user_range(); start/end must be page-aligned */ +static int mte_dump_tag_range(struct coredump_params *cprm, + unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + char tags[MTE_PAGE_TAG_STORAGE]; + struct page *page = get_dump_page(addr); + + /* + * get_dump_page() returns NULL when encountering an empty + * page table entry that would otherwise have been filled with + * the zero page. Skip the equivalent tag dump which would + * have been all zeros. + */ + if (!page) { + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + /* + * Pages mapped in user space as !pte_access_permitted() (e.g. + * PROT_EXEC only) may not have the PG_mte_tagged flag set. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + put_page(page); + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + mte_save_page_tags(page_address(page), tags); + put_page(page); + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) + return 0; + } + + return 1; +} + +Elf_Half elf_core_extra_phdrs(void) +{ + struct vm_area_struct *vma; + int vma_count = 0; + + for_each_mte_vma(current, vma) + vma_count++; + + return vma_count; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + struct vm_area_struct *vma; + + for_each_mte_vma(current, vma) { + struct elf_phdr phdr; + + phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = mte_vma_tag_dump_size(vma); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = 0; + phdr.p_align = 0; + + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + struct vm_area_struct *vma; + size_t data_size = 0; + + for_each_mte_vma(current, vma) + data_size += mte_vma_tag_dump_size(vma); + + return data_size; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + struct vm_area_struct *vma; + + for_each_mte_vma(current, vma) { + if (vma->vm_flags & VM_DONTDUMP) + continue; + + if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + return 0; + } + + return 1; +} From 731451ab3c0c6fe88142dbc73a74c71bd92a5cff Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:56 +0000 Subject: [PATCH 5/6] arm64: mte: Document the core dump file format Add the program header definition and data layout for the PT_ARM_MEMTAG_MTE segments. Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-6-catalin.marinas@arm.com Signed-off-by: Will Deacon --- .../arm64/memory-tagging-extension.rst | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb..5a70d7a3ca12 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -213,6 +213,29 @@ address ABI control and MTE configuration of a process as per the Documentation/arm64/tagged-address-abi.rst and above. The corresponding ``regset`` is 1 element of 8 bytes (``sizeof(long))``). +Core dump support +----------------- + +The allocation tags for user memory mapped with ``PROT_MTE`` are dumped +in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The +program header for such segment is defined as: + +:``p_type``: ``PT_ARM_MEMTAG_MTE`` +:``p_flags``: 0 +:``p_offset``: segment file offset +:``p_vaddr``: segment virtual address, same as the corresponding + ``PT_LOAD`` segment +:``p_paddr``: 0 +:``p_filesz``: segment size in file, calculated as ``p_mem_sz / 32`` + (two 4-bit tags cover 32 bytes of memory) +:``p_memsz``: segment size in memory, same as the corresponding + ``PT_LOAD`` segment +:``p_align``: 0 + +The tags are stored in the core file at ``p_offset`` as two 4-bit tags +in a byte. With the tag granule of 16 bytes, a 4K page requires 128 +bytes in the core file. + Example of correct usage ======================== From 3a4f7ef4bed5bdc77a1ac8132f9f0650bbcb3eae Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Fri, 18 Feb 2022 02:37:04 +0000 Subject: [PATCH 6/6] arm64: Change elfcore for_each_mte_vma() to use VMA iterator Rework for_each_mte_vma() to use a VMA iterator instead of an explicit linked-list. This will allow easy integration with the maple tree work which removes the VMA list altogether. Signed-off-by: Liam R. Howlett Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220218023650.672072-1-Liam.Howlett@oracle.com [will: Folded in fix from Catalin] Link: https://lore.kernel.org/r/YhUcywqIhmHvX6dG@arm.com Signed-off--by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3455ee4acc04..3ed39c61a510 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,9 +8,16 @@ #include #include -#define for_each_mte_vma(tsk, vma) \ +#ifndef VMA_ITERATOR +#define VMA_ITERATOR(name, mm, addr) \ + struct mm_struct *name = mm +#define for_each_vma(vmi, vma) \ + for (vma = vmi->mmap; vma; vma = vma->vm_next) +#endif + +#define for_each_mte_vma(vmi, vma) \ if (system_supports_mte()) \ - for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + for_each_vma(vmi, vma) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -65,8 +72,9 @@ Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) vma_count++; return vma_count; @@ -75,8 +83,9 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { struct elf_phdr phdr; phdr.p_type = PT_ARM_MEMTAG_MTE; @@ -100,8 +109,9 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -110,8 +120,9 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { if (vma->vm_flags & VM_DONTDUMP) continue;