Perf updates:
Core:
- Better handling of page table leaves on architectures which have
non-pagetable aligned huge/large pages. For such architectures a leaf
can actually be part of a larger entry.
- Prevent a deadlock vs. exec_update_mutex
Architectures:
- The related updates for page size calculation of leaf entries
- The usual churn to support new CPUs
- Small fixes and improvements all over the place
-----BEGIN PGP SIGNATURE-----
iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl/XvgATHHRnbHhAbGlu
dXRyb25peC5kZQAKCRCmGPVMDXSYoUrdEACatdr93wv75vnm5tCZM4EsFvB2PzVJ
ck4K4+hHiMVV4802qf+kW5plF+rckAU4TAai/L7wkTntKHvjD/0/o1epoIStb+dS
SCpVkQMCLT/8xT242iHPOfgsQpVpJnIiBwVRjn8HXu82nXdgMJhKnBjTe634UfxW
o2OCFiyJzpRi5l86gVp67ueqgvl34NPI2JaSLc0g80QfZ8akzdePPpED35CzYjZh
41k+7ssvt6qch3vMUySHAhkX4gQl0nc80YAaF/XZbCfvdyY7D03PtfBjfvphTSK0
l54z9aWh0ciK9P1aPfvkHDXBJUR2VtUAx2GiURK+XU3jNk3KMrz9CcBl1D/exIAg
07IsiYVoB38YAUOZoR9K8p+p+5EuwYRRUMAgfQfBALCuaLQV477Cne82b2KmNCus
1izUQvcDDf0s74OyYTHWFXRGla95COJvNLzkrZ1oU3mX4HgdKdOAUbf/2XTLWeKO
3HOIS+jsg5cp82tRe4X5r51h73pONYlo9lLo/CjQXz25vMcXKtE/MZGq2gkRff4p
N4k88eQ5LOsRqUaU46GcHozXRCfcpW7SPI9AaN5I/fKGIZvHP7uMdMb+g5DV8yHI
dNZ8u5uLPHwdg80C3fJ3Pnp7VsVNHliPXMwv0vib7BCp7aUVZWeFnOntw3PdYFRk
XKEbfl36IuAadg==
=rZ99
-----END PGP SIGNATURE-----
Merge tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Thomas Gleixner:
"Core:
- Better handling of page table leaves on architectures which have
non-pagetable aligned huge/large pages. For such architectures a leaf
can actually be part of a larger entry.
- Prevent a deadlock vs exec_update_mutex
Architectures:
- The related updates for page size calculation of leaf entries
- The usual churn to support new CPUs
- Small fixes and improvements all over the place"
* tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
perf/x86/intel: Add Tremont Topdown support
uprobes/x86: Fix fall-through warnings for Clang
perf/x86: Fix fall-through warnings for Clang
kprobes/x86: Fix fall-through warnings for Clang
perf/x86/intel/lbr: Fix the return type of get_lbr_cycles()
perf/x86/intel: Fix rtm_abort_event encoding on Ice Lake
x86/kprobes: Restore BTF if the single-stepping is cancelled
perf: Break deadlock involving exec_update_mutex
sparc64/mm: Implement pXX_leaf_size() support
powerpc/8xx: Implement pXX_leaf_size() support
arm64/mm: Implement pXX_leaf_size() support
perf/core: Fix arch_perf_get_page_size()
mm: Introduce pXX_leaf_size()
mm/gup: Provide gup_get_pte() more generic
perf/x86/intel: Add event constraint for CYCLE_ACTIVITY.STALLS_MEM_ANY
perf/x86/intel/uncore: Add Rocket Lake support
perf/x86/msr: Add Rocket Lake CPU support
perf/x86/cstate: Add Rocket Lake CPU support
perf/x86/intel: Add Rocket Lake CPU support
perf,mm: Handle non-page-table-aligned hugetlbfs
...
This commit is contained in:
commit
8a8ca83ec3
21 changed files with 349 additions and 108 deletions
|
|
@ -1028,6 +1028,8 @@ struct perf_sample_data {
|
|||
|
||||
u64 phys_addr;
|
||||
u64 cgroup;
|
||||
u64 data_page_size;
|
||||
u64 code_page_size;
|
||||
} ____cacheline_aligned;
|
||||
|
||||
/* default value for data source */
|
||||
|
|
@ -1585,4 +1587,8 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
|
|||
struct perf_event_mmap_page *userpg,
|
||||
u64 now);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_PERF_EVENT_H */
|
||||
|
|
|
|||
|
|
@ -258,6 +258,61 @@ static inline pte_t ptep_get(pte_t *ptep)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
|
||||
/*
|
||||
* WARNING: only to be used in the get_user_pages_fast() implementation.
|
||||
*
|
||||
* With get_user_pages_fast(), we walk down the pagetables without taking any
|
||||
* locks. For this we would like to load the pointers atomically, but sometimes
|
||||
* that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
|
||||
* we do have is the guarantee that a PTE will only either go from not present
|
||||
* to present, or present to not present or both -- it will not switch to a
|
||||
* completely different present page without a TLB flush in between; something
|
||||
* that we are blocking by holding interrupts off.
|
||||
*
|
||||
* Setting ptes from not present to present goes:
|
||||
*
|
||||
* ptep->pte_high = h;
|
||||
* smp_wmb();
|
||||
* ptep->pte_low = l;
|
||||
*
|
||||
* And present to not present goes:
|
||||
*
|
||||
* ptep->pte_low = 0;
|
||||
* smp_wmb();
|
||||
* ptep->pte_high = 0;
|
||||
*
|
||||
* We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
|
||||
* We load pte_high *after* loading pte_low, which ensures we don't see an older
|
||||
* value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
|
||||
* picked up a changed pte_high. We might have gotten rubbish values from
|
||||
* pte_low and pte_high, but we are guaranteed that pte_low will not have the
|
||||
* present bit set *unless* it is 'l'. Because get_user_pages_fast() only
|
||||
* operates on present ptes we're safe.
|
||||
*/
|
||||
static inline pte_t ptep_get_lockless(pte_t *ptep)
|
||||
{
|
||||
pte_t pte; /* local copy assembled from the two halves of *ptep */
|
||||
|
||||
do {
|
||||
pte.pte_low = ptep->pte_low; /* read the low half first */
|
||||
smp_rmb(); /* read barrier between the pte_low load and the pte_high load */
|
||||
pte.pte_high = ptep->pte_high; /* then read the high half */
|
||||
smp_rmb(); /* read barrier between the pte_high load and the pte_low re-read below */
|
||||
} while (unlikely(pte.pte_low != ptep->pte_low)); /* retry if pte_low changed while the halves were being read */
|
||||
|
||||
return pte; /* returned by value; *ptep itself is never written */
|
||||
}
|
||||
#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
/*
|
||||
* We require that the PTE can be read atomically.
|
||||
*/
|
||||
static inline pte_t ptep_get_lockless(pte_t *ptep)
|
||||
{
|
||||
return ptep_get(ptep); /* no low/high retry loop in this configuration; simply forwards to ptep_get() */
|
||||
}
|
||||
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
||||
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
|
||||
|
|
@ -1494,4 +1549,20 @@ typedef unsigned int pgtbl_mod_mask;
|
|||
#define pmd_leaf(x) 0
|
||||
#endif
|
||||
|
||||
/* Fallback pXX_leaf_size() macros: each is defined only if not already defined, ignores its argument and expands to a fixed size for that level. */
#ifndef pgd_leaf_size
|
||||
#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
|
||||
#endif
|
||||
#ifndef p4d_leaf_size
|
||||
#define p4d_leaf_size(x) P4D_SIZE
|
||||
#endif
|
||||
#ifndef pud_leaf_size
|
||||
#define pud_leaf_size(x) PUD_SIZE
|
||||
#endif
|
||||
#ifndef pmd_leaf_size
|
||||
#define pmd_leaf_size(x) PMD_SIZE
|
||||
#endif
|
||||
#ifndef pte_leaf_size
|
||||
#define pte_leaf_size(x) PAGE_SIZE
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_PGTABLE_H */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue