Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
Pull folio updates from Matthew Wilcox:
- Rewrite how munlock works to massively reduce the contention on
i_mmap_rwsem (Hugh Dickins):
https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/
- Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
Hellwig):
https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/
- Convert GUP to use folios and make pincount available for order-1
pages. (Matthew Wilcox)
- Convert a few more truncation functions to use folios (Matthew
Wilcox)
- Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
Wilcox)
- Convert rmap_walk to use folios (Matthew Wilcox)
- Convert most of shrink_page_list() to use a folio (Matthew Wilcox)
- Add support for creating large folios in readahead (Matthew Wilcox)
* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
mm/damon: minor cleanup for damon_pa_young
selftests/vm/transhuge-stress: Support file-backed PMD folios
mm/filemap: Support VM_HUGEPAGE for file mappings
mm/readahead: Switch to page_cache_ra_order
mm/readahead: Align file mappings for non-DAX
mm/readahead: Add large folio readahead
mm: Support arbitrary THP sizes
mm: Make large folios depend on THP
mm: Fix READ_ONLY_THP warning
mm/filemap: Allow large folios to be added to the page cache
mm: Turn can_split_huge_page() into can_split_folio()
mm/vmscan: Convert pageout() to take a folio
mm/vmscan: Turn page_check_references() into folio_check_references()
mm/vmscan: Account large folios correctly
mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
mm/vmscan: Free non-shmem folios without splitting them
mm/rmap: Constify the rmap_walk_control argument
mm/rmap: Convert rmap_walk() to take a folio
mm: Turn page_anon_vma() into folio_anon_vma()
mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
...
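Nearly every commit above follows the same mechanical pattern: an mm function that took a (possibly tail) struct page now takes a struct folio, and callers convert at the boundary with page_folio(). A minimal sketch of that shape (hypothetical helper, not from this series):

#include <linux/mm.h>

/* After conversion: a folio is never a tail page, so no compound_head() dance. */
static void example_set_dirty(struct folio *folio)
{
	folio_mark_dirty(folio);	/* covers all folio_nr_pages(folio) pages */
}

static void example_caller(struct page *page)
{
	/* Callers that still deal in struct page convert at the boundary. */
	example_set_dirty(page_folio(page));
}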
commit 9030fb0bb9
100 changed files with 2924 additions and 3044 deletions
include/linux/fs.h
@@ -2753,54 +2753,6 @@ extern void init_special_inode(struct inode *, umode_t, dev_t);
 extern void make_bad_inode(struct inode *);
 extern bool is_bad_inode(struct inode *);
-
-unsigned long invalidate_mapping_pages(struct address_space *mapping,
-					pgoff_t start, pgoff_t end);
-
-void invalidate_mapping_pagevec(struct address_space *mapping,
-		pgoff_t start, pgoff_t end,
-		unsigned long *nr_pagevec);
-
-static inline void invalidate_remote_inode(struct inode *inode)
-{
-	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	    S_ISLNK(inode->i_mode))
-		invalidate_mapping_pages(inode->i_mapping, 0, -1);
-}
-extern int invalidate_inode_pages2(struct address_space *mapping);
-extern int invalidate_inode_pages2_range(struct address_space *mapping,
-					 pgoff_t start, pgoff_t end);
-extern int write_inode_now(struct inode *, int);
-extern int filemap_fdatawrite(struct address_space *);
-extern int filemap_flush(struct address_space *);
-extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
-extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
-				   loff_t lend);
-extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
-		loff_t start_byte, loff_t end_byte);
-
-static inline int filemap_fdatawait(struct address_space *mapping)
-{
-	return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
-}
-
-extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
-				   loff_t lend);
-extern int filemap_write_and_wait_range(struct address_space *mapping,
-					loff_t lstart, loff_t lend);
-extern int __filemap_fdatawrite_range(struct address_space *mapping,
-				      loff_t start, loff_t end, int sync_mode);
-extern int filemap_fdatawrite_range(struct address_space *mapping,
-				    loff_t start, loff_t end);
-extern int filemap_check_errors(struct address_space *mapping);
-extern void __filemap_set_wb_err(struct address_space *mapping, int err);
-int filemap_fdatawrite_wbc(struct address_space *mapping,
-			   struct writeback_control *wbc);
-
-static inline int filemap_write_and_wait(struct address_space *mapping)
-{
-	return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
-}
-
-extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
-					     loff_t lend);
-extern int __must_check file_check_and_advance_wb_err(struct file *file);
@@ -2812,67 +2764,6 @@ static inline int file_write_and_wait(struct file *file)
 	return file_write_and_wait_range(file, 0, LLONG_MAX);
 }
-
-/**
- * filemap_set_wb_err - set a writeback error on an address_space
- * @mapping: mapping in which to set writeback error
- * @err: error to be set in mapping
- *
- * When writeback fails in some way, we must record that error so that
- * userspace can be informed when fsync and the like are called.  We endeavor
- * to report errors on any file that was open at the time of the error.  Some
- * internal callers also need to know when writeback errors have occurred.
- *
- * When a writeback error occurs, most filesystems will want to call
- * filemap_set_wb_err to record the error in the mapping so that it will be
- * automatically reported whenever fsync is called on the file.
- */
-static inline void filemap_set_wb_err(struct address_space *mapping, int err)
-{
-	/* Fastpath for common case of no error */
-	if (unlikely(err))
-		__filemap_set_wb_err(mapping, err);
-}
-
-/**
- * filemap_check_wb_err - has an error occurred since the mark was sampled?
- * @mapping: mapping to check for writeback errors
- * @since: previously-sampled errseq_t
- *
- * Grab the errseq_t value from the mapping, and see if it has changed "since"
- * the given value was sampled.
- *
- * If it has then report the latest error set, otherwise return 0.
- */
-static inline int filemap_check_wb_err(struct address_space *mapping,
-				       errseq_t since)
-{
-	return errseq_check(&mapping->wb_err, since);
-}
-
-/**
- * filemap_sample_wb_err - sample the current errseq_t to test for later errors
- * @mapping: mapping to be sampled
- *
- * Writeback errors are always reported relative to a particular sample point
- * in the past.  This function provides those sample points.
- */
-static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
-{
-	return errseq_sample(&mapping->wb_err);
-}
-
-/**
- * file_sample_sb_err - sample the current errseq_t to test for later errors
- * @file: file pointer to be sampled
- *
- * Grab the most current superblock-level errseq_t value for the given
- * struct file.
- */
-static inline errseq_t file_sample_sb_err(struct file *file)
-{
-	return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
-}
-
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
 			   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
@@ -3627,15 +3518,4 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
 			   int advice);
-
-/*
- * Flush file data before changing attributes.  Caller must hold any locks
- * required to prevent further writes to this file until we're done setting
- * flags.
- */
-static inline int inode_drain_writes(struct inode *inode)
-{
-	inode_dio_wait(inode);
-	return filemap_write_and_wait(inode->i_mapping);
-}
-
 #endif /* _LINUX_FS_H */
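None of the prototypes removed above leave the kernel; they reappear, minus the extern noise, in <linux/pagemap.h> in the large +120 hunk further down this diff. The wb_err helpers among them implement the errseq_t sample/check pattern; a sketch of how a filesystem fsync method might use that pair (hypothetical function, error handling abridged):

#include <linux/pagemap.h>

static int example_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct address_space *mapping = file->f_mapping;
	errseq_t since = filemap_sample_wb_err(mapping);
	int err;

	err = filemap_fdatawrite_range(mapping, start, end);
	if (err)
		return err;
	/* ... write out metadata ... */
	/* Report any writeback error raised since the sample point. */
	return filemap_check_wb_err(mapping, since);
}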
include/linux/hmm.h
@@ -9,14 +9,9 @@
 #ifndef LINUX_HMM_H
 #define LINUX_HMM_H
 
-#include <linux/kconfig.h>
-#include <linux/pgtable.h>
+#include <linux/mm.h>
 
-#include <linux/device.h>
-#include <linux/migrate.h>
-#include <linux/memremap.h>
-#include <linux/completion.h>
-#include <linux/mmu_notifier.h>
+struct mmu_interval_notifier;
 
 /*
  * On output:
include/linux/huge_mm.h
@@ -185,7 +185,7 @@ void prep_transhuge_page(struct page *page);
 void free_transhuge_page(struct page *page);
 bool is_transparent_hugepage(struct page *page);
 
-bool can_split_huge_page(struct page *page, int *pextra_pins);
+bool can_split_folio(struct folio *folio, int *pextra_pins);
 int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
@@ -194,7 +194,7 @@ static inline int split_huge_page(struct page *page)
 void deferred_split_huge_page(struct page *page);
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address, bool freeze, struct page *page);
+		unsigned long address, bool freeze, struct folio *folio);
 
 #define split_huge_pmd(__vma, __pmd, __address)				\
 	do {								\
@@ -207,7 +207,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
-		bool freeze, struct page *page);
+		bool freeze, struct folio *folio);
 
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
@@ -250,30 +250,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 	return NULL;
 }
 
-/**
- * thp_order - Order of a transparent huge page.
- * @page: Head page of a transparent huge page.
- */
-static inline unsigned int thp_order(struct page *page)
-{
-	VM_BUG_ON_PGFLAGS(PageTail(page), page);
-	if (PageHead(page))
-		return HPAGE_PMD_ORDER;
-	return 0;
-}
-
-/**
- * thp_nr_pages - The number of regular pages in this huge page.
- * @page: The head page of a huge page.
- */
-static inline int thp_nr_pages(struct page *page)
-{
-	VM_BUG_ON_PGFLAGS(PageTail(page), page);
-	if (PageHead(page))
-		return HPAGE_PMD_NR;
-	return 1;
-}
-
 /**
  * folio_test_pmd_mappable - Can we map this folio with a PMD?
  * @folio: The folio to test
@@ -336,18 +312,6 @@ static inline struct list_head *page_deferred_list(struct page *page)
 #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
 #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
 
-static inline unsigned int thp_order(struct page *page)
-{
-	VM_BUG_ON_PGFLAGS(PageTail(page), page);
-	return 0;
-}
-
-static inline int thp_nr_pages(struct page *page)
-{
-	VM_BUG_ON_PGFLAGS(PageTail(page), page);
-	return 1;
-}
-
 static inline bool folio_test_pmd_mappable(struct folio *folio)
 {
 	return false;
@@ -387,7 +351,7 @@ static inline bool is_transparent_hugepage(struct page *page)
 #define thp_get_unmapped_area	NULL
 
 static inline bool
-can_split_huge_page(struct page *page, int *pextra_pins)
+can_split_folio(struct folio *folio, int *pextra_pins)
 {
 	BUILD_BUG();
 	return false;
@@ -406,9 +370,9 @@ static inline void deferred_split_huge_page(struct page *page) {}
 	do { } while (0)
 
 static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address, bool freeze, struct page *page) {}
+		unsigned long address, bool freeze, struct folio *folio) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
-		unsigned long address, bool freeze, struct page *page) {}
+		unsigned long address, bool freeze, struct folio *folio) {}
 
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
@@ -483,15 +447,10 @@ static inline bool thp_migration_supported(void)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-/**
- * thp_size - Size of a transparent huge page.
- * @page: Head page of a transparent huge page.
- *
- * Return: Number of bytes in this page.
- */
-static inline unsigned long thp_size(struct page *page)
+static inline int split_folio_to_list(struct folio *folio,
+		struct list_head *list)
 {
-	return PAGE_SIZE << thp_order(page);
+	return split_huge_page_to_list(&folio->page, list);
 }
 
 #endif /* _LINUX_HUGE_MM_H */
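The new split_folio_to_list() pairs with the renamed can_split_folio(); a hypothetical caller, loosely following the pattern mm/vmscan.c uses in this release:

#include <linux/huge_mm.h>

static bool example_try_split(struct folio *folio, struct list_head *list)
{
	/* NULL: we don't need the extra-pins count back. */
	if (!can_split_folio(folio, NULL))
		return false;
	/* Caller must hold the folio lock; zero means the split succeeded. */
	return split_folio_to_list(folio, list) == 0;
}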
include/linux/hugetlb.h
@@ -970,6 +970,11 @@ static inline struct hstate *page_hstate(struct page *page)
 	return NULL;
 }
 
+static inline struct hstate *size_to_hstate(unsigned long size)
+{
+	return NULL;
+}
+
 static inline unsigned long huge_page_size(struct hstate *h)
 {
 	return PAGE_SIZE;
include/linux/ksm.h
@@ -51,7 +51,7 @@ static inline void ksm_exit(struct mm_struct *mm)
 struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address);
 
-void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
+void rmap_walk_ksm(struct folio *folio, const struct rmap_walk_control *rwc);
 void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
 
 #else  /* !CONFIG_KSM */
@@ -78,8 +78,8 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 	return page;
 }
 
-static inline void rmap_walk_ksm(struct page *page,
-			struct rmap_walk_control *rwc)
+static inline void rmap_walk_ksm(struct folio *folio,
+			const struct rmap_walk_control *rwc)
 {
 }
 
include/linux/memremap.h
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
+
+#include <linux/mm.h>
 #include <linux/range.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
@@ -66,9 +68,9 @@ enum memory_type {
 
 struct dev_pagemap_ops {
 	/*
-	 * Called once the page refcount reaches 1.  (ZONE_DEVICE pages never
-	 * reach 0 refcount unless there is a refcount bug. This allows the
-	 * device driver to implement its own memory management.)
+	 * Called once the page refcount reaches 0.  The reference count will be
+	 * reset to one by the core code after the method is called to prepare
+	 * for handing out the page again.
	 */
 	void (*page_free)(struct page *page);
 
@@ -129,6 +131,25 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
 	return 1 << pgmap->vmemmap_shift;
 }
 
+static inline bool is_device_private_page(const struct page *page)
+{
+	return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
+		is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
+
+static inline bool folio_is_device_private(const struct folio *folio)
+{
+	return is_device_private_page(&folio->page);
+}
+
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+	return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
+		is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+}
+
 #ifdef CONFIG_ZONE_DEVICE
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
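Per the reworded comment above, page_free now fires when the refcount reaches zero, and the core resets it to one before the page can be handed out again, so a driver's callback reduces to returning the page to its own allocator. A sketch (every example_* name is made up):

#include <linux/memremap.h>

struct example_dev {
	struct dev_pagemap pgmap;
	/* ... driver allocator state ... */
};

static void example_free_slot(struct example_dev *edev, struct page *page);

static void example_page_free(struct page *page)
{
	struct example_dev *edev =
		container_of(page->pgmap, struct example_dev, pgmap);

	/* Refcount is 0 here; core re-initialises it to 1 after we return. */
	example_free_slot(edev, page);
}

static const struct dev_pagemap_ops example_pagemap_ops = {
	.page_free = example_page_free,
};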
include/linux/mm.h
@@ -3,9 +3,6 @@
 #define _LINUX_MM_H
 
 #include <linux/errno.h>
-
-#ifdef __KERNEL__
-
 #include <linux/mmdebug.h>
 #include <linux/gfp.h>
 #include <linux/bug.h>
@@ -26,7 +23,6 @@
 #include <linux/err.h>
 #include <linux/page-flags.h>
 #include <linux/page_ref.h>
-#include <linux/memremap.h>
 #include <linux/overflow.h>
 #include <linux/sizes.h>
 #include <linux/sched.h>
@@ -216,8 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+#define folio_page_idx(folio, p)	(page_to_pfn(p) - folio_pfn(folio))
 #else
 #define nth_page(page,n) ((page) + (n))
+#define folio_page_idx(folio, p)	((p) - &(folio)->page)
 #endif
 
 /* to align the pointer to the (next) page boundary */
@@ -227,6 +225,10 @@
 #define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+static inline struct folio *lru_to_folio(struct list_head *head)
+{
+	return list_entry((head)->prev, struct folio, lru);
+}
 
 void setup_initial_init_mm(void *start_code, void *end_code,
 			   void *end_data, void *brk);
@@ -775,21 +777,26 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
-static inline int head_compound_mapcount(struct page *head)
+/*
+ * How many times the entire folio is mapped as a single unit (eg by a
+ * PMD or PUD entry).  This is probably not what you want, except for
+ * debugging purposes; look at folio_mapcount() or page_mapcount()
+ * instead.
+ */
+static inline int folio_entire_mapcount(struct folio *folio)
 {
-	return atomic_read(compound_mapcount_ptr(head)) + 1;
+	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+	return atomic_read(folio_mapcount_ptr(folio)) + 1;
 }
 
 /*
  * Mapcount of compound page as a whole, does not include mapped sub-pages.
  *
- * Must be called only for compound pages or any their tail sub-pages.
+ * Must be called only for compound pages.
  */
 static inline int compound_mapcount(struct page *page)
 {
-	VM_BUG_ON_PAGE(!PageCompound(page), page);
-	page = compound_head(page);
-	return head_compound_mapcount(page);
+	return folio_entire_mapcount(page_folio(page));
 }
 
 /*
@@ -819,8 +826,14 @@ static inline int page_mapcount(struct page *page)
 	return atomic_read(&page->_mapcount) + 1;
 }
 
+int folio_mapcount(struct folio *folio);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int total_mapcount(struct page *page);
+static inline int total_mapcount(struct page *page)
+{
+	return folio_mapcount(page_folio(page));
+}
+
 int page_trans_huge_mapcount(struct page *page);
 #else
 static inline int total_mapcount(struct page *page)
@@ -890,33 +903,17 @@ static inline void destroy_compound_page(struct page *page)
 	compound_page_dtors[page[1].compound_dtor](page);
 }
 
-static inline bool hpage_pincount_available(struct page *page)
-{
-	/*
-	 * Can the page->hpage_pinned_refcount field be used? That field is in
-	 * the 3rd page of the compound page, so the smallest (2-page) compound
-	 * pages cannot support it.
-	 */
-	page = compound_head(page);
-	return PageCompound(page) && compound_order(page) > 1;
-}
-
 static inline int head_compound_pincount(struct page *head)
 {
 	return atomic_read(compound_pincount_ptr(head));
 }
 
-static inline int compound_pincount(struct page *page)
-{
-	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
-	page = compound_head(page);
-	return head_compound_pincount(page);
-}
-
 static inline void set_compound_order(struct page *page, unsigned int order)
 {
 	page[1].compound_order = order;
+#ifdef CONFIG_64BIT
+	page[1].compound_nr = 1U << order;
+#endif
 }
 
 /* Returns the number of pages in this potentially compound page. */
@@ -924,7 +921,11 @@ static inline unsigned long compound_nr(struct page *page)
 {
 	if (!PageHead(page))
 		return 1;
+#ifdef CONFIG_64BIT
+	return page[1].compound_nr;
+#else
 	return 1UL << compound_order(page);
+#endif
 }
 
 /* Returns the number of bytes in this potentially compound page. */
@@ -939,6 +940,37 @@ static inline unsigned int page_shift(struct page *page)
 	return PAGE_SHIFT + compound_order(page);
 }
 
+/**
+ * thp_order - Order of a transparent huge page.
+ * @page: Head page of a transparent huge page.
+ */
+static inline unsigned int thp_order(struct page *page)
+{
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
+	return compound_order(page);
+}
+
+/**
+ * thp_nr_pages - The number of regular pages in this huge page.
+ * @page: The head page of a huge page.
+ */
+static inline int thp_nr_pages(struct page *page)
+{
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
+	return compound_nr(page);
+}
+
+/**
+ * thp_size - Size of a transparent huge page.
+ * @page: Head page of a transparent huge page.
+ *
+ * Return: Number of bytes in this page.
+ */
+static inline unsigned long thp_size(struct page *page)
+{
+	return PAGE_SIZE << thp_order(page);
+}
+
 void free_compound_page(struct page *page);
 
 #ifdef CONFIG_MMU
@@ -1090,59 +1122,35 @@ static inline bool is_zone_device_page(const struct page *page)
 }
 #endif
 
+static inline bool folio_is_zone_device(const struct folio *folio)
+{
+	return is_zone_device_page(&folio->page);
+}
+
 static inline bool is_zone_movable_page(const struct page *page)
 {
 	return page_zonenum(page) == ZONE_MOVABLE;
 }
 
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-void free_devmap_managed_page(struct page *page);
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
 
-static inline bool page_is_devmap_managed(struct page *page)
+bool __put_devmap_managed_page(struct page *page);
+static inline bool put_devmap_managed_page(struct page *page)
 {
 	if (!static_branch_unlikely(&devmap_managed_key))
 		return false;
 	if (!is_zone_device_page(page))
 		return false;
-	switch (page->pgmap->type) {
-	case MEMORY_DEVICE_PRIVATE:
-	case MEMORY_DEVICE_FS_DAX:
-		return true;
-	default:
-		break;
-	}
-	return false;
+	return __put_devmap_managed_page(page);
 }
 
-void put_devmap_managed_page(struct page *page);
-
-#else /* CONFIG_DEV_PAGEMAP_OPS */
-static inline bool page_is_devmap_managed(struct page *page)
+#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
+static inline bool put_devmap_managed_page(struct page *page)
 {
 	return false;
 }
-
-static inline void put_devmap_managed_page(struct page *page)
-{
-}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
-
-static inline bool is_device_private_page(const struct page *page)
-{
-	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
-		IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
-		is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_pci_p2pdma_page(const struct page *page)
-{
-	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
-		IS_ENABLED(CONFIG_PCI_P2PDMA) &&
-		is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
-}
+#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
 
 /* 127: arbitrary random number, small enough to assemble well */
 #define folio_ref_zero_or_close_to_overflow(folio)	\
@@ -1168,9 +1176,6 @@ static inline void get_page(struct page *page)
 }
 
 bool __must_check try_grab_page(struct page *page, unsigned int flags);
-struct page *try_grab_compound_head(struct page *page, int refs,
-				    unsigned int flags);
-
 
 static inline __must_check bool try_get_page(struct page *page)
 {
@@ -1225,16 +1230,11 @@ static inline void put_page(struct page *page)
 	struct folio *folio = page_folio(page);
 
 	/*
-	 * For devmap managed pages we need to catch refcount transition from
-	 * 2 to 1, when refcount reach one it means the page is free and we
-	 * need to inform the device driver through callback. See
-	 * include/linux/memremap.h and HMM for details.
+	 * For some devmap managed pages we need to catch refcount transition
+	 * from 2 to 1:
	 */
-	if (page_is_devmap_managed(&folio->page)) {
-		put_devmap_managed_page(&folio->page);
+	if (put_devmap_managed_page(&folio->page))
 		return;
-	}
 
 	folio_put(folio);
 }
@@ -1264,10 +1264,9 @@
 * applications that don't have huge page reference counts, this won't be an
 * issue.
 *
- * Locking: the lockless algorithm described in page_cache_get_speculative()
- * and page_cache_gup_pin_speculative() provides safe operation for
- * get_user_pages and page_mkclean and other calls that race to set up page
- * table entries.
+ * Locking: the lockless algorithm described in folio_try_get_rcu()
+ * provides safe operation for get_user_pages(), page_mkclean() and
+ * other calls that race to set up page table entries.
 */
 #define GUP_PIN_COUNTING_BIAS (1U << 10)
@@ -1278,70 +1277,11 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
 				      bool make_dirty);
 void unpin_user_pages(struct page **pages, unsigned long npages);
 
-/**
- * page_maybe_dma_pinned - Report if a page is pinned for DMA.
- * @page: The page.
- *
- * This function checks if a page has been pinned via a call to
- * a function in the pin_user_pages() family.
- *
- * For non-huge pages, the return value is partially fuzzy: false is not fuzzy,
- * because it means "definitely not pinned for DMA", but true means "probably
- * pinned for DMA, but possibly a false positive due to having at least
- * GUP_PIN_COUNTING_BIAS worth of normal page references".
- *
- * False positives are OK, because: a) it's unlikely for a page to get that many
- * refcounts, and b) all the callers of this routine are expected to be able to
- * deal gracefully with a false positive.
- *
- * For huge pages, the result will be exactly correct. That's because we have
- * more tracking data available: the 3rd struct page in the compound page is
- * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS
- * scheme).
- *
- * For more information, please see Documentation/core-api/pin_user_pages.rst.
- *
- * Return: True, if it is likely that the page has been "dma-pinned".
- * False, if the page is definitely not dma-pinned.
- */
-static inline bool page_maybe_dma_pinned(struct page *page)
-{
-	if (hpage_pincount_available(page))
-		return compound_pincount(page) > 0;
-
-	/*
-	 * page_ref_count() is signed. If that refcount overflows, then
-	 * page_ref_count() returns a negative value, and callers will avoid
-	 * further incrementing the refcount.
-	 *
-	 * Here, for that overflow case, use the signed bit to count a little
-	 * bit higher via unsigned math, and thus still get an accurate result.
-	 */
-	return ((unsigned int)page_ref_count(compound_head(page))) >=
-		GUP_PIN_COUNTING_BIAS;
-}
-
 static inline bool is_cow_mapping(vm_flags_t flags)
 {
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
-/*
- * This should most likely only be called during fork() to see whether we
- * should break the cow immediately for a page on the src mm.
- */
-static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
-					  struct page *page)
-{
-	if (!is_cow_mapping(vma->vm_flags))
-		return false;
-
-	if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
-		return false;
-
-	return page_maybe_dma_pinned(page);
-}
-
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
@@ -1586,6 +1526,74 @@ static inline unsigned long folio_pfn(struct folio *folio)
 	return page_to_pfn(&folio->page);
 }
 
+static inline atomic_t *folio_pincount_ptr(struct folio *folio)
+{
+	return &folio_page(folio, 1)->compound_pincount;
+}
+
+/**
+ * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA.
+ * @folio: The folio.
+ *
+ * This function checks if a folio has been pinned via a call to
+ * a function in the pin_user_pages() family.
+ *
+ * For small folios, the return value is partially fuzzy: false is not fuzzy,
+ * because it means "definitely not pinned for DMA", but true means "probably
+ * pinned for DMA, but possibly a false positive due to having at least
+ * GUP_PIN_COUNTING_BIAS worth of normal folio references".
+ *
+ * False positives are OK, because: a) it's unlikely for a folio to
+ * get that many refcounts, and b) all the callers of this routine are
+ * expected to be able to deal gracefully with a false positive.
+ *
+ * For large folios, the result will be exactly correct. That's because
+ * we have more tracking data available: the compound_pincount is used
+ * instead of the GUP_PIN_COUNTING_BIAS scheme.
+ *
+ * For more information, please see Documentation/core-api/pin_user_pages.rst.
+ *
+ * Return: True, if it is likely that the page has been "dma-pinned".
+ * False, if the page is definitely not dma-pinned.
+ */
+static inline bool folio_maybe_dma_pinned(struct folio *folio)
+{
+	if (folio_test_large(folio))
+		return atomic_read(folio_pincount_ptr(folio)) > 0;
+
+	/*
+	 * folio_ref_count() is signed. If that refcount overflows, then
+	 * folio_ref_count() returns a negative value, and callers will avoid
+	 * further incrementing the refcount.
+	 *
+	 * Here, for that overflow case, use the sign bit to count a little
+	 * bit higher via unsigned math, and thus still get an accurate result.
+	 */
+	return ((unsigned int)folio_ref_count(folio)) >=
+		GUP_PIN_COUNTING_BIAS;
+}
+
+static inline bool page_maybe_dma_pinned(struct page *page)
+{
+	return folio_maybe_dma_pinned(page_folio(page));
+}
+
+/*
+ * This should most likely only be called during fork() to see whether we
+ * should break the cow immediately for a page on the src mm.
+ */
+static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
+					  struct page *page)
+{
+	if (!is_cow_mapping(vma->vm_flags))
+		return false;
+
+	if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
+		return false;
+
+	return page_maybe_dma_pinned(page);
+}
+
 /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
 #ifdef CONFIG_MIGRATION
 static inline bool is_pinnable_page(struct page *page)
@@ -1600,6 +1608,11 @@ static inline bool is_pinnable_page(struct page *page)
 }
 #endif
 
+static inline bool folio_is_pinnable(struct folio *folio)
+{
+	return is_pinnable_page(&folio->page);
+}
+
 static inline void set_page_zone(struct page *page, enum zone_type zone)
 {
 	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
@@ -1749,7 +1762,6 @@ static inline void *folio_address(const struct folio *folio)
 }
 
 extern void *page_rmapping(struct page *page);
-extern struct anon_vma *page_anon_vma(struct page *page);
 extern pgoff_t __page_file_index(struct page *page);
 
 /*
@@ -1855,7 +1867,6 @@ extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
-int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
 extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
@@ -2921,13 +2932,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 #define FOLL_NOWAIT	0x20	/* if a disk transfer is needed, start the IO
 				 * and return without waiting upon it */
-#define FOLL_POPULATE	0x40	/* fault in pages (with FOLL_MLOCK) */
 #define FOLL_NOFAULT	0x80	/* do not fault in pages */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
 #define FOLL_TRIED	0x800	/* a retry, previous pass started an IO */
-#define FOLL_MLOCK	0x1000	/* lock present pages */
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
@@ -3381,5 +3390,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
 }
 #endif
 
-#endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
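folio_maybe_dma_pinned() above is the check side of pin_user_pages(): exact for any large folio now that compound_pincount lives in the first tail page (hence pincount on order-1 pages), heuristic only for order-0. A hypothetical user:

#include <linux/mm.h>

static bool example_any_page_pinned(struct page **pages, unsigned long npages)
{
	unsigned long i;

	for (i = 0; i < npages; i++)
		if (folio_maybe_dma_pinned(page_folio(pages[i])))
			return true;
	return false;
}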
include/linux/mm_inline.h
@@ -99,7 +99,8 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
 
 	update_lru_size(lruvec, lru, folio_zonenum(folio),
 			folio_nr_pages(folio));
-	list_add(&folio->lru, &lruvec->lists[lru]);
+	if (lru != LRU_UNEVICTABLE)
+		list_add(&folio->lru, &lruvec->lists[lru]);
 }
 
 static __always_inline void add_page_to_lru_list(struct page *page,
@@ -115,6 +116,7 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
 
 	update_lru_size(lruvec, lru, folio_zonenum(folio),
 			folio_nr_pages(folio));
+	/* This is not expected to be used on LRU_UNEVICTABLE */
 	list_add_tail(&folio->lru, &lruvec->lists[lru]);
 }
 
@@ -127,8 +129,11 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
 static __always_inline
 void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
 {
-	list_del(&folio->lru);
-	update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
+	enum lru_list lru = folio_lru_list(folio);
+
+	if (lru != LRU_UNEVICTABLE)
+		list_del(&folio->lru);
+	update_lru_size(lruvec, lru, folio_zonenum(folio),
 			-folio_nr_pages(folio));
 }
 
include/linux/mm_types.h
@@ -85,7 +85,16 @@ struct page {
			 * lruvec->lru_lock.  Sometimes used as a generic list
			 * by the page owner.
			 */
-			struct list_head lru;
+			union {
+				struct list_head lru;
+				/* Or, for the Unevictable "LRU list" slot */
+				struct {
+					/* Always even, to negate PageTail */
+					void *__filler;
+					/* Count page's or folio's mlocks */
+					unsigned int mlock_count;
+				};
+			};
 			/* See page-flags.h for PAGE_MAPPING_FLAGS */
 			struct address_space *mapping;
 			pgoff_t index;		/* Our offset within mapping. */
@@ -126,11 +135,14 @@ struct page {
 			unsigned char compound_dtor;
 			unsigned char compound_order;
 			atomic_t compound_mapcount;
+			atomic_t compound_pincount;
+#ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
+#endif
 		};
 		struct {	/* Second tail page of compound page */
 			unsigned long _compound_pad_1;	/* compound_head */
-			atomic_t hpage_pinned_refcount;
+			unsigned long _compound_pad_2;
 			/* For both global and memcg */
 			struct list_head deferred_list;
 		};
@@ -241,7 +253,13 @@ struct folio {
 		struct {
 	/* public: */
 			unsigned long flags;
-			struct list_head lru;
+			union {
+				struct list_head lru;
+				struct {
+					void *__filler;
+					unsigned int mlock_count;
+				};
+			};
 			struct address_space *mapping;
 			pgoff_t index;
 			void *private;
@@ -285,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page)
 
 static inline atomic_t *compound_pincount_ptr(struct page *page)
 {
-	return &page[2].hpage_pinned_refcount;
+	return &page[1].compound_pincount;
 }
 
 /*
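The lru/mlock_count union above is what lets the munlock rewrite count mlocks without growing struct page: an unevictable folio sits on no LRU list, so its list slot is free to hold the count. A much-simplified sketch of the accounting (the real code in mm/mlock.c runs under the lruvec lock and handles isolation races):

#include <linux/mm.h>

static void example_count_mlock(struct folio *folio)
{
	/* mlock_count is only meaningful while the folio is unevictable. */
	if (folio_test_unevictable(folio))
		folio->mlock_count++;
}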
include/linux/pagemap.h
@@ -18,6 +18,120 @@
 
 struct folio_batch;
 
+unsigned long invalidate_mapping_pages(struct address_space *mapping,
+					pgoff_t start, pgoff_t end);
+
+static inline void invalidate_remote_inode(struct inode *inode)
+{
+	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode))
+		invalidate_mapping_pages(inode->i_mapping, 0, -1);
+}
+int invalidate_inode_pages2(struct address_space *mapping);
+int invalidate_inode_pages2_range(struct address_space *mapping,
+		pgoff_t start, pgoff_t end);
+int write_inode_now(struct inode *, int sync);
+int filemap_fdatawrite(struct address_space *);
+int filemap_flush(struct address_space *);
+int filemap_fdatawait_keep_errors(struct address_space *mapping);
+int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
+int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
+		loff_t start_byte, loff_t end_byte);
+
+static inline int filemap_fdatawait(struct address_space *mapping)
+{
+	return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
+}
+
+bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
+int filemap_write_and_wait_range(struct address_space *mapping,
+		loff_t lstart, loff_t lend);
+int __filemap_fdatawrite_range(struct address_space *mapping,
+		loff_t start, loff_t end, int sync_mode);
+int filemap_fdatawrite_range(struct address_space *mapping,
+		loff_t start, loff_t end);
+int filemap_check_errors(struct address_space *mapping);
+void __filemap_set_wb_err(struct address_space *mapping, int err);
+int filemap_fdatawrite_wbc(struct address_space *mapping,
+			   struct writeback_control *wbc);
+
+static inline int filemap_write_and_wait(struct address_space *mapping)
+{
+	return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
+}
+
+/**
+ * filemap_set_wb_err - set a writeback error on an address_space
+ * @mapping: mapping in which to set writeback error
+ * @err: error to be set in mapping
+ *
+ * When writeback fails in some way, we must record that error so that
+ * userspace can be informed when fsync and the like are called.  We endeavor
+ * to report errors on any file that was open at the time of the error.  Some
+ * internal callers also need to know when writeback errors have occurred.
+ *
+ * When a writeback error occurs, most filesystems will want to call
+ * filemap_set_wb_err to record the error in the mapping so that it will be
+ * automatically reported whenever fsync is called on the file.
+ */
+static inline void filemap_set_wb_err(struct address_space *mapping, int err)
+{
+	/* Fastpath for common case of no error */
+	if (unlikely(err))
+		__filemap_set_wb_err(mapping, err);
+}
+
+/**
+ * filemap_check_wb_err - has an error occurred since the mark was sampled?
+ * @mapping: mapping to check for writeback errors
+ * @since: previously-sampled errseq_t
+ *
+ * Grab the errseq_t value from the mapping, and see if it has changed "since"
+ * the given value was sampled.
+ *
+ * If it has then report the latest error set, otherwise return 0.
+ */
+static inline int filemap_check_wb_err(struct address_space *mapping,
+					errseq_t since)
+{
+	return errseq_check(&mapping->wb_err, since);
+}
+
+/**
+ * filemap_sample_wb_err - sample the current errseq_t to test for later errors
+ * @mapping: mapping to be sampled
+ *
+ * Writeback errors are always reported relative to a particular sample point
+ * in the past.  This function provides those sample points.
+ */
+static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
+{
+	return errseq_sample(&mapping->wb_err);
+}
+
+/**
+ * file_sample_sb_err - sample the current errseq_t to test for later errors
+ * @file: file pointer to be sampled
+ *
+ * Grab the most current superblock-level errseq_t value for the given
+ * struct file.
+ */
+static inline errseq_t file_sample_sb_err(struct file *file)
+{
+	return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+}
+
+/*
+ * Flush file data before changing attributes.  Caller must hold any locks
+ * required to prevent further writes to this file until we're done setting
+ * flags.
+ */
+static inline int inode_drain_writes(struct inode *inode)
+{
+	inode_dio_wait(inode);
+	return filemap_write_and_wait(inode->i_mapping);
+}
+
 static inline bool mapping_empty(struct address_space *mapping)
 {
 	return xa_empty(&mapping->i_pages);
@@ -192,9 +306,14 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
 	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
+/*
+ * Large folio support currently depends on THP.  These dependencies are
+ * being worked on but are not yet fixed.
+ */
 static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
-	return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
@@ -212,7 +331,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping)
 	if (!mapping_large_folio_support(mapping))
 		atomic_inc(&mapping->nr_thps);
 #else
-	WARN_ON_ONCE(1);
+	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
 #endif
 }
 
@@ -222,7 +341,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
 	if (!mapping_large_folio_support(mapping))
 		atomic_dec(&mapping->nr_thps);
 #else
-	WARN_ON_ONCE(1);
+	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
 #endif
 }
 
@@ -283,16 +402,6 @@ static inline struct inode *folio_inode(struct folio *folio)
 	return folio->mapping->host;
 }
 
-static inline bool page_cache_add_speculative(struct page *page, int count)
-{
-	return folio_ref_try_add_rcu((struct folio *)page, count);
-}
-
-static inline bool page_cache_get_speculative(struct page *page)
-{
-	return page_cache_add_speculative(page, 1);
-}
-
 /**
  * folio_attach_private - Attach private data to a folio.
  * @folio: Folio to attach data to.
@@ -706,6 +815,17 @@ static inline loff_t folio_file_pos(struct folio *folio)
 	return page_file_offset(&folio->page);
 }
 
+/*
+ * Get the offset in PAGE_SIZE (even for hugetlb folios).
+ * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
+ */
+static inline pgoff_t folio_pgoff(struct folio *folio)
+{
+	if (unlikely(folio_test_hugetlb(folio)))
+		return hugetlb_basepage_index(&folio->page);
+	return folio->index;
+}
+
 extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
 				     unsigned long address);
 
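mapping_set_large_folios() is the opt-in that the readahead work keys off; a filesystem sets it once when it initialises an inode's mapping (XFS does so in this release). Sketch:

#include <linux/pagemap.h>

static void example_setup_inode(struct inode *inode)
{
	/* Tell the page cache this mapping may contain folios > order 0. */
	mapping_set_large_folios(inode->i_mapping);
}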
include/linux/rmap.h
@@ -11,6 +11,7 @@
 #include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 #include <linux/highmem.h>
+#include <linux/pagemap.h>
 
 /*
  * The anon_vma heads a list of private "related" vmas, to scan if
@@ -167,18 +168,19 @@ struct anon_vma *page_get_anon_vma(struct page *page);
  */
 void page_move_anon_rmap(struct page *, struct vm_area_struct *);
 void page_add_anon_rmap(struct page *, struct vm_area_struct *,
-		unsigned long, bool);
+		unsigned long address, bool compound);
 void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
-		unsigned long, int);
+		unsigned long address, int flags);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
-		unsigned long, bool);
-void page_add_file_rmap(struct page *, bool);
-void page_remove_rmap(struct page *, bool);
-
+		unsigned long address, bool compound);
+void page_add_file_rmap(struct page *, struct vm_area_struct *,
+		bool compound);
+void page_remove_rmap(struct page *, struct vm_area_struct *,
+		bool compound);
 void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
-		unsigned long);
+		unsigned long address);
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
-		unsigned long);
+		unsigned long address);
 
 static inline void page_dup_rmap(struct page *page, bool compound)
 {
@@ -188,11 +190,11 @@ static inline void page_dup_rmap(struct page *page, bool compound)
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked,
+int folio_referenced(struct folio *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 
-void try_to_migrate(struct page *page, enum ttu_flags flags);
-void try_to_unmap(struct page *, enum ttu_flags flags);
+void try_to_migrate(struct folio *folio, enum ttu_flags flags);
+void try_to_unmap(struct folio *, enum ttu_flags flags);
 
 int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, struct page **pages,
@@ -200,11 +202,13 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
 
 /* Avoid racy checks */
 #define PVMW_SYNC		(1 << 0)
-/* Look for migarion entries rather than present PTEs */
+/* Look for migration entries rather than present PTEs */
 #define PVMW_MIGRATION		(1 << 1)
 
 struct page_vma_mapped_walk {
-	struct page *page;
+	unsigned long pfn;
+	unsigned long nr_pages;
+	pgoff_t pgoff;
 	struct vm_area_struct *vma;
 	unsigned long address;
 	pmd_t *pmd;
@@ -213,10 +217,30 @@ struct page_vma_mapped_walk {
 	unsigned int flags;
 };
 
+#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags)	\
+	struct page_vma_mapped_walk name = {				\
+		.pfn = page_to_pfn(_page),				\
+		.nr_pages = compound_nr(page),				\
+		.pgoff = page_to_pgoff(page),				\
+		.vma = _vma,						\
+		.address = _address,					\
+		.flags = _flags,					\
+	}
+
+#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)	\
+	struct page_vma_mapped_walk name = {				\
+		.pfn = folio_pfn(_folio),				\
+		.nr_pages = folio_nr_pages(_folio),			\
+		.pgoff = folio_pgoff(_folio),				\
+		.vma = _vma,						\
+		.address = _address,					\
+		.flags = _flags,					\
+	}
+
 static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 {
 	/* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
-	if (pvmw->pte && !PageHuge(pvmw->page))
+	if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma))
 		pte_unmap(pvmw->pte);
 	if (pvmw->ptl)
 		spin_unlock(pvmw->ptl);
@@ -237,18 +261,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
  */
 int folio_mkclean(struct folio *);
 
-/*
- * called in munlock()/munmap() path to check for other vmas holding
- * the page mlocked.
- */
-void page_mlock(struct page *page);
-
-void remove_migration_ptes(struct page *old, struct page *new, bool locked);
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
 
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma_read(struct page *page);
+struct anon_vma *folio_lock_anon_vma_read(struct folio *folio);
 void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
@@ -267,15 +285,15 @@ struct rmap_walk_control {
	 * Return false if page table scanning in rmap_walk should be stopped.
	 * Otherwise, return true.
	 */
-	bool (*rmap_one)(struct page *page, struct vm_area_struct *vma,
+	bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma,
 					unsigned long addr, void *arg);
-	int (*done)(struct page *page);
-	struct anon_vma *(*anon_lock)(struct page *page);
+	int (*done)(struct folio *folio);
+	struct anon_vma *(*anon_lock)(struct folio *folio);
 	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
 };
 
-void rmap_walk(struct page *page, struct rmap_walk_control *rwc);
-void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc);
+void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc);
+void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc);
 
 #else	/* !CONFIG_MMU */
 
@@ -283,7 +301,7 @@ void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc);
 #define anon_vma_prepare(vma)	(0)
 #define anon_vma_link(vma)	do {} while (0)
 
-static inline int page_referenced(struct page *page, int is_locked,
+static inline int folio_referenced(struct folio *folio, int is_locked,
 				  struct mem_cgroup *memcg,
 				  unsigned long *vm_flags)
 {
@@ -291,7 +309,7 @@ static inline int page_referenced(struct page *page, int is_locked,
 	return 0;
 }
 
-static inline void try_to_unmap(struct page *page, enum ttu_flags flags)
+static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
 {
 }
 
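With rmap_walk() taking a folio and a const control structure, a walker now has the shape below (hypothetical, callback body stubbed; DEFINE_FOLIO_VMA_WALK is from the hunk above):

#include <linux/rmap.h>

static bool example_rmap_one(struct folio *folio, struct vm_area_struct *vma,
			     unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		/* Examine or modify the mapping at pvmw.pte / pvmw.pmd. */
	}
	return true;	/* keep walking other VMAs */
}

static const struct rmap_walk_control example_rwc = {
	.rmap_one = example_rmap_one,
};

/* ...later: rmap_walk(folio, &example_rwc); */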
include/linux/swap.h
@@ -328,7 +328,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
 
 /* linux/mm/workingset.c */
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
-void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
+void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
 void workingset_refault(struct folio *folio, void *shadow);
 void workingset_activation(struct folio *folio);
 
@@ -375,7 +375,6 @@ extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_cpu_zone(struct zone *zone);
 extern void lru_add_drain_all(void);
-extern void deactivate_file_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
 extern void swap_setup(void);
@@ -397,7 +396,7 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						unsigned long *nr_scanned);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
-extern int remove_mapping(struct address_space *mapping, struct page *page);
+long remove_mapping(struct address_space *mapping, struct folio *folio);
 
 extern unsigned long reclaim_pages(struct list_head *page_list);
 #ifdef CONFIG_NUMA
@@ -743,7 +742,7 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
 #endif
 
 #ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
 extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
 static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
 {
@@ -763,7 +762,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
 extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
 extern bool mem_cgroup_swap_full(struct page *page);
 #else
-static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
 {
 }
 