libnvdimm for 4.16

* Require struct page by default for filesystem DAX to remove a number of
   surprising failure cases.  This includes failures with direct I/O, gdb and
   fork(2).
 
 * Add support for the new Platform Capabilities Structure added to the NFIT in
   ACPI 6.2a.  This new table tells us whether the platform supports flushing
   of CPU and memory controller caches on unexpected power loss events.
 
 * Revamp vmem_altmap and dev_pagemap handling to clean up code and better
   support future future PCI P2P uses.
 
 * Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has become
   out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL spec, and
   instead rely on the generic ND_CMD_CALL approach used by the two other IOCTL
   families, NVDIMM_FAMILY_{HPE,MSFT}.
 
 * Enhance nfit_test so we can test some of the new things added in version 1.6
   of the DSM specification.  This includes testing firmware download and
   simulating the Last Shutdown State (LSS) status.
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJaeOg0AAoJEJ/BjXdf9fLBAFoQAI/IgcgJ2h9lfEpgjBRTC44t
 2p8dxwT1Ofw3Y1aR/tI8nYRXjRtAGuP4UIeRVnb1CL/N7PagJyoMGU+6hmzg+ptY
 c7cEDvw6nZOhrFwXx/xn7R53sYG8zH+UE6+jTR/PP/G4mQJfFCg4iF9R72Y7z0n7
 aurf82Kz137NPUy6dNr4V9bmPMJWAaOci9WOj5SKddR5ZSNbjoxylTwQRvre5y4r
 7HQTScEkirABOdSf1JoXTSUXCH/RC9UFFXR03ScHstGb1HjCj3KdcicVc50Q++Ub
 qsEudhE6i44PEW1Hh4Qkg6hjHMEa8qHP+ShBuRuVaUmlghYTQn66niJAYLZilwdz
 EVjE7vR+toHA5g3YCalEmYVutUEhIDkh/xfpd7vM6ZorUGJy95a2elEJs2fHBffC
 gEhnCip7FROPcK5RDNUM8hBgnG/q5wwWPQMKY+6rKDZQx3mXssCrKp2Vlx7kBwMG
 rpblkEpYjPonbLEHxsSU8yTg9Uq55ciIWgnOToffcjZvjbihi8WUVlHcwHUMPf/o
 DWElg+4qmG0Sdd4S2NeAGwTl1Ewrf2RrtUGMjHtH4OUFs1wo6ZmfrxFzzMfoZ1Od
 ko/s65v4uwtTzECh2o+XQaNsReR5YETXxmA40N/Jpo7/7twABIoZ/ASvj/3ZBYj+
 sie+u2rTod8/gQWSfHpJ
 =MIMX
 -----END PGP SIGNATURE-----

Merge tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Ross Zwisler:

 - Require struct page by default for filesystem DAX to remove a number
   of surprising failure cases. This includes failures with direct I/O,
   gdb and fork(2).

 - Add support for the new Platform Capabilities Structure added to the
   NFIT in ACPI 6.2a. This new table tells us whether the platform
   supports flushing of CPU and memory controller caches on unexpected
   power loss events.

 - Revamp vmem_altmap and dev_pagemap handling to clean up code and
   better support future future PCI P2P uses.

 - Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
   become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
   spec, and instead rely on the generic ND_CMD_CALL approach used by
   the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

 - Enhance nfit_test so we can test some of the new things added in
   version 1.6 of the DSM specification. This includes testing firmware
   download and simulating the Last Shutdown State (LSS) status.

* tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
  libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
  acpi, nfit: fix register dimm error handling
  libnvdimm, namespace: make min namespace size 4K
  tools/testing/nvdimm: force nfit_test to depend on instrumented modules
  libnvdimm/nfit_test: adding support for unit testing enable LSS status
  libnvdimm/nfit_test: add firmware download emulation
  nfit-test: Add platform cap support from ACPI 6.2a to test
  libnvdimm: expose platform persistence attribute for nd_region
  acpi: nfit: add persistent memory control flag for nd_region
  acpi: nfit: Add support for detect platform CPU cache flush on power loss
  device-dax: Fix trailing semicolon
  libnvdimm, btt: fix uninitialized err_lock
  dax: require 'struct page' by default for filesystem dax
  ext2: auto disable dax instead of failing mount
  ext4: auto disable dax instead of failing mount
  mm, dax: introduce pfn_t_special()
  mm: Fix devm_memremap_pages() collision handling
  mm: Fix memory size alignment in devm_memremap_pages_release()
  memremap: merge find_dev_pagemap into get_dev_pagemap
  memremap: change devm_memremap_pages interface to use struct dev_pagemap
  ...
This commit is contained in:
Linus Torvalds 2018-02-06 10:41:33 -08:00
commit 3ff1b28caa
52 changed files with 1124 additions and 529 deletions

View file

@ -47,6 +47,17 @@ enum {
/* region flag indicating to direct-map persistent memory by default */
ND_REGION_PAGEMAP = 0,
/*
* Platform ensures entire CPU store data path is flushed to pmem on
* system power loss.
*/
ND_REGION_PERSIST_CACHE = 1,
/*
* Platform provides mechanisms to automatically flush outstanding
* write data from memory controler to pmem on system power loss.
* (ADR)
*/
ND_REGION_PERSIST_MEMCTRL = 2,
/* mark newly adjusted resources as requiring a label update */
DPA_RESOURCE_ADJUSTED = 1 << 0,

View file

@ -13,6 +13,7 @@ struct pglist_data;
struct mem_section;
struct memory_block;
struct resource;
struct vmem_altmap;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@ -125,24 +126,26 @@ static inline bool movable_node_is_enabled(void)
#ifdef CONFIG_MEMORY_HOTREMOVE
extern bool is_pageblock_removable_nolock(struct page *page);
extern int arch_remove_memory(u64 start, u64 size);
extern int arch_remove_memory(u64 start, u64 size,
struct vmem_altmap *altmap);
extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
unsigned long nr_pages, struct vmem_altmap *altmap);
#endif /* CONFIG_MEMORY_HOTREMOVE */
/* reasonably generic interface to expand the physical pages */
extern int __add_pages(int nid, unsigned long start_pfn,
unsigned long nr_pages, bool want_memblock);
extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
struct vmem_altmap *altmap, bool want_memblock);
#ifndef CONFIG_ARCH_HAS_ADD_PAGES
static inline int add_pages(int nid, unsigned long start_pfn,
unsigned long nr_pages, bool want_memblock)
unsigned long nr_pages, struct vmem_altmap *altmap,
bool want_memblock)
{
return __add_pages(nid, start_pfn, nr_pages, want_memblock);
return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
#else /* ARCH_HAS_ADD_PAGES */
int add_pages(int nid, unsigned long start_pfn,
unsigned long nr_pages, bool want_memblock);
int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
struct vmem_altmap *altmap, bool want_memblock);
#endif /* ARCH_HAS_ADD_PAGES */
#ifdef CONFIG_NUMA
@ -318,15 +321,17 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource, bool online);
extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
extern int arch_add_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap, bool want_memblock);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
unsigned long nr_pages, struct vmem_altmap *altmap);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
extern int sparse_add_one_section(struct pglist_data *pgdat,
unsigned long start_pfn, struct vmem_altmap *altmap);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset);
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,

View file

@ -26,18 +26,6 @@ struct vmem_altmap {
unsigned long alloc;
};
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
#ifdef CONFIG_ZONE_DEVICE
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
return NULL;
}
#endif
/*
* Specialize ZONE_DEVICE memory into multiple types each having differents
* usage.
@ -125,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
struct dev_pagemap {
dev_page_fault_t page_fault;
dev_page_free_t page_free;
struct vmem_altmap *altmap;
const struct resource *res;
struct vmem_altmap altmap;
bool altmap_valid;
struct resource res;
struct percpu_ref *ref;
struct device *dev;
void *data;
@ -134,15 +123,17 @@ struct dev_pagemap {
};
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
static inline bool is_zone_device_page(const struct page *page);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
struct vmem_altmap *altmap)
struct dev_pagemap *pgmap)
{
/*
* Fail attempts to call devm_memremap_pages() without
@ -153,11 +144,22 @@ static inline void *devm_memremap_pages(struct device *dev,
return ERR_PTR(-ENXIO);
}
static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
return NULL;
}
#endif
static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
return 0;
}
static inline void vmem_altmap_free(struct vmem_altmap *altmap,
unsigned long nr_pfns)
{
}
#endif /* CONFIG_ZONE_DEVICE */
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
static inline bool is_device_private_page(const struct page *page)
@ -173,39 +175,6 @@ static inline bool is_device_public_page(const struct page *page)
}
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
/**
* get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
* @pfn: page frame number to lookup page_map
* @pgmap: optional known pgmap that already has a reference
*
* @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
* same mapping.
*/
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
const struct resource *res = pgmap ? pgmap->res : NULL;
resource_size_t phys = PFN_PHYS(pfn);
/*
* In the cached case we're already holding a live reference so
* we can simply do a blind increment
*/
if (res && phys >= res->start && phys <= res->end) {
percpu_ref_get(pgmap->ref);
return pgmap;
}
/* fall back to slow path lookup */
rcu_read_lock();
pgmap = find_dev_pagemap(phys);
if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
pgmap = NULL;
rcu_read_unlock();
return pgmap;
}
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
if (pgmap)

View file

@ -2075,8 +2075,8 @@ static inline void zero_resv_unavail(void) {}
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_zone(unsigned long, int, unsigned long,
unsigned long, enum memmap_context);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
enum memmap_context, struct vmem_altmap *);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
@ -2544,7 +2544,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
unsigned long map_count,
int nodeid);
struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
struct vmem_altmap *altmap);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@ -2552,20 +2553,17 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
void *__vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
{
return __vmemmap_alloc_block_buf(size, node, NULL);
}
void *vmemmap_alloc_block_buf(unsigned long size, int node);
void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node);
int vmemmap_populate(unsigned long start, unsigned long end, int node);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
#ifdef CONFIG_MEMORY_HOTPLUG
void vmemmap_free(unsigned long start, unsigned long end);
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap);
#endif
void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
unsigned long nr_pages);

View file

@ -15,8 +15,10 @@
#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
#define PFN_FLAGS_TRACE \
{ PFN_SPECIAL, "SPECIAL" }, \
{ PFN_SG_CHAIN, "SG_CHAIN" }, \
{ PFN_SG_LAST, "SG_LAST" }, \
{ PFN_DEV, "DEV" }, \
@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud);
#endif
#endif /* __HAVE_ARCH_PTE_DEVMAP */
#ifdef __HAVE_ARCH_PTE_SPECIAL
static inline bool pfn_t_special(pfn_t pfn)
{
return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
}
#else
static inline bool pfn_t_special(pfn_t pfn)
{
return false;
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */
#endif /* _LINUX_PFN_T_H_ */