Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/alloc_tag.h           |   6
-rw-r--r--  include/linux/balloon_compaction.h  |  90
-rw-r--r--  include/linux/codetag.h             |   1
-rw-r--r--  include/linux/coredump.h            |   2
-rw-r--r--  include/linux/damon.h               |  80
-rw-r--r--  include/linux/dax.h                 |   9
-rw-r--r--  include/linux/device-mapper.h       |   2
-rw-r--r--  include/linux/fs.h                  |  13
-rw-r--r--  include/linux/gfp.h                 |   7
-rw-r--r--  include/linux/highmem-internal.h    |   2
-rw-r--r--  include/linux/highmem.h             |  12
-rw-r--r--  include/linux/huge_mm.h             |  52
-rw-r--r--  include/linux/hugetlb.h             |  20
-rw-r--r--  include/linux/khugepaged.h          |   4
-rw-r--r--  include/linux/ksm.h                 |  12
-rw-r--r--  include/linux/maple_tree.h          |   4
-rw-r--r--  include/linux/memcontrol.h          |  44
-rw-r--r--  include/linux/memfd.h               |   4
-rw-r--r--  include/linux/memory-tiers.h        |   2
-rw-r--r--  include/linux/memory.h              |  20
-rw-r--r--  include/linux/memory_hotplug.h      |   3
-rw-r--r--  include/linux/migrate.h             |  46
-rw-r--r--  include/linux/mm.h                  |  77
-rw-r--r--  include/linux/mm_types.h            |   3
-rw-r--r--  include/linux/mman.h                |   4
-rw-r--r--  include/linux/mmap_lock.h           |  11
-rw-r--r--  include/linux/mmdebug.h             |  12
-rw-r--r--  include/linux/mmzone.h              |  36
-rw-r--r--  include/linux/node.h                |  77
-rw-r--r--  include/linux/page-flags.h          | 106
-rw-r--r--  include/linux/page-isolation.h      |  47
-rw-r--r--  include/linux/page_owner.h          |   8
-rw-r--r--  include/linux/pageblock-flags.h     |  56
-rw-r--r--  include/linux/pagemap.h             |  14
-rw-r--r--  include/linux/pagewalk.h            |   9
-rw-r--r--  include/linux/percpu-defs.h         |   7
-rw-r--r--  include/linux/pfn.h                 |   9
-rw-r--r--  include/linux/pfn_t.h               | 131
-rw-r--r--  include/linux/pgtable.h             | 118
-rw-r--r--  include/linux/proc_fs.h             |   1
-rw-r--r--  include/linux/rmap.h                |   4
-rw-r--r--  include/linux/shmem_fs.h            |   5
-rw-r--r--  include/linux/swap.h                |  23
-rw-r--r--  include/linux/userfaultfd_k.h       |  15
-rw-r--r--  include/linux/vmstat.h              |   4
-rw-r--r--  include/linux/writeback.h           |  11
-rw-r--r--  include/linux/zsmalloc.h            |   2
47 files changed, 652 insertions(+), 573 deletions(-)
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 8f7931eb7d16..9ef2633e2c08 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -88,7 +88,7 @@ static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
return container_of(ct, struct alloc_tag, ct);
}
-#ifdef ARCH_NEEDS_WEAK_PER_CPU
+#if defined(CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU) && defined(MODULE)
/*
* When percpu variables are required to be defined as weak, static percpu
* variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION).
@@ -102,7 +102,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
.ct = CODE_TAG_INIT, \
.counters = &_shared_alloc_tag };
-#else /* ARCH_NEEDS_WEAK_PER_CPU */
+#else /* CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU && MODULE */
#ifdef MODULE
@@ -123,7 +123,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
#endif /* MODULE */
-#endif /* ARCH_NEEDS_WEAK_PER_CPU */
+#endif /* CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU && MODULE */
DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
mem_alloc_profiling_key);
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 5ca2d5699620..7cfe48769239 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -4,12 +4,13 @@
*
* Common interface definitions for making balloon pages movable by compaction.
*
- * Balloon page migration makes use of the general non-lru movable page
+ * Balloon page migration makes use of the general "movable_ops page migration"
* feature.
*
* page->private is used to reference the responsible balloon device.
- * page->mapping is used in context of non-lru page migration to reference
- * the address space operations for page isolation/migration/compaction.
+ * That these pages have movable_ops, and which movable_ops apply,
+ * is derived from the page type (PageOffline()) combined with the
+ * PG_movable_ops flag (PageMovableOps()).
*
* As the page isolation scanning step a compaction thread does is a lockless
* procedure (from a page standpoint), it might bring some racy situations while
@@ -17,12 +18,10 @@
* and safely perform balloon's page compaction and migration we must, always,
* ensure following these simple rules:
*
- * i. when updating a balloon's page ->mapping element, strictly do it under
- * the following lock order, independently of the far superior
- * locking scheme (lru_lock, balloon_lock):
+ * i. Setting the PG_movable_ops flag and page->private with the following
+ * lock order
* +-page_lock(page);
* +--spin_lock_irq(&b_dev_info->pages_lock);
- * ... page->mapping updates here ...
*
* ii. isolation or dequeueing procedure must remove the page from balloon
* device page list under b_dev_info->pages_lock.
@@ -78,6 +77,15 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon)
#ifdef CONFIG_BALLOON_COMPACTION
extern const struct movable_operations balloon_mops;
+/*
+ * balloon_page_device - get the b_dev_info descriptor for the balloon device
+ * that enqueues the given page.
+ */
+static inline struct balloon_dev_info *balloon_page_device(struct page *page)
+{
+ return (struct balloon_dev_info *)page_private(page);
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
/*
* balloon_page_insert - insert a page into the balloon's page list and make
@@ -92,68 +100,34 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon,
struct page *page)
{
__SetPageOffline(page);
- __SetPageMovable(page, &balloon_mops);
- set_page_private(page, (unsigned long)balloon);
+ if (IS_ENABLED(CONFIG_BALLOON_COMPACTION)) {
+ SetPageMovableOps(page);
+ set_page_private(page, (unsigned long)balloon);
+ }
list_add(&page->lru, &balloon->pages);
}
-/*
- * balloon_page_delete - delete a page from balloon's page list and clear
- * the page->private assignement accordingly.
- * @page : page to be released from balloon's page list
- *
- * Caller must ensure the page is locked and the spin_lock protecting balloon
- * pages list is held before deleting a page from the balloon device.
- */
-static inline void balloon_page_delete(struct page *page)
+static inline gfp_t balloon_mapping_gfp_mask(void)
{
- __ClearPageOffline(page);
- __ClearPageMovable(page);
- set_page_private(page, 0);
- /*
- * No touch page.lru field once @page has been isolated
- * because VM is using the field.
- */
- if (!PageIsolated(page))
- list_del(&page->lru);
+ if (IS_ENABLED(CONFIG_BALLOON_COMPACTION))
+ return GFP_HIGHUSER_MOVABLE;
+ return GFP_HIGHUSER;
}
/*
- * balloon_page_device - get the b_dev_info descriptor for the balloon device
- * that enqueues the given page.
+ * balloon_page_finalize - prepare a balloon page that was removed from the
+ * balloon list for release to the page allocator
+ * @page: page to be released to the page allocator
+ *
+ * Caller must ensure that the page is locked.
*/
-static inline struct balloon_dev_info *balloon_page_device(struct page *page)
+static inline void balloon_page_finalize(struct page *page)
{
- return (struct balloon_dev_info *)page_private(page);
+ if (IS_ENABLED(CONFIG_BALLOON_COMPACTION))
+ set_page_private(page, 0);
+ /* PageOffline is sticky until the page is freed to the buddy. */
}
-static inline gfp_t balloon_mapping_gfp_mask(void)
-{
- return GFP_HIGHUSER_MOVABLE;
-}
-
-#else /* !CONFIG_BALLOON_COMPACTION */
-
-static inline void balloon_page_insert(struct balloon_dev_info *balloon,
- struct page *page)
-{
- __SetPageOffline(page);
- list_add(&page->lru, &balloon->pages);
-}
-
-static inline void balloon_page_delete(struct page *page)
-{
- __ClearPageOffline(page);
- list_del(&page->lru);
-}
-
-static inline gfp_t balloon_mapping_gfp_mask(void)
-{
- return GFP_HIGHUSER;
-}
-
-#endif /* CONFIG_BALLOON_COMPACTION */
-
/*
* balloon_page_push - insert a page into a page list.
* @head : pointer to list
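
The reworked insert/finalize pair above changes the driver-side sequence for enqueueing and freeing balloon pages. A minimal sketch of the resulting usage, assuming a hypothetical driver; my_balloon_enqueue()/my_balloon_release() and the final __free_page() are illustrative, not part of this patch:

#include <linux/balloon_compaction.h>
#include <linux/pagemap.h>
#include <linux/mm.h>

static void my_balloon_enqueue(struct balloon_dev_info *b_dev_info,
			       struct page *page)
{
	unsigned long flags;

	/* Lock order from the header comment: page lock, then pages_lock. */
	lock_page(page);
	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	/* Sets PageOffline and, with BALLOON_COMPACTION, PG_movable_ops. */
	balloon_page_insert(b_dev_info, page);
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
	unlock_page(page);
}

static void my_balloon_release(struct balloon_dev_info *b_dev_info,
			       struct page *page)
{
	unsigned long flags;

	lock_page(page);
	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	list_del(&page->lru);		/* dequeue under pages_lock (rule ii) */
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
	balloon_page_finalize(page);	/* clears page->private; page locked */
	unlock_page(page);
	__free_page(page);		/* PageOffline clears in the buddy */
}
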
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index 5f2b9a1f722c..457ed8fd3214 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -54,6 +54,7 @@ struct codetag_iterator {
struct codetag_module *cmod;
unsigned long mod_id;
struct codetag *ct;
+ unsigned long mod_seq;
};
#ifdef MODULE
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 96e8a66da133..68861da4cf7c 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -10,7 +10,7 @@
#ifdef CONFIG_COREDUMP
struct core_vma_metadata {
unsigned long start, end;
- unsigned long flags;
+ vm_flags_t flags;
unsigned long dump_size;
unsigned long pgoff;
struct file *file;
diff --git a/include/linux/damon.h b/include/linux/damon.h
index a4011726cb3b..f13664c62ddd 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -448,12 +448,29 @@ struct damos_access_pattern {
};
/**
+ * struct damos_migrate_dests - Migration destination nodes and their weights.
+ * @node_id_arr: Array of migration destination node ids.
+ * @weight_arr: Array of migration weights for @node_id_arr.
+ * @nr_dests: Length of the @node_id_arr and @weight_arr arrays.
+ *
+ * @node_id_arr is an array of the ids of migration destination nodes.
+ * @weight_arr is an array of the weights for those nodes. The weights in
+ * @weight_arr are for the nodes in @node_id_arr at the same array index.
+ */
+struct damos_migrate_dests {
+ unsigned int *node_id_arr;
+ unsigned int *weight_arr;
+ size_t nr_dests;
+};
+
+/**
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
* @pattern: Access pattern of target regions.
- * @action: &damo_action to be applied to the target regions.
+ * @action: &damos_action to be applied to the target regions.
* @apply_interval_us: The time between applying the @action.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
+ * @migrate_dests: Destination nodes if @action is "migrate_{hot,cold}".
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
* @ops_filters: ops layer handling &struct damos_filter objects list.
@@ -472,9 +489,12 @@ struct damos_access_pattern {
* monitoring context are inactive, DAMON stops monitoring either, and just
* repeatedly checks the watermarks.
*
+ * @migrate_dests specifies multiple migration target nodes with different
+ * weights for migrate_hot or migrate_cold actions. @target_nid is ignored if
+ * this is set.
+ *
* @target_nid is used to set the migration target node for migrate_hot or
- * migrate_cold actions, which means it's only meaningful when @action is either
- * "migrate_hot" or "migrate_cold".
+ * migrate_cold actions, and @migrate_dests is unset.
*
* Before applying the &action to a memory region, &struct damon_operations
* implementation could check pages of the region and skip &action to respect
@@ -517,7 +537,10 @@ struct damos {
struct damos_quota quota;
struct damos_watermarks wmarks;
union {
- int target_nid;
+ struct {
+ int target_nid;
+ struct damos_migrate_dests migrate_dests;
+ };
};
struct list_head filters;
struct list_head ops_filters;
@@ -553,6 +576,7 @@ enum damon_ops_id {
* @get_scheme_score: Get the score of a region for a scheme.
* @apply_scheme: Apply a DAMON-based operation scheme.
* @target_valid: Determine if the target is valid.
+ * @cleanup_target: Clean up each target before deallocation.
* @cleanup: Clean up the context.
*
* DAMON can be extended for various address spaces and usages. For this,
@@ -585,6 +609,7 @@ enum damon_ops_id {
* filters (&struct damos_filter) that handled by itself.
* @target_valid should check whether the target is still valid for the
* monitoring.
+ * @cleanup_target is called just before the target is deallocated.
* @cleanup is called from @kdamond just before its termination.
*/
struct damon_operations {
@@ -600,42 +625,16 @@ struct damon_operations {
struct damon_target *t, struct damon_region *r,
struct damos *scheme, unsigned long *sz_filter_passed);
bool (*target_valid)(struct damon_target *t);
+ void (*cleanup_target)(struct damon_target *t);
void (*cleanup)(struct damon_ctx *context);
};
-/**
- * struct damon_callback - Monitoring events notification callbacks.
- *
- * @after_wmarks_check: Called after each schemes' watermarks check.
- * @after_aggregation: Called after each aggregation.
- * @before_terminate: Called before terminating the monitoring.
- *
- * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just
- * before finishing the monitoring.
- *
- * The monitoring thread calls @after_wmarks_check after each DAMON-based
- * operation schemes' watermarks check. If users need to make changes to the
- * attributes of the monitoring context while it's deactivated due to the
- * watermarks, this is the good place to do.
- *
- * The monitoring thread calls @after_aggregation for each of the aggregation
- * intervals. Therefore, users can safely access the monitoring results
- * without additional protection. For the reason, users are recommended to use
- * these callback for the accesses to the results.
- *
- * If any callback returns non-zero, monitoring stops.
- */
-struct damon_callback {
- int (*after_wmarks_check)(struct damon_ctx *context);
- int (*after_aggregation)(struct damon_ctx *context);
- void (*before_terminate)(struct damon_ctx *context);
-};
-
/*
* struct damon_call_control - Control damon_call().
*
* @fn: Function to be called back.
* @data: Data that will be passed to @fn.
+ * @repeat: Repeat invocations.
* @return_code: Return code from @fn invocation.
*
* Control damon_call(), which requests specific kdamond to invoke a given
@@ -644,19 +643,22 @@ struct damon_callback {
struct damon_call_control {
int (*fn)(void *data);
void *data;
+ bool repeat;
int return_code;
/* private: internal use only */
/* informs if the kdamond finished handling of the request */
struct completion completion;
/* informs if the kdamond canceled @fn invocation */
bool canceled;
+ /* List head for siblings. */
+ struct list_head list;
};
/**
* struct damon_intervals_goal - Monitoring intervals auto-tuning goal.
*
* @access_bp: Access events observation ratio to achieve in bp.
- * @aggrs: Number of aggregations to acheive @access_bp within.
+ * @aggrs: Number of aggregations to achieve @access_bp within.
* @min_sample_us: Minimum resulting sampling interval in microseconds.
* @max_sample_us: Maximum resulting sampling interval in microseconds.
*
@@ -697,7 +699,7 @@ struct damon_intervals_goal {
* ``mmap()`` calls from the application, in case of virtual memory monitoring)
* and applies the changes for each @ops_update_interval. All time intervals
* are in micro-seconds. Please refer to &struct damon_operations and &struct
- * damon_callback for more detail.
+ * damon_call_control for more detail.
*/
struct damon_attrs {
unsigned long sample_interval;
@@ -744,7 +746,6 @@ struct damon_attrs {
* Accesses to other fields must be protected by themselves.
*
* @ops: Set of monitoring operations for given use cases.
- * @callback: Set of callbacks for monitoring events notifications.
*
* @adaptive_targets: Head of monitoring targets (&damon_target) list.
* @schemes: Head of schemes (&damos) list.
@@ -775,8 +776,9 @@ struct damon_ctx {
/* for scheme quotas prioritization */
unsigned long *regions_score_histogram;
- struct damon_call_control *call_control;
- struct mutex call_control_lock;
+ /* lists of &struct damon_call_control */
+ struct list_head call_controls;
+ struct mutex call_controls_lock;
struct damos_walk_control *walk_control;
struct mutex walk_control_lock;
@@ -786,7 +788,6 @@ struct damon_ctx {
struct mutex kdamond_lock;
struct damon_operations ops;
- struct damon_callback callback;
struct list_head adaptive_targets;
struct list_head schemes;
@@ -905,7 +906,7 @@ struct damon_target *damon_new_target(void);
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
bool damon_targets_empty(struct damon_ctx *ctx);
void damon_free_target(struct damon_target *t);
-void damon_destroy_target(struct damon_target *t);
+void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx);
unsigned int damon_nr_regions(struct damon_target *t);
struct damon_ctx *damon_new_ctx(void);
@@ -934,6 +935,7 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+bool damon_is_running(struct damon_ctx *ctx);
int damon_call(struct damon_ctx *ctx, struct damon_call_control *control);
int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control);
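
The new struct damos_migrate_dests above pairs destination nodes with weights. A hedged sketch of filling it for a weighted "migrate_hot" scheme; the node ids, weights, and my_setup_dests() are illustrative only:

#include <linux/damon.h>
#include <linux/kernel.h>

static unsigned int my_nodes[] = { 1, 2 };
static unsigned int my_weights[] = { 7, 3 };	/* 70:30 split */

static void my_setup_dests(struct damos *scheme)
{
	scheme->migrate_dests = (struct damos_migrate_dests) {
		.node_id_arr = my_nodes,
		.weight_arr = my_weights,
		.nr_dests = ARRAY_SIZE(my_nodes),
	};
	/* With migrate_dests set, target_nid is ignored (see kernel-doc). */
}
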
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 78891518291d..9d624f4d9df6 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,7 +26,7 @@ struct dax_operations {
* number of pages available for DAX at that pfn.
*/
long (*direct_access)(struct dax_device *, pgoff_t, long,
- enum dax_access_mode, void **, pfn_t *);
+ enum dax_access_mode, void **, unsigned long *);
/* zero_page_range: required operation. Zero page range */
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
/*
@@ -243,7 +243,7 @@ static inline void dax_break_layout_final(struct inode *inode)
bool dax_alive(struct dax_device *dax_dev);
void *dax_get_private(struct dax_device *dax_dev);
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
- enum dax_access_mode mode, void **kaddr, pfn_t *pfn);
+ enum dax_access_mode mode, void **kaddr, unsigned long *pfn);
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
@@ -257,9 +257,10 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops);
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
- pfn_t *pfnp, int *errp, const struct iomap_ops *ops);
+ unsigned long *pfnp, int *errp,
+ const struct iomap_ops *ops);
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
- unsigned int order, pfn_t pfn);
+ unsigned int order, unsigned long pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
void dax_delete_mapping_range(struct address_space *mapping,
loff_t start, loff_t end);
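
With pfn_t gone, dax_direct_access() reports the pfn through a plain unsigned long. A hedged caller sketch; my_map_one_page() is hypothetical:

#include <linux/dax.h>

static long my_map_one_page(struct dax_device *dax_dev, pgoff_t pgoff)
{
	void *kaddr;
	unsigned long pfn;	/* was a pfn_t before this series */
	long avail;

	avail = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, &pfn);
	if (avail < 0)
		return avail;
	/* kaddr and pfn now describe the first accessible page. */
	return 0;
}
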
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index cb95951547ab..84fdc3a6a19a 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -156,7 +156,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
*/
typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
long nr_pages, enum dax_access_mode node, void **kaddr,
- pfn_t *pfn);
+ unsigned long *pfn);
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b16f90f81bc..d7ab4f96d705 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -526,7 +526,7 @@ struct address_space {
/*
* On most architectures that alignment is already the case; but
* must be enforced here for CRIS, to let the least significant bit
- * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
+ * of struct folio's "mapping" pointer be used for FOLIO_MAPPING_ANON.
*/
/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
@@ -1043,6 +1043,7 @@ struct fown_struct {
* and so were/are genuinely "ahead". Start next readahead when
* the first of these pages is accessed.
* @ra_pages: Maximum size of a readahead request, copied from the bdi.
+ * @order: Preferred folio order used for most recent readahead.
* @mmap_miss: How many mmap accesses missed in the page cache.
* @prev_pos: The last byte in the most recent read request.
*
@@ -1054,7 +1055,8 @@ struct file_ra_state {
unsigned int size;
unsigned int async_size;
unsigned int ra_pages;
- unsigned int mmap_miss;
+ unsigned short order;
+ unsigned short mmap_miss;
loff_t prev_pos;
};
@@ -3756,9 +3758,14 @@ void setattr_copy(struct mnt_idmap *, struct inode *inode,
extern int file_update_time(struct file *file);
+static inline bool file_is_dax(const struct file *file)
+{
+ return file && IS_DAX(file->f_mapping->host);
+}
+
static inline bool vma_is_dax(const struct vm_area_struct *vma)
{
- return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
+ return file_is_dax(vma->vm_file);
}
static inline bool vma_is_fsdax(struct vm_area_struct *vma)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index be160e8d8bcb..5ebf26fcdcfa 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -423,9 +423,14 @@ static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
+
+typedef unsigned int __bitwise acr_flags_t;
+#define ACR_FLAGS_NONE ((__force acr_flags_t)0) // ordinary allocation request
+#define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA
+
/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
- unsigned migratetype, gfp_t gfp_mask);
+ acr_flags_t alloc_flags, gfp_t gfp_mask);
#define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
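
A hedged sketch of calling alloc_contig_range() with the new acr_flags_t: non-CMA callers that previously passed a migratetype now pass ACR_FLAGS_NONE, while only CMA passes ACR_FLAGS_CMA. my_grab_range() is hypothetical:

#include <linux/gfp.h>

static int my_grab_range(unsigned long start_pfn, unsigned long nr_pages)
{
	/* Range must lie within a single zone; see the comment above. */
	return alloc_contig_range(start_pfn, start_pfn + nr_pages,
				  ACR_FLAGS_NONE, GFP_KERNEL);
}
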
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 9a7683d79a4b..36053c3d6d64 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -195,7 +195,7 @@ static inline void *kmap_local_page_try_from_panic(struct page *page)
static inline void *kmap_local_folio(struct folio *folio, size_t offset)
{
- return page_address(&folio->page) + offset;
+ return folio_address(folio) + offset;
}
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index e48d7f27b0b9..6234f316468c 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -292,12 +292,6 @@ static inline void zero_user_segment(struct page *page,
zero_user_segments(page, start, end, 0, 0);
}
-static inline void zero_user(struct page *page,
- unsigned start, unsigned size)
-{
- zero_user_segments(page, start, start + size, 0, 0);
-}
-
#ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE
static inline void copy_user_highpage(struct page *to, struct page *from,
@@ -688,10 +682,4 @@ static inline void folio_release_kmap(struct folio *folio, void *addr)
kunmap_local(addr);
folio_put(folio);
}
-
-static inline void unmap_and_put_page(struct page *page, void *addr)
-{
- folio_release_kmap(page_folio(page), addr);
-}
-
#endif /* _LINUX_HIGHMEM_H */
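
With zero_user() removed, open-coded callers need an alternative. One possible replacement, assuming the existing memzero_page() helper fits the call site (a sketch, not necessarily the conversion the callers actually received):

#include <linux/highmem.h>

static inline void my_zero_user(struct page *page, unsigned int start,
				unsigned int size)
{
	/* Maps the page locally, zeroes [start, start + size), unmaps. */
	memzero_page(page, start, size);
}
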
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2f190c90192d..7748489fde1b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -37,8 +37,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, pgprot_t newprot,
unsigned long cp_flags);
-vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
-vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, unsigned long pfn,
+ bool write);
+vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, unsigned long pfn,
+ bool write);
vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
bool write);
vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
@@ -261,7 +263,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
}
unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags,
+ vm_flags_t vm_flags,
unsigned long tva_flags,
unsigned long orders);
@@ -282,7 +284,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
*/
static inline
unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags,
+ vm_flags_t vm_flags,
unsigned long tva_flags,
unsigned long orders)
{
@@ -317,7 +319,7 @@ struct thpsize {
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
static inline bool vma_thp_disabled(struct vm_area_struct *vma,
- unsigned long vm_flags)
+ vm_flags_t vm_flags)
{
/*
* Explicitly disabled through madvise or prctl, or some
@@ -400,8 +402,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
#define split_huge_pmd(__vma, __pmd, __address) \
do { \
pmd_t *____pmd = (__pmd); \
- if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \
- || pmd_devmap(*____pmd)) \
+ if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \
__split_huge_pmd(__vma, __pmd, __address, \
false); \
} while (0)
@@ -426,16 +427,14 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
#define split_huge_pud(__vma, __pud, __address) \
do { \
pud_t *____pud = (__pud); \
- if (pud_trans_huge(*____pud) \
- || pud_devmap(*____pud)) \
+ if (pud_trans_huge(*____pud)) \
__split_huge_pud(__vma, __pud, __address); \
} while (0)
-int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
+int hugepage_madvise(struct vm_area_struct *vma, vm_flags_t *vm_flags,
int advice);
-int madvise_collapse(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end);
+int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, bool *lock_dropped);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct vm_area_struct *next);
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
@@ -450,7 +449,7 @@ static inline int is_swap_pmd(pmd_t pmd)
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
{
- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
+ if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd))
return __pmd_trans_huge_lock(pmd, vma);
else
return NULL;
@@ -458,7 +457,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
struct vm_area_struct *vma)
{
- if (pud_trans_huge(*pud) || pud_devmap(*pud))
+ if (pud_trans_huge(*pud))
return __pud_trans_huge_lock(pud, vma);
else
return NULL;
@@ -473,9 +472,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
return folio_order(folio) >= HPAGE_PMD_ORDER;
}
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
-
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
extern struct folio *huge_zero_folio;
@@ -486,9 +482,14 @@ static inline bool is_huge_zero_folio(const struct folio *folio)
return READ_ONCE(huge_zero_folio) == folio;
}
+static inline bool is_huge_zero_pfn(unsigned long pfn)
+{
+ return READ_ONCE(huge_zero_pfn) == (pfn & ~(HPAGE_PMD_NR - 1));
+}
+
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
+ return pmd_present(pmd) && is_huge_zero_pfn(pmd_pfn(pmd));
}
struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
@@ -524,7 +525,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
}
static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags,
+ vm_flags_t vm_flags,
unsigned long tva_flags,
unsigned long orders)
{
@@ -593,14 +594,14 @@ static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
do { } while (0)
static inline int hugepage_madvise(struct vm_area_struct *vma,
- unsigned long *vm_flags, int advice)
+ vm_flags_t *vm_flags, int advice)
{
return -EINVAL;
}
static inline int madvise_collapse(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end)
+ unsigned long start,
+ unsigned long end, bool *lock_dropped)
{
return -EINVAL;
}
@@ -636,6 +637,11 @@ static inline bool is_huge_zero_folio(const struct folio *folio)
return false;
}
+static inline bool is_huge_zero_pfn(unsigned long pfn)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
return false;
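
With pmd_devmap() dropped from the lock helpers, a PMD walker only has to deal with swap and transparent-huge entries. A hedged sketch; my_walk_pmd() is hypothetical:

#include <linux/huge_mm.h>

static int my_walk_pmd(pmd_t *pmd, struct vm_area_struct *vma)
{
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return 0;		/* not huge; fall back to PTE level */
	if (is_huge_zero_pmd(*pmd))	/* now built on is_huge_zero_pfn() */
		goto out;
	/* ... operate on the huge PMD under ptl ... */
out:
	spin_unlock(ptl);
	return 1;
}
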
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 42f374e828a2..526d27e88b3b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -149,7 +149,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
uffd_flags_t flags,
struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
-bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
+long hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
@@ -359,12 +359,6 @@ static inline void hugetlb_show_meminfo_node(int nid)
{
}
-static inline int prepare_hugepage_range(struct file *file,
- unsigned long addr, unsigned long len)
-{
- return -EINVAL;
-}
-
static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}
@@ -396,13 +390,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
return 0;
}
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- BUG();
-}
-
#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
struct vm_area_struct *dst_vma,
@@ -740,6 +727,11 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+ return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
return folio->_hugetlb_subpool;
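
hugetlb_reserve_pages() now returns a long rather than a bool. A hedged caller sketch, assuming the new contract is a count on success and a negative errno on failure; my_reserve() is hypothetical:

#include <linux/hugetlb.h>

static long my_reserve(struct inode *inode, long from, long to,
		       struct vm_area_struct *vma, vm_flags_t vm_flags)
{
	long chg = hugetlb_reserve_pages(inode, from, to, vma, vm_flags);

	if (chg < 0)
		return chg;	/* reservation failed */
	/* chg entries were added to the reserve map. */
	return 0;
}
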
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index b8d69cfbb58b..ff6120463745 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -12,7 +12,7 @@ extern int start_stop_khugepaged(void);
extern void __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
extern void khugepaged_enter_vma(struct vm_area_struct *vma,
- unsigned long vm_flags);
+ vm_flags_t vm_flags);
extern void khugepaged_min_free_kbytes_update(void);
extern bool current_is_khugepaged(void);
extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
@@ -37,7 +37,7 @@ static inline void khugepaged_exit(struct mm_struct *mm)
{
}
static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
- unsigned long vm_flags)
+ vm_flags_t vm_flags)
{
}
static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index d73095b5cd96..c17b955e7b0b 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -16,9 +16,9 @@
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, int advice, unsigned long *vm_flags);
-
-void ksm_add_vma(struct vm_area_struct *vma);
+ unsigned long end, int advice, vm_flags_t *vm_flags);
+vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+ vm_flags_t vm_flags);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
int ksm_disable(struct mm_struct *mm);
@@ -97,8 +97,10 @@ bool ksm_process_mergeable(struct mm_struct *mm);
#else /* !CONFIG_KSM */
-static inline void ksm_add_vma(struct vm_area_struct *vma)
+static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+ const struct file *file, vm_flags_t vm_flags)
{
+ return vm_flags;
}
static inline int ksm_disable(struct mm_struct *mm)
@@ -131,7 +133,7 @@ static inline void collect_procs_ksm(const struct folio *folio,
#ifdef CONFIG_MMU
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, int advice, unsigned long *vm_flags)
+ unsigned long end, int advice, vm_flags_t *vm_flags)
{
return 0;
}
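
ksm_vma_flags() replaces the old ksm_add_vma() callback: the mmap() path folds any KSM-implied flags (e.g. VM_MERGEABLE when the process opted into merge-any) into vm_flags before the VMA exists. A hedged sketch; my_compute_vm_flags() is hypothetical:

#include <linux/ksm.h>

static vm_flags_t my_compute_vm_flags(struct mm_struct *mm, struct file *file,
				      vm_flags_t vm_flags)
{
	/* With !CONFIG_KSM this is a no-op returning vm_flags unchanged. */
	return ksm_vma_flags(mm, file, vm_flags);
}
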
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 9ef129038224..bafe143b1f78 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -75,8 +75,8 @@
* searching for gaps or any other code that needs to find the end of the data.
*/
struct maple_metadata {
- unsigned char end;
- unsigned char gap;
+ unsigned char end; /* end of data */
+ unsigned char gap; /* offset of largest gap */
};
/*
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 87b6688f124a..785173aa0739 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -251,8 +251,10 @@ struct mem_cgroup {
* that this indicator should NOT be used in legacy cgroup mode
* where socket memory is accounted/charged separately.
*/
- unsigned long socket_pressure;
-
+ u64 socket_pressure;
+#if BITS_PER_LONG < 64
+ seqlock_t socket_pressure_seqlock;
+#endif
int kmemcg_id;
/*
* memcg->objcg is wiped out as a part of the objcg reparenting
@@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
+
+#if BITS_PER_LONG < 64
+static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
+{
+ u64 val = get_jiffies_64() + HZ;
+ unsigned long flags;
+
+ write_seqlock_irqsave(&memcg->socket_pressure_seqlock, flags);
+ memcg->socket_pressure = val;
+ write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, flags);
+}
+
+static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
+{
+ unsigned int seq;
+ u64 val;
+
+ do {
+ seq = read_seqbegin(&memcg->socket_pressure_seqlock);
+ val = memcg->socket_pressure;
+ } while (read_seqretry(&memcg->socket_pressure_seqlock, seq));
+
+ return val;
+}
+#else
+static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
+{
+ WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
+}
+
+static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
+{
+ return READ_ONCE(memcg->socket_pressure);
+}
+#endif
+
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
#ifdef CONFIG_MEMCG_V1
@@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
return !!memcg->tcpmem_pressure;
#endif /* CONFIG_MEMCG_V1 */
do {
- if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
+ if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
return true;
} while ((memcg = parent_mem_cgroup(memcg)));
return false;
diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index 246daadbfde8..6f606d9573c3 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -14,7 +14,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx);
* We also update VMA flags if appropriate by manipulating the VMA flags pointed
* to by vm_flags_ptr.
*/
-int memfd_check_seals_mmap(struct file *file, unsigned long *vm_flags_ptr);
+int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr);
#else
static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a)
{
@@ -25,7 +25,7 @@ static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
return ERR_PTR(-EINVAL);
}
static inline int memfd_check_seals_mmap(struct file *file,
- unsigned long *vm_flags_ptr)
+ vm_flags_t *vm_flags_ptr)
{
return 0;
}
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0dc0cf2863e2..7a805796fcfd 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -18,7 +18,7 @@
* adistance value (slightly faster) than default DRAM adistance to be part of
* the same memory tier.
*/
-#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
+#define MEMTIER_ADISTANCE_DRAM ((4L * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
struct memory_tier;
struct memory_dev_type {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 5ec4e6d209b9..40eb70ccb09d 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -109,8 +109,6 @@ struct memory_notify {
unsigned long altmap_nr_pages;
unsigned long start_pfn;
unsigned long nr_pages;
- int status_change_nid_normal;
- int status_change_nid;
};
struct notifier_block;
@@ -179,12 +177,30 @@ struct memory_group *memory_group_find_by_id(int mgid);
typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
struct memory_group *excluded, void *arg);
+struct memory_block *find_memory_block_by_id(unsigned long block_id);
#define hotplug_memory_notifier(fn, pri) ({ \
static __meminitdata struct notifier_block fn##_mem_nb =\
{ .notifier_call = fn, .priority = pri };\
register_memory_notifier(&fn##_mem_nb); \
})
+extern int sections_per_block;
+
+static inline unsigned long memory_block_id(unsigned long section_nr)
+{
+ return section_nr / sections_per_block;
+}
+
+static inline unsigned long pfn_to_block_id(unsigned long pfn)
+{
+ return memory_block_id(pfn_to_section_nr(pfn));
+}
+
+static inline unsigned long phys_to_block_id(unsigned long phys)
+{
+ return pfn_to_block_id(PFN_DOWN(phys));
+}
+
#ifdef CONFIG_NUMA
void memory_block_add_nid(struct memory_block *mem, int nid,
enum meminit_context context);
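
The notifier plumbing above can be exercised as follows: a hedged sketch using the now-slimmer struct memory_notify (status_change_nid* is gone) and the new pfn_to_block_id() helper. my_mem_callback() and the priority value are illustrative:

#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/init.h>

static int my_mem_callback(struct notifier_block *nb,
			   unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;

	/* Node-level transitions are now reported via node.h notifiers. */
	pr_info("memory event %lu: pfn %lx, %lu pages, block %lu\n",
		action, mn->start_pfn, mn->nr_pages,
		pfn_to_block_id(mn->start_pfn));
	return NOTIFY_OK;
}

static int __init my_mem_init(void)
{
	return hotplug_memory_notifier(my_mem_callback, 0);
}
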
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index eaac5ae8c05c..23f038a16231 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -314,7 +314,8 @@ extern int add_memory_driver_managed(int nid, u64 start, u64 size,
mhp_t mhp_flags);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages,
- struct vmem_altmap *altmap, int migratetype);
+ struct vmem_altmap *altmap, int migratetype,
+ bool isolate_pageblock);
extern void remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index aaa2114498d6..acadd41e0b5c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -35,8 +35,8 @@ struct migration_target_control;
* @src page. The driver should copy the contents of the
* @src page to the @dst page and set up the fields of @dst page.
* Both pages are locked.
- * If page migration is successful, the driver should call
- * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS.
+ * If page migration is successful, the driver should
+ * return MIGRATEPAGE_SUCCESS.
* If the driver cannot migrate the page at the moment, it can return
* -EAGAIN. The VM interprets this as a temporary migration failure and
* will retry it later. Any other error value is a permanent migration
@@ -69,7 +69,7 @@ int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free,
unsigned long private, enum migrate_mode mode, int reason,
unsigned int *ret_succeeded);
struct folio *alloc_migration_target(struct folio *src, unsigned long private);
-bool isolate_movable_page(struct page *page, isolate_mode_t mode);
+bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode);
bool isolate_folio_to_list(struct folio *folio, struct list_head *list);
int migrate_huge_page_move_mapping(struct address_space *mapping,
@@ -90,7 +90,7 @@ static inline int migrate_pages(struct list_head *l, new_folio_t new,
static inline struct folio *alloc_migration_target(struct folio *src,
unsigned long private)
{ return NULL; }
-static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode)
+static inline bool isolate_movable_ops_page(struct page *page, isolate_mode_t mode)
{ return false; }
static inline bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
{ return false; }
@@ -103,44 +103,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
#endif /* CONFIG_MIGRATION */
-#ifdef CONFIG_COMPACTION
-bool PageMovable(struct page *page);
-void __SetPageMovable(struct page *page, const struct movable_operations *ops);
-void __ClearPageMovable(struct page *page);
-#else
-static inline bool PageMovable(struct page *page) { return false; }
-static inline void __SetPageMovable(struct page *page,
- const struct movable_operations *ops)
-{
-}
-static inline void __ClearPageMovable(struct page *page)
-{
-}
-#endif
-
-static inline bool folio_test_movable(struct folio *folio)
-{
- return PageMovable(&folio->page);
-}
-
-static inline
-const struct movable_operations *folio_movable_ops(struct folio *folio)
-{
- VM_BUG_ON(!__folio_test_movable(folio));
-
- return (const struct movable_operations *)
- ((unsigned long)folio->mapping - PAGE_MAPPING_MOVABLE);
-}
-
-static inline
-const struct movable_operations *page_movable_ops(struct page *page)
-{
- VM_BUG_ON(!__PageMovable(page));
-
- return (const struct movable_operations *)
- ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE);
-}
-
#ifdef CONFIG_NUMA_BALANCING
int migrate_misplaced_folio_prepare(struct folio *folio,
struct vm_area_struct *vma, int node);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0c44bb8ce544..349f0d9aad22 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1818,7 +1818,24 @@ static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot)
{
return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot));
}
-#endif
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+/**
+ * folio_mk_pud - Create a PUD for this folio
+ * @folio: The folio to create a PUD for
+ * @pgprot: The page protection bits to use
+ *
+ * Create a page table entry for the first page of this folio.
+ * This is suitable for passing to set_pud_at().
+ *
+ * Return: A page table entry suitable for mapping this folio.
+ */
+static inline pud_t folio_mk_pud(struct folio *folio, pgprot_t pgprot)
+{
+ return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot));
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_MMU */
static inline bool folio_has_pincount(const struct folio *folio)
@@ -2152,13 +2169,13 @@ static inline int folio_expected_ref_count(const struct folio *folio)
const int order = folio_order(folio);
int ref_count = 0;
- if (WARN_ON_ONCE(folio_test_slab(folio)))
+ if (WARN_ON_ONCE(page_has_type(&folio->page) && !folio_test_hugetlb(folio)))
return 0;
if (folio_test_anon(folio)) {
/* One reference per page from the swapcache. */
ref_count += folio_test_swapcache(folio) << order;
- } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) {
+ } else {
/* One reference per page from the pagecache. */
ref_count += !!folio->mapping << order;
/* One reference from PG_private. */
@@ -2549,7 +2566,7 @@ extern long change_protection(struct mmu_gather *tlb,
unsigned long end, unsigned long cp_flags);
extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
struct vm_area_struct *vma, struct vm_area_struct **pprev,
- unsigned long start, unsigned long end, unsigned long newflags);
+ unsigned long start, unsigned long end, vm_flags_t newflags);
/*
* doesn't attempt to fault and will return short.
@@ -2694,13 +2711,6 @@ static inline pud_t pud_mkspecial(pud_t pud)
}
#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
-#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP
-static inline int pte_devmap(pte_t pte)
-{
- return 0;
-}
-#endif
-
extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
spinlock_t **ptl);
static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -3313,9 +3323,9 @@ extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
const struct vm_special_mapping *sm);
-extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
+struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
- unsigned long flags,
+ vm_flags_t vm_flags,
const struct vm_special_mapping *spec);
unsigned long randomize_stack_top(unsigned long stack_top);
@@ -3479,10 +3489,10 @@ static inline bool range_in_vma(struct vm_area_struct *vma,
}
#ifdef CONFIG_MMU
-pgprot_t vm_get_page_prot(unsigned long vm_flags);
+pgprot_t vm_get_page_prot(vm_flags_t vm_flags);
void vma_set_page_prot(struct vm_area_struct *vma);
#else
-static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
+static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
{
return __pgprot(0);
}
@@ -3519,9 +3529,9 @@ vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot);
vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
- pfn_t pfn);
+ unsigned long pfn);
vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
- unsigned long addr, pfn_t pfn);
+ unsigned long addr, unsigned long pfn);
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
@@ -4050,14 +4060,14 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
#endif
#ifdef CONFIG_ANON_VMA_NAME
-int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
- unsigned long len_in,
- struct anon_vma_name *anon_name);
+int set_anon_vma_name(unsigned long addr, unsigned long size,
+ const char __user *uname);
#else
-static inline int
-madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
- unsigned long len_in, struct anon_vma_name *anon_name) {
- return 0;
+static inline
+int set_anon_vma_name(unsigned long addr, unsigned long size,
+ const char __user *uname)
+{
+ return -EINVAL;
}
#endif
@@ -4190,4 +4200,23 @@ static inline bool page_pool_page_is_pp(const struct page *page)
}
#endif
+#define PAGE_SNAPSHOT_FAITHFUL (1 << 0)
+#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1)
+#define PAGE_SNAPSHOT_PG_IDLE (1 << 2)
+
+struct page_snapshot {
+ struct folio folio_snapshot;
+ struct page page_snapshot;
+ unsigned long pfn;
+ unsigned long idx;
+ unsigned long flags;
+};
+
+static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
+{
+ return ps->flags & PAGE_SNAPSHOT_FAITHFUL;
+}
+
+void snapshot_page(struct page_snapshot *ps, const struct page *page);
+
#endif /* _LINUX_MM_H */
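
A hedged sketch of consuming the new snapshot_page() interface; my_inspect_pfn() is hypothetical:

#include <linux/mm.h>
#include <linux/printk.h>

static void my_inspect_pfn(unsigned long pfn)
{
	struct page_snapshot ps;

	snapshot_page(&ps, pfn_to_page(pfn));
	if (!snapshot_page_is_faithful(&ps))
		pr_info("pfn %lx changed while being snapshotted\n", ps.pfn);
	/* ps.page_snapshot / ps.folio_snapshot are stable local copies. */
}
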
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0f0662157066..08bc2442db93 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -105,7 +105,6 @@ struct page {
unsigned int order;
};
};
- /* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
union {
pgoff_t __folio_index; /* Our offset within mapping. */
@@ -1086,7 +1085,7 @@ struct mm_struct {
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
- unsigned long def_flags;
+ vm_flags_t def_flags;
/**
* @write_protect_seq: Locked when any thread is write
diff --git a/include/linux/mman.h b/include/linux/mman.h
index f4c6346a8fcd..de9e8e6229a4 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -137,7 +137,7 @@ static inline bool arch_validate_flags(unsigned long flags)
/*
* Combine the mmap "prot" argument into "vm_flags" used internally.
*/
-static inline unsigned long
+static inline vm_flags_t
calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
{
return _calc_vm_trans(prot, PROT_READ, VM_READ ) |
@@ -149,7 +149,7 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
/*
* Combine the mmap "flags" argument into "vm_flags" used internally.
*/
-static inline unsigned long
+static inline vm_flags_t
calc_vm_flag_bits(struct file *file, unsigned long flags)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 5da384bd0a26..1f4f44951abe 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -309,6 +309,17 @@ void vma_mark_detached(struct vm_area_struct *vma);
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address);
+/*
+ * Locks the next vma pointed to by the iterator. Confirms that the locked
+ * vma has not been modified and retries under mmap_lock protection if a
+ * modification was detected. Should be called from within an RCU read
+ * section.
+ * Returns a valid locked VMA, NULL if there are no more VMAs, or -EINTR if
+ * the process was interrupted.
+ */
+struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
+ struct vma_iterator *iter,
+ unsigned long address);
+
#else /* CONFIG_PER_VMA_LOCK */
static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
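
A hedged sketch of an RCU-side VMA walk built on the new lock_next_vma(); it assumes CONFIG_PER_VMA_LOCK, simplifies the retry bookkeeping, and my_walk() is hypothetical:

#include <linux/mm.h>
#include <linux/mmap_lock.h>
#include <linux/err.h>

static void my_walk(struct mm_struct *mm)
{
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;
	unsigned long addr = 0;

	rcu_read_lock();
	while ((vma = lock_next_vma(mm, &vmi, addr)) != NULL) {
		if (IS_ERR(vma))	/* -EINTR: interrupted, bail out */
			break;
		addr = vma->vm_end;
		/* ... inspect the locked vma ... */
		vma_end_read(vma);
	}
	rcu_read_unlock();
}
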
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index a0a3894900ed..14a45979cccc 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
} \
unlikely(__ret_warn_once); \
})
+#define VM_WARN_ON_ONCE_VMA(cond, vma) ({ \
+ static bool __section(".data..once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_vma(vma); \
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
#define VM_WARN_ON_VMG(cond, vmg) ({ \
int __ret_warn = !!(cond); \
\
@@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
#define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_MM(cond, mm) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_VMA(cond, vma) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_VMG(cond, vmg) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
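
Usage of the new once-only VMA warning is analogous to the other VM_WARN_ON_ONCE_* helpers: on the first failing check per call site it dumps the VMA and warns. A hedged sketch; my_check() and its condition are illustrative:

#include <linux/mmdebug.h>
#include <linux/mm.h>

static void my_check(struct vm_area_struct *vma)
{
	/* Fires at most once per call site; dumps the offending VMA. */
	VM_WARN_ON_ONCE_VMA(vma->vm_start >= vma->vm_end, vma);
}
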
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 283913d42d7b..0c5da9141983 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -38,19 +38,19 @@
#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1)
/* Defines the order for the number of pages that have a migrate type. */
-#ifndef CONFIG_PAGE_BLOCK_ORDER
-#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER
+#ifndef CONFIG_PAGE_BLOCK_MAX_ORDER
+#define PAGE_BLOCK_MAX_ORDER MAX_PAGE_ORDER
#else
-#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
-#endif /* CONFIG_PAGE_BLOCK_ORDER */
+#define PAGE_BLOCK_MAX_ORDER CONFIG_PAGE_BLOCK_MAX_ORDER
+#endif /* CONFIG_PAGE_BLOCK_MAX_ORDER */
/*
* The MAX_PAGE_ORDER, which defines the max order of pages to be allocated
- * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER,
+ * by the buddy allocator, has to be larger than or equal to PAGE_BLOCK_MAX_ORDER,
* which defines the order for the number of pages that can have a migrate type
*/
-#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER)
-#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER
+#if (PAGE_BLOCK_MAX_ORDER > MAX_PAGE_ORDER)
+#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_MAX_ORDER
#endif
/*
@@ -79,6 +79,9 @@ enum migratetype {
* __free_pageblock_cma() function.
*/
MIGRATE_CMA,
+ __MIGRATE_TYPE_END = MIGRATE_CMA,
+#else
+ __MIGRATE_TYPE_END = MIGRATE_HIGHATOMIC,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
MIGRATE_ISOLATE, /* can't allocate from here */
@@ -92,8 +95,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
-# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \
- get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK))
+/*
+ * __dump_folio() in mm/debug.c passes a folio pointer that refers to an
+ * on-stack struct folio, so folio_pfn() cannot be used and the pfn must be
+ * passed in explicitly.
+ */
+# define is_migrate_cma_folio(folio, pfn) \
+ (get_pfnblock_migratetype(&folio->page, pfn) == MIGRATE_CMA)
#else
# define is_migrate_cma(migratetype) false
# define is_migrate_cma_page(_page) false
@@ -122,14 +129,12 @@ static inline bool migratetype_is_mergeable(int mt)
extern int page_group_by_mobility_disabled;
-#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
+#define get_pageblock_migratetype(page) \
+ get_pfnblock_migratetype(page, page_to_pfn(page))
-#define get_pageblock_migratetype(page) \
- get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
+#define folio_migratetype(folio) \
+ get_pageblock_migratetype(&folio->page)
-#define folio_migratetype(folio) \
- get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
- MIGRATETYPE_MASK)
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;
@@ -201,7 +206,6 @@ enum node_stat_item {
NR_FILE_PAGES,
NR_FILE_DIRTY,
NR_WRITEBACK,
- NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
diff --git a/include/linux/node.h b/include/linux/node.h
index 2b7517892230..2c7529335b21 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -111,43 +111,64 @@ struct memory_block;
extern struct node *node_devices[];
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
-void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
- unsigned long end_pfn,
- enum meminit_context context);
+void register_memory_blocks_under_node_hotplug(int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
#else
-static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
- unsigned long end_pfn,
- enum meminit_context context)
+static inline void register_memory_blocks_under_node_hotplug(int nid,
+ unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+}
+static inline void register_memory_blocks_under_nodes(void)
{
}
#endif
extern void unregister_node(struct node *node);
-#ifdef CONFIG_NUMA
-extern void node_dev_init(void);
-/* Core of the node registration - only memory hotplug should use this */
-extern int __register_one_node(int nid);
-
-/* Registers an online node */
-static inline int register_one_node(int nid)
-{
- int error = 0;
- if (node_online(nid)) {
- struct pglist_data *pgdat = NODE_DATA(nid);
- unsigned long start_pfn = pgdat->node_start_pfn;
- unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
+struct node_notify {
+ int nid;
+};
- error = __register_one_node(nid);
- if (error)
- return error;
- register_memory_blocks_under_node(nid, start_pfn, end_pfn,
- MEMINIT_EARLY);
- }
+#define NODE_ADDING_FIRST_MEMORY (1<<0)
+#define NODE_ADDED_FIRST_MEMORY (1<<1)
+#define NODE_CANCEL_ADDING_FIRST_MEMORY (1<<2)
+#define NODE_REMOVING_LAST_MEMORY (1<<3)
+#define NODE_REMOVED_LAST_MEMORY (1<<4)
+#define NODE_CANCEL_REMOVING_LAST_MEMORY (1<<5)
- return error;
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
+extern int register_node_notifier(struct notifier_block *nb);
+extern void unregister_node_notifier(struct notifier_block *nb);
+extern int node_notify(unsigned long val, void *v);
+
+#define hotplug_node_notifier(fn, pri) ({ \
+ static __meminitdata struct notifier_block fn##_node_nb =\
+ { .notifier_call = fn, .priority = pri };\
+ register_node_notifier(&fn##_node_nb); \
+})
+#else
+static inline int register_node_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline void unregister_node_notifier(struct notifier_block *nb)
+{
+}
+static inline int node_notify(unsigned long val, void *v)
+{
+ return 0;
+}
+static inline int hotplug_node_notifier(notifier_fn_t fn, int pri)
+{
+ return 0;
}
+#endif
+#ifdef CONFIG_NUMA
+extern void node_dev_init(void);
+/* Core of the node registration - only memory hotplug should use this */
+extern int register_one_node(int nid);
extern void unregister_one_node(int nid);
extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
@@ -160,10 +181,6 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid,
static inline void node_dev_init(void)
{
}
-static inline int __register_one_node(int nid)
-{
- return 0;
-}
static inline int register_one_node(int nid)
{
return 0;
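
The node notifier mirrors the memory notifier pattern. A hedged sketch of a callback reacting to the new events; my_node_callback() and the priority are illustrative:

#include <linux/node.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/init.h>

static int my_node_callback(struct notifier_block *nb,
			    unsigned long action, void *arg)
{
	struct node_notify *nn = arg;

	switch (action) {
	case NODE_ADDED_FIRST_MEMORY:
		pr_info("node %d gained its first memory\n", nn->nid);
		break;
	case NODE_REMOVED_LAST_MEMORY:
		pr_info("node %d lost its last memory\n", nn->nid);
		break;
	}
	return NOTIFY_OK;
}

static int __init my_node_init(void)
{
	return hotplug_node_notifier(my_node_callback, 0);
}
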
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4fe5ee67535b..8e4d6eda8a8d 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -167,8 +167,12 @@ enum pageflags {
/* Remapped by swiotlb-xen. */
PG_xen_remapped = PG_owner_priv_1,
- /* non-lru isolated movable page */
- PG_isolated = PG_reclaim,
+#ifdef CONFIG_MIGRATION
+ /* movable_ops page that is isolated for migration */
+ PG_movable_ops_isolated = PG_reclaim,
+ /* this is a movable_ops page (for selected typed pages only) */
+ PG_movable_ops = PG_uptodate,
+#endif
/* Only valid for buddy pages. Used to track pages that are reported */
PG_reported = PG_uptodate,
@@ -691,15 +695,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
/*
* On an anonymous folio mapped into a user virtual memory area,
* folio->mapping points to its anon_vma, not to a struct address_space;
- * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h.
+ * with the FOLIO_MAPPING_ANON bit set to distinguish it. See rmap.h.
*
- * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
- * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON
+ * On an anonymous folio in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
+ * the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON
* bit; and then folio->mapping points, not to an anon_vma, but to a private
- * structure which KSM associates with that merged page. See ksm.h.
- *
- * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable
- * page and then folio->mapping points to a struct movable_operations.
+ * structure which KSM associates with that merged folio. See ksm.h.
*
* Please note that, confusingly, "folio_mapping" refers to the inode
* address_space which maps the folio from disk; whereas "folio_mapped"
@@ -712,50 +713,27 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
* false before calling the following functions (e.g., folio_test_anon).
* See mm/slab.h.
*/
-#define PAGE_MAPPING_ANON 0x1
-#define PAGE_MAPPING_MOVABLE 0x2
-#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
-#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
-
-static __always_inline bool folio_mapping_flags(const struct folio *folio)
-{
- return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
-}
-
-static __always_inline bool PageMappingFlags(const struct page *page)
-{
- return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
-}
+#define FOLIO_MAPPING_ANON 0x1
+#define FOLIO_MAPPING_ANON_KSM 0x2
+#define FOLIO_MAPPING_KSM (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
+#define FOLIO_MAPPING_FLAGS (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
static __always_inline bool folio_test_anon(const struct folio *folio)
{
- return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
+ return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0;
}
static __always_inline bool PageAnonNotKsm(const struct page *page)
{
unsigned long flags = (unsigned long)page_folio(page)->mapping;
- return (flags & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON;
+ return (flags & FOLIO_MAPPING_FLAGS) == FOLIO_MAPPING_ANON;
}
static __always_inline bool PageAnon(const struct page *page)
{
return folio_test_anon(page_folio(page));
}
-
-static __always_inline bool __folio_test_movable(const struct folio *folio)
-{
- return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
- PAGE_MAPPING_MOVABLE;
-}
-
-static __always_inline bool __PageMovable(const struct page *page)
-{
- return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
- PAGE_MAPPING_MOVABLE;
-}
-
#ifdef CONFIG_KSM
/*
* A KSM page is one of those write-protected "shared pages" or "merged pages"
@@ -765,8 +743,8 @@ static __always_inline bool __PageMovable(const struct page *page)
*/
static __always_inline bool folio_test_ksm(const struct folio *folio)
{
- return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
- PAGE_MAPPING_KSM;
+ return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) ==
+ FOLIO_MAPPING_KSM;
}
#else
FOLIO_TEST_FLAG_FALSE(ksm)
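As a reading aid for the rename, a sketch of the encoding the FOLIO_MAPPING_*
bits describe, restated under the new names (the real helper is
folio_raw_mapping() in mm/internal.h):

	/*
	 * folio->mapping, low two bits (sketch):
	 *   00 -> struct address_space * (pagecache folio)
	 *   01 -> anon_vma pointer (FOLIO_MAPPING_ANON)
	 *   11 -> KSM private structure (FOLIO_MAPPING_KSM)
	 *   10 -> unused, now that movable_ops pages no longer
	 *         overload folio->mapping
	 */
	static inline void *folio_raw_mapping_sketch(const struct folio *folio)
	{
		unsigned long mapping = (unsigned long)folio->mapping;

		return (void *)(mapping & ~FOLIO_MAPPING_FLAGS);
	}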
@@ -1137,7 +1115,53 @@ static inline bool folio_contain_hwpoisoned_page(struct folio *folio)
bool is_free_buddy_page(const struct page *page);
-PAGEFLAG(Isolated, isolated, PF_ANY);
+#ifdef CONFIG_MIGRATION
+/*
+ * This page is migratable through movable_ops (for selected typed pages
+ * only).
+ *
+ * Page migration of such pages might fail, for example, if the page is
+ * already isolated by somebody else, or if the page is about to get freed.
+ *
+ * While a subsystem may mark selected typed pages that support page migration
+ * as movable through movable_ops, it must never clear this flag.
+ *
+ * This flag is only cleared when the page is freed back to the buddy.
+ *
+ * Only selected page types support this flag (see page_movable_ops()) and
+ * the flag might be used in other contexts for other pages. Always use
+ * page_has_movable_ops() instead.
+ */
+TESTPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL);
+SETPAGEFLAG(MovableOps, movable_ops, PF_NO_TAIL);
+/*
+ * A movable_ops page has this flag set while it is isolated for migration.
+ * This flag primarily protects against concurrent migration attempts.
+ *
+ * Once migration has ended (success or failure), the flag is cleared. The
+ * flag is managed by the migration core.
+ */
+PAGEFLAG(MovableOpsIsolated, movable_ops_isolated, PF_NO_TAIL);
+#else /* !CONFIG_MIGRATION */
+TESTPAGEFLAG_FALSE(MovableOps, movable_ops);
+SETPAGEFLAG_NOOP(MovableOps, movable_ops);
+PAGEFLAG_FALSE(MovableOpsIsolated, movable_ops_isolated);
+#endif /* CONFIG_MIGRATION */
+
+/**
+ * page_has_movable_ops - test for a movable_ops page
+ * @page: The page to test.
+ *
+ * Test whether this is a movable_ops page. Such pages will stay that
+ * way until freed.
+ *
+ * Returns true if this is a movable_ops page, otherwise false.
+ */
+static inline bool page_has_movable_ops(const struct page *page)
+{
+ return PageMovableOps(page) &&
+ (PageOffline(page) || PageZsmalloc(page));
+}
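A sketch of how the migration core might combine the two flags when isolating;
locking (e.g. against concurrent freeing) is elided and the function name is
hypothetical:

	static bool movable_ops_try_isolate(struct page *page)
	{
		/* Only pages a subsystem marked at allocation time qualify. */
		if (!page_has_movable_ops(page))
			return false;
		/* Somebody else may already be migrating this page. */
		if (PageMovableOpsIsolated(page))
			return false;
		SetPageMovableOpsIsolated(page);
		return true;
	}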
static __always_inline int PageAnonExclusive(const struct page *page)
{
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 898bb788243b..3e2f960e166c 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -11,6 +11,12 @@ static inline bool is_migrate_isolate(int migratetype)
{
return migratetype == MIGRATE_ISOLATE;
}
+#define get_pageblock_isolate(page) \
+ get_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate)
+#define clear_pageblock_isolate(page) \
+ clear_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate)
+#define set_pageblock_isolate(page) \
+ set_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate)
#else
static inline bool is_migrate_isolate_page(struct page *page)
{
@@ -20,22 +26,45 @@ static inline bool is_migrate_isolate(int migratetype)
{
return false;
}
+static inline bool get_pageblock_isolate(struct page *page)
+{
+ return false;
+}
+static inline void clear_pageblock_isolate(struct page *page)
+{
+}
+static inline void set_pageblock_isolate(struct page *page)
+{
+}
#endif
-#define MEMORY_OFFLINE 0x1
-#define REPORT_FAILURE 0x2
+/*
+ * Pageblock isolation modes:
+ * PB_ISOLATE_MODE_MEM_OFFLINE - isolate to offline (!allocate) memory
+ * e.g., skip over PageHWPoison() pages and
+ * PageOffline() pages. Unmovable pages will be
+ * reported in this mode.
+ * PB_ISOLATE_MODE_CMA_ALLOC - isolate for CMA allocations
+ * PB_ISOLATE_MODE_OTHER - isolate for other purposes
+ */
+enum pb_isolate_mode {
+ PB_ISOLATE_MODE_MEM_OFFLINE,
+ PB_ISOLATE_MODE_CMA_ALLOC,
+ PB_ISOLATE_MODE_OTHER,
+};
-void set_pageblock_migratetype(struct page *page, int migratetype);
+void __meminit init_pageblock_migratetype(struct page *page,
+ enum migratetype migratetype,
+ bool isolate);
-bool move_freepages_block_isolate(struct zone *zone, struct page *page,
- int migratetype);
+bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
+bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- int migratetype, int flags);
+ enum pb_isolate_mode mode);
-void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- int migratetype);
+void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn);
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
- int isol_flags);
+ enum pb_isolate_mode mode);
#endif
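For reference, a sketch of how a memory-offline style user would drive the
reworked interface; error handling is abbreviated and the function name is
hypothetical:

	static int offline_range_sketch(unsigned long start_pfn,
					unsigned long end_pfn)
	{
		int ret;

		ret = start_isolate_page_range(start_pfn, end_pfn,
					       PB_ISOLATE_MODE_MEM_OFFLINE);
		if (ret)
			return ret;

		/* ... migrate away or discard all pages in the range ... */

		ret = test_pages_isolated(start_pfn, end_pfn,
					  PB_ISOLATE_MODE_MEM_OFFLINE);

		/* Pageblocks must be restored on both success and failure. */
		undo_isolate_page_range(start_pfn, end_pfn);
		return ret;
	}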
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index debdc25f08b9..3328357f6dba 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page,
extern void __split_page_owner(struct page *page, int old_order,
int new_order);
extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
-extern void __set_page_owner_migrate_reason(struct page *page, int reason);
+extern void __folio_set_owner_migrate_reason(struct folio *folio, int reason);
extern void __dump_page_owner(const struct page *page);
extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
pg_data_t *pgdat, struct zone *zone);
@@ -43,10 +43,10 @@ static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
if (static_branch_unlikely(&page_owner_inited))
__folio_copy_owner(newfolio, old);
}
-static inline void set_page_owner_migrate_reason(struct page *page, int reason)
+static inline void folio_set_owner_migrate_reason(struct folio *folio, int reason)
{
if (static_branch_unlikely(&page_owner_inited))
- __set_page_owner_migrate_reason(page, reason);
+ __folio_set_owner_migrate_reason(folio, reason);
}
static inline void dump_page_owner(const struct page *page)
{
@@ -68,7 +68,7 @@ static inline void split_page_owner(struct page *page, int old_order,
static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
{
}
-static inline void set_page_owner_migrate_reason(struct page *page, int reason)
+static inline void folio_set_owner_migrate_reason(struct folio *folio, int reason)
{
}
static inline void dump_page_owner(const struct page *page)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index e73a4292ef02..6a44be0f39f4 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -19,15 +19,33 @@ enum pageblock_bits {
PB_migrate,
PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
/* 3 bits required for migrate types */
- PB_migrate_skip,/* If set the block is skipped by compaction */
+ PB_compact_skip,/* If set the block is skipped by compaction */
+#ifdef CONFIG_MEMORY_ISOLATION
+ /*
+ * Pageblock isolation is represented with a separate bit, so that
+ * the migratetype of a block is not overwritten by isolation.
+ */
+ PB_migrate_isolate, /* If set the block is isolated */
+#endif
/*
* Assume the bits will always align on a word. If this assumption
* changes then get/set pageblock needs updating.
*/
- NR_PAGEBLOCK_BITS
+ __NR_PAGEBLOCK_BITS
};
+#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
+
+#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1)
+
+#ifdef CONFIG_MEMORY_ISOLATION
+#define MIGRATETYPE_AND_ISO_MASK \
+ (((1UL << (PB_migrate_end + 1)) - 1) | BIT(PB_migrate_isolate))
+#else
+#define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK
+#endif
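A worked view of the resulting bit layout, assuming CONFIG_MEMORY_ISOLATION=y
and the three migratetype bits noted in the enum above:

	/*
	 * PB_migrate               bits 0..2  (PB_migratetype_bits == 3)
	 * PB_compact_skip          bit 3
	 * PB_migrate_isolate       bit 4
	 * __NR_PAGEBLOCK_BITS      == 5
	 * NR_PAGEBLOCK_BITS        == roundup_pow_of_two(5) == 8
	 *
	 * MIGRATETYPE_MASK         == 0b00111
	 * MIGRATETYPE_AND_ISO_MASK == 0b00111 | BIT(4) == 0b10111
	 */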
+
#if defined(CONFIG_HUGETLB_PAGE)
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -41,18 +59,18 @@ extern unsigned int pageblock_order;
* Huge pages are a constant size, but don't exceed the maximum allocation
* granularity.
*/
-#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER)
+#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_MAX_ORDER)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
-#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER)
+#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_MAX_ORDER)
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-/* If huge pages are not used, group by PAGE_BLOCK_ORDER */
-#define pageblock_order PAGE_BLOCK_ORDER
+/* If huge pages are not used, group by PAGE_BLOCK_MAX_ORDER */
+#define pageblock_order PAGE_BLOCK_MAX_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
@@ -65,27 +83,23 @@ extern unsigned int pageblock_order;
/* Forward declaration */
struct page;
-unsigned long get_pfnblock_flags_mask(const struct page *page,
- unsigned long pfn,
- unsigned long mask);
-
-void set_pfnblock_flags_mask(struct page *page,
- unsigned long flags,
- unsigned long pfn,
- unsigned long mask);
+enum migratetype get_pfnblock_migratetype(const struct page *page,
+ unsigned long pfn);
+bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
+ enum pageblock_bits pb_bit);
+void set_pfnblock_bit(const struct page *page, unsigned long pfn,
+ enum pageblock_bits pb_bit);
+void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
+ enum pageblock_bits pb_bit);
/* Declarations for getting and setting flags. See mm/page_alloc.c */
#ifdef CONFIG_COMPACTION
#define get_pageblock_skip(page) \
- get_pfnblock_flags_mask(page, page_to_pfn(page), \
- (1 << (PB_migrate_skip)))
+ get_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
#define clear_pageblock_skip(page) \
- set_pfnblock_flags_mask(page, 0, page_to_pfn(page), \
- (1 << PB_migrate_skip))
+ clear_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
#define set_pageblock_skip(page) \
- set_pfnblock_flags_mask(page, (1 << PB_migrate_skip), \
- page_to_pfn(page), \
- (1 << PB_migrate_skip))
+ set_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
#else
static inline bool get_pageblock_skip(struct page *page)
{
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ce2bcdcadb73..12a12dae727d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -502,7 +502,7 @@ static inline pgoff_t mapping_align_index(struct address_space *mapping,
static inline bool mapping_large_folio_support(struct address_space *mapping)
{
/* AS_FOLIO_ORDER is only reasonable for pagecache folios */
- VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON,
+ VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON,
"Anonymous mapping always supports large folio");
return mapping_max_folio_order(mapping) > 0;
@@ -905,7 +905,8 @@ static inline struct page *find_or_create_page(struct address_space *mapping,
* @mapping: target address_space
* @index: the page index
*
- * Same as grab_cache_page(), but do not wait if the page is unavailable.
+ * Returns the locked page at the given index in the given cache, creating it
+ * if needed, but does not wait if the page is locked or if memory needs to be
+ * reclaimed.
* This is intended for speculative data generators, where the data can
* be regenerated if the page couldn't be grabbed. This routine should
* be safe to call while holding the lock for another page.
@@ -969,15 +970,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
-/*
- * Returns locked page at given index in given cache, creating it if needed.
- */
-static inline struct page *grab_cache_page(struct address_space *mapping,
- pgoff_t index)
-{
- return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
-}
-
struct folio *read_cache_folio(struct address_space *, pgoff_t index,
filler_t *filler, struct file *file);
struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index,
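With grab_cache_page() gone, its remaining callers are expected to open-code
the equivalent; the before/after, taken directly from the removed helper's
body:

	/* Before (helper removed above): */
	page = grab_cache_page(mapping, index);

	/* After, open-coded at the call site: */
	page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping));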
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 9700a29f8afb..682472c15495 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -14,6 +14,8 @@ enum page_walk_lock {
PGWALK_WRLOCK = 1,
/* vma is expected to be already write-locked during the walk */
PGWALK_WRLOCK_VERIFY = 2,
+ /* vma is expected to be already read-locked during the walk */
+ PGWALK_VMA_RDLOCK_VERIFY = 3,
};
/**
@@ -129,10 +131,9 @@ struct mm_walk {
int walk_page_range(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
-int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
- unsigned long end, const struct mm_walk_ops *ops,
- pgd_t *pgd,
- void *private);
+int walk_kernel_page_table_range(unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ pgd_t *pgd, void *private);
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
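A minimal sketch of a caller of the renamed kernel walker; the pte callback and
ops are hypothetical, and a NULL pgd presumably selects init_mm's page tables,
as with the old novma walker:

	static int note_pte(pte_t *pte, unsigned long addr, unsigned long next,
			    struct mm_walk *walk)
	{
		/* ... inspect *pte for the kernel mapping at addr ... */
		return 0;
	}

	static const struct mm_walk_ops note_ops = {
		.pte_entry	= note_pte,
	};

	static int dump_kernel_range(unsigned long start, unsigned long end)
	{
		/* No mm argument any more; this walks kernel page tables. */
		return walk_kernel_page_table_range(start, end, &note_ops,
						    NULL, NULL);
	}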
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index c16cdeaa505e..12d90360f6db 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -63,14 +63,15 @@
* 1. The symbol must be globally unique, even the static ones.
* 2. Static percpu variables cannot be defined inside a function.
*
- * Archs which need weak percpu definitions should define
- * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary.
+ * Archs whose modules need weak percpu definitions should select
+ * CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU.
*
* To ensure that the generic code observes the above two
* restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak
* definition is used for all cases.
*/
-#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU)
+#if (defined(CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU) && defined(MODULE)) || \
+ defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU)
/*
* __pcpu_scope_* dummy variable is used to enforce scope. It
* receives the static modifier when it's used in front of
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index 14bc053c53d8..b90ca0b6c331 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -4,15 +4,6 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
-
-/*
- * pfn_t: encapsulates a page-frame number that is optionally backed
- * by memmap (struct page). Whether a pfn_t has a 'struct page'
- * backing is indicated by flags in the high bits of the value.
- */
-typedef struct {
- u64 val;
-} pfn_t;
#endif
#define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
deleted file mode 100644
index 2d9148221e9a..000000000000
--- a/include/linux/pfn_t.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_PFN_T_H_
-#define _LINUX_PFN_T_H_
-#include <linux/mm.h>
-
-/*
- * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
- * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
- * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
- * PFN_DEV - pfn is not covered by system memmap by default
- * PFN_MAP - pfn has a dynamic page mapping established by a device driver
- * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
- * get_user_pages
- */
-#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
-#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
-#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
-#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
-#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
-#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
-
-#define PFN_FLAGS_TRACE \
- { PFN_SPECIAL, "SPECIAL" }, \
- { PFN_SG_CHAIN, "SG_CHAIN" }, \
- { PFN_SG_LAST, "SG_LAST" }, \
- { PFN_DEV, "DEV" }, \
- { PFN_MAP, "MAP" }
-
-static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
-{
- pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
-
- return pfn_t;
-}
-
-/* a default pfn to pfn_t conversion assumes that @pfn is pfn_valid() */
-static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
-{
- return __pfn_to_pfn_t(pfn, 0);
-}
-
-static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
-{
- return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
-}
-
-static inline bool pfn_t_has_page(pfn_t pfn)
-{
- return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
-}
-
-static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
-{
- return pfn.val & ~PFN_FLAGS_MASK;
-}
-
-static inline struct page *pfn_t_to_page(pfn_t pfn)
-{
- if (pfn_t_has_page(pfn))
- return pfn_to_page(pfn_t_to_pfn(pfn));
- return NULL;
-}
-
-static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
-{
- return PFN_PHYS(pfn_t_to_pfn(pfn));
-}
-
-static inline pfn_t page_to_pfn_t(struct page *page)
-{
- return pfn_to_pfn_t(page_to_pfn(page));
-}
-
-static inline int pfn_t_valid(pfn_t pfn)
-{
- return pfn_valid(pfn_t_to_pfn(pfn));
-}
-
-#ifdef CONFIG_MMU
-static inline pte_t pfn_t_pte(pfn_t pfn, pgprot_t pgprot)
-{
- return pfn_pte(pfn_t_to_pfn(pfn), pgprot);
-}
-#endif
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
-{
- return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
-}
-
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
-{
- return pfn_pud(pfn_t_to_pfn(pfn), pgprot);
-}
-#endif
-#endif
-
-#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
-static inline bool pfn_t_devmap(pfn_t pfn)
-{
- const u64 flags = PFN_DEV|PFN_MAP;
-
- return (pfn.val & flags) == flags;
-}
-#else
-static inline bool pfn_t_devmap(pfn_t pfn)
-{
- return false;
-}
-pte_t pte_mkdevmap(pte_t pte);
-pmd_t pmd_mkdevmap(pmd_t pmd);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
- defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
-pud_t pud_mkdevmap(pud_t pud);
-#endif
-#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
-
-#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
-static inline bool pfn_t_special(pfn_t pfn)
-{
- return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
-}
-#else
-static inline bool pfn_t_special(pfn_t pfn)
-{
- return false;
-}
-#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
-#endif /* _LINUX_PFN_T_H_ */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 0b6e1f781d86..e3b99920be05 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -456,6 +456,17 @@ static inline bool arch_has_hw_pte_young(void)
}
#endif
+#ifndef exec_folio_order
+/*
+ * Returns preferred minimum folio order for executable file-backed memory. Must
+ * be in range [0, PMD_ORDER). Default to order-0.
+ */
+static inline unsigned int exec_folio_order(void)
+{
+ return 0;
+}
+#endif
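An architecture wanting larger text folios could override the default; a
hypothetical override in an arch's asm/pgtable.h, assuming 4KiB base pages and
staying within [0, PMD_ORDER):

	#define exec_folio_order exec_folio_order
	static inline unsigned int exec_folio_order(void)
	{
		/* Prefer 64KiB folios for text: order 4 with 4KiB pages. */
		return 16 - PAGE_SHIFT;
	}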
+
#ifndef arch_check_zapped_pte
static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
pte_t pte)
@@ -1320,7 +1331,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
/*
* Commit an update to a pte, leaving any hardware-controlled bits in
- * the PTE unmodified.
+ * the PTE unmodified. The pte returned from ptep_modify_prot_start() may
+ * additionally have young and/or dirty bits set where previously they were not,
+ * so the updated pte may have these additional changes.
*/
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr,
@@ -1329,6 +1342,86 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
__ptep_modify_prot_commit(vma, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
+
+/**
+ * modify_prot_start_ptes - Start a pte protection read-modify-write transaction
+ * over a batch of ptes, which protects against asynchronous hardware
+ * modifications to the ptes. The intention is not to prevent the hardware from
+ * making pte updates, but to prevent any updates it may make from being lost.
+ * Please see the comment above ptep_modify_prot_start() for a full description.
+ *
+ * @vma: The virtual memory area the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_modify_prot_start(), collecting the accessed/dirty (a/d)
+ * bits from each pte in the batch.
+ *
+ * Note that, besides the PFN, only the young and dirty bits may differ across
+ * the PTEs in the batch.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. All other PTE bits must be identical for
+ * all PTEs in the batch except for young and dirty bits. The PTEs are all in
+ * the same PMD.
+ */
+#ifndef modify_prot_start_ptes
+static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ pte_t pte, tmp_pte;
+
+ pte = ptep_modify_prot_start(vma, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = ptep_modify_prot_start(vma, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+ return pte;
+}
+#endif
+
+/**
+ * modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any
+ * hardware-controlled bits in the PTE unmodified.
+ *
+ * @vma: The virtual memory area the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @old_pte: Old page table entry (for the first entry) which is now cleared.
+ * @pte: New page table entry to be set.
+ * @nr: Number of entries.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_modify_prot_commit().
+ *
+ * Context: The caller holds the page table lock. The PTEs are all in the same
+ * PMD. On exit, the ptes set in the batch all map the same folio. The ptes set by
+ * ptep_modify_prot_start() may additionally have young and/or dirty bits set
+ * where previously they were not, so the updated ptes may have these
+ * additional changes.
+ */
+#ifndef modify_prot_commit_ptes
+static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
+{
+ int i;
+
+ for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) {
+ ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+
+ /* Advance PFN only, set same prot */
+ old_pte = pte_next_pfn(old_pte);
+ pte = pte_next_pfn(pte);
+ }
+}
+#endif
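A sketch of the intended call pattern, loosely modelled on an mprotect-style
loop; the batching decision (nr) is elided and the variable names are
hypothetical:

	pte_t oldpte, newpte;

	/* Under the page table lock, over nr consecutive ptes of one folio: */
	oldpte = modify_prot_start_ptes(vma, addr, pte, nr);
	newpte = pte_modify(oldpte, newprot);	/* transform the protection */
	modify_prot_commit_ptes(vma, addr, pte, oldpte, newpte, nr);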
+
#endif /* CONFIG_MMU */
/*
@@ -1632,21 +1725,6 @@ static inline int pud_write(pud_t pud)
}
#endif /* pud_write */
-#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline int pmd_devmap(pmd_t pmd)
-{
- return 0;
-}
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
-}
-#endif
-
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline int pud_trans_huge(pud_t pud)
@@ -1661,7 +1739,7 @@ static inline int pud_trans_unstable(pud_t *pud)
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
pud_t pudval = READ_ONCE(*pud);
- if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
+ if (pud_none(pudval) || pud_trans_huge(pudval))
return 1;
if (unlikely(pud_bad(pudval))) {
pud_clear_bad(pud);
@@ -1901,8 +1979,8 @@ typedef unsigned int pgtbl_mod_mask;
* - It should contain a huge PFN, which points to a huge page larger than
* PAGE_SIZE of the platform. The PFN format isn't important here.
*
- * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(),
- * pXd_devmap(), or hugetlb mappings).
+ * - It should cover all kinds of huge mappings (i.e. pXd_trans_huge()
+ * or hugetlb mappings).
*/
#ifndef pgd_leaf
#define pgd_leaf(x) false
@@ -2005,7 +2083,7 @@ typedef unsigned int pgtbl_mod_mask;
* x: (yes) yes
*/
#define DECLARE_VM_GET_PAGE_PROT \
-pgprot_t vm_get_page_prot(unsigned long vm_flags) \
+pgprot_t vm_get_page_prot(vm_flags_t vm_flags) \
{ \
return protection_map[vm_flags & \
(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index de1d24f19f76..f139377f4b31 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -27,6 +27,7 @@ enum {
PROC_ENTRY_proc_read_iter = 1U << 1,
PROC_ENTRY_proc_compat_ioctl = 1U << 2,
+ PROC_ENTRY_proc_lseek = 1U << 3,
PROC_ENTRY_FORCE_LOOKUP = 1U << 7,
};
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c4f4903b1088..20803fcb49a7 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -893,7 +893,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
* Called from mm/vmscan.c to handle paging out
*/
int folio_referenced(struct folio *, int is_locked,
- struct mem_cgroup *memcg, unsigned long *vm_flags);
+ struct mem_cgroup *memcg, vm_flags_t *vm_flags);
void try_to_migrate(struct folio *folio, enum ttu_flags flags);
void try_to_unmap(struct folio *, enum ttu_flags flags);
@@ -1025,7 +1025,7 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
static inline int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg,
- unsigned long *vm_flags)
+ vm_flags_t *vm_flags)
{
*vm_flags = 0;
return 0;
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 5f03a39a26f7..6d0f9c599ff7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -11,6 +11,8 @@
#include <linux/fs_parser.h>
#include <linux/userfaultfd_k.h>
+struct swap_iocb;
+
/* inode in-kernel data */
#ifdef CONFIG_TMPFS_QUOTA
@@ -107,7 +109,8 @@ static inline bool shmem_mapping(struct address_space *mapping)
void shmem_unlock_mapping(struct address_space *mapping);
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
-int shmem_writeout(struct folio *folio, struct writeback_control *wbc);
+int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
+ struct list_head *folio_list);
void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
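The two wbc fields removed from writeback.h further below now travel as
explicit arguments; a caller-side sketch of the new convention, assuming the
swap_write_unplug() step that the removed wbc comment describes:

	static int shmem_writeout_sketch(struct folio *folio)
	{
		struct swap_iocb *plug = NULL;
		LIST_HEAD(folio_list);	/* target list if a large folio is split */
		int err;

		err = shmem_writeout(folio, &plug, &folio_list);
		if (plug)
			swap_write_unplug(plug);
		return err;
	}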
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bc0e1c275fc0..2fe6ed2cc3fd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -376,8 +376,9 @@ extern unsigned long totalreserve_pages;
/* linux/mm/swap.c */
-void lru_note_cost(struct lruvec *lruvec, bool file,
- unsigned int nr_io, unsigned int nr_rotated);
+void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
+ unsigned int nr_io, unsigned int nr_rotated)
+ __releases(lruvec->lru_lock);
void lru_note_cost_refault(struct folio *);
void folio_add_lru(struct folio *);
void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
@@ -415,7 +416,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MIN_SWAPPINESS 0
#define MAX_SWAPPINESS 200
-/* Just recliam from anon folios in proactive memory reclaim */
+/* Just reclaim from anon folios in proactive memory reclaim */
#define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
@@ -431,6 +432,22 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
long remove_mapping(struct address_space *mapping, struct folio *folio);
+#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
+extern int reclaim_register_node(struct node *node);
+extern void reclaim_unregister_node(struct node *node);
+
+#else
+
+static inline int reclaim_register_node(struct node *node)
+{
+ return 0;
+}
+
+static inline void reclaim_unregister_node(struct node *node)
+{
+}
+#endif /* CONFIG_SYSFS && CONFIG_NUMA */
+
#ifdef CONFIG_NUMA
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 75342022d144..c0e716aec26a 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -30,11 +30,7 @@
* from userfaultfd, in order to leave a free define-space for
* shared O_* flags.
*/
-#define UFFD_CLOEXEC O_CLOEXEC
-#define UFFD_NONBLOCK O_NONBLOCK
-
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
-#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
@@ -213,12 +209,12 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
}
static inline bool vma_can_userfault(struct vm_area_struct *vma,
- unsigned long vm_flags,
+ vm_flags_t vm_flags,
bool wp_async)
{
vm_flags &= __VM_UFFD_FLAGS;
- if (vm_flags & VM_DROPPABLE)
+ if (vma->vm_flags & VM_DROPPABLE)
return false;
if ((vm_flags & VM_UFFD_MINOR) &&
@@ -263,6 +259,7 @@ extern void mremap_userfaultfd_prep(struct vm_area_struct *,
extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
unsigned long from, unsigned long to,
unsigned long len);
+void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *);
extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
@@ -285,7 +282,7 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
struct vm_area_struct *vma,
- unsigned long vm_flags,
+ vm_flags_t vm_flags,
unsigned long start, unsigned long end,
bool wp_async);
@@ -375,6 +372,10 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
{
}
+static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx)
+{
+}
+
static inline bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index b2ccb6845595..c287998908bf 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -507,7 +507,7 @@ static inline const char *lru_list_name(enum lru_list lru)
return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
}
-#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
+#if defined(CONFIG_VM_EVENT_COUNTERS)
static inline const char *vm_event_name(enum vm_event_item item)
{
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
@@ -516,7 +516,7 @@ static inline const char *vm_event_name(enum vm_event_item item)
NR_VM_STAT_ITEMS +
item];
}
-#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+#endif /* CONFIG_VM_EVENT_COUNTERS */
#ifdef CONFIG_MEMCG
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index eda4b62511f7..a2848d731a46 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -59,7 +59,6 @@ struct writeback_control {
unsigned for_kupdate:1; /* A kupdate writeback */
unsigned for_background:1; /* A background writeback */
unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */
- unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */
@@ -72,16 +71,6 @@ struct writeback_control {
*/
unsigned no_cgroup_owner:1;
- /* To enable batching of swap writes to non-block-device backends,
- * "plug" can be set point to a 'struct swap_iocb *'. When all swap
- * writes have been submitted, if with swap_iocb is not NULL,
- * swap_write_unplug() should be called.
- */
- struct swap_iocb **swap_plug;
-
- /* Target list for splitting a large folio */
- struct list_head *list;
-
/* internal fields used by the ->writepages implementation: */
struct folio_batch fbatch;
pgoff_t index;
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 13e9cc5490f7..f3ccff2d966c 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -46,4 +46,6 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
void zs_obj_write(struct zs_pool *pool, unsigned long handle,
void *handle_mem, size_t mem_len);
+extern const struct movable_operations zsmalloc_mops;
+
#endif