summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2025-05-19 11:12:36 -0300
committerArnaldo Carvalho de Melo <acme@redhat.com>2025-05-19 11:12:36 -0300
commit4d4eb38795b5cbc66103ae2582bccead5bf0f736 (patch)
tree8048284284f48cb7c97a5c4fef0fc712125bbffa /mm
parent8cdf00b843ea3ca0e920176937bbc906b0b5bb04 (diff)
parenta5806cd506af5a7c19bcd596e4708b5c464bfd21 (diff)
Merge remote-tracking branch 'torvalds/master' into perf-tools-next
To pick up changes for other tools/ libraries used by perf and for header synchronization with the kernel sources originals. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'mm')
-rw-r--r--mm/execmem.c40
-rw-r--r--mm/huge_memory.c11
-rw-r--r--mm/hugetlb.c34
-rw-r--r--mm/internal.h28
-rw-r--r--mm/memblock.c9
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mm_init.c3
-rw-r--r--mm/page_alloc.c88
-rw-r--r--mm/swapfile.c32
-rw-r--r--mm/userfaultfd.c12
-rw-r--r--mm/vmalloc.c31
-rw-r--r--mm/zsmalloc.c8
12 files changed, 177 insertions, 121 deletions
diff --git a/mm/execmem.c b/mm/execmem.c
index e6c4f5076ca8..6f7a2653b280 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -254,6 +254,34 @@ out_unlock:
return ptr;
}
+static bool execmem_cache_rox = false;
+
+void execmem_cache_make_ro(void)
+{
+ struct maple_tree *free_areas = &execmem_cache.free_areas;
+ struct maple_tree *busy_areas = &execmem_cache.busy_areas;
+ MA_STATE(mas_free, free_areas, 0, ULONG_MAX);
+ MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
+ struct mutex *mutex = &execmem_cache.mutex;
+ void *area;
+
+ execmem_cache_rox = true;
+
+ mutex_lock(mutex);
+
+ mas_for_each(&mas_free, area, ULONG_MAX) {
+ unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT;
+ set_memory_ro(mas_free.index, pages);
+ }
+
+ mas_for_each(&mas_busy, area, ULONG_MAX) {
+ unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT;
+ set_memory_ro(mas_busy.index, pages);
+ }
+
+ mutex_unlock(mutex);
+}
+
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
@@ -274,9 +302,15 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
/* fill memory with instructions that will trap */
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
- err = set_memory_rox((unsigned long)p, vm->nr_pages);
- if (err)
- goto err_free_mem;
+ if (execmem_cache_rox) {
+ err = set_memory_rox((unsigned long)p, vm->nr_pages);
+ if (err)
+ goto err_free_mem;
+ } else {
+ err = set_memory_x((unsigned long)p, vm->nr_pages);
+ if (err)
+ goto err_free_mem;
+ }
err = execmem_cache_add(p, alloc_size);
if (err)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a47682d1ab7..47d76d03ce30 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze, struct folio *folio)
{
+ bool pmd_migration = is_pmd_migration_entry(*pmd);
+
VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
- if (folio && folio != pmd_folio(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
+ /*
+ * Do not apply pmd_folio() to a migration entry; and folio lock
+ * guarantees that it must be of the wrong folio anyway.
+ */
+ if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e3e6ac991b9c..7ae38bfb9096 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct folio *folio;
- long retval, gbl_chg;
+ long retval, gbl_chg, gbl_reserve;
map_chg_state map_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ out_uncharge_cgroup_reservation:
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
h_cg);
out_subpool_put:
- if (map_chg)
- hugepage_subpool_put_pages(spool, 1);
+ /*
+ * put page to subpool iff the quota of subpool's rsv_hpages is used
+ * during hugepage_subpool_get_pages.
+ */
+ if (map_chg && !gbl_chg) {
+ gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -gbl_reserve);
+ }
+
+
out_end_reservation:
if (map_chg != MAP_CHG_ENFORCED)
vma_end_reservation(h, vma, addr);
@@ -4034,10 +4042,13 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
list_for_each_entry_safe(folio, next, src_list, lru) {
int i;
+ bool cma;
if (folio_test_hugetlb_vmemmap_optimized(folio))
continue;
+ cma = folio_test_hugetlb_cma(folio);
+
list_del(&folio->lru);
split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
@@ -4053,6 +4064,9 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
new_folio->mapping = NULL;
init_new_hugetlb_folio(dst, new_folio);
+ /* Copy the CMA flag so that it is freed correctly */
+ if (cma)
+ folio_set_hugetlb_cma(new_folio);
list_add(&new_folio->lru, &dst_list);
}
}
@@ -7233,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
- long chg = -1, add = -1;
+ long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
@@ -7368,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
return true;
out_put_pages:
- /* put back original number of pages, chg */
- (void)hugepage_subpool_put_pages(spool, chg);
+ spool_resv = chg - gbl_reserve;
+ if (spool_resv) {
+ /* put sub pool's reservation back, chg - gbl_reserve */
+ gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+ /*
+ * subpool's reserved pages can not be put back due to race,
+ * return to hstate.
+ */
+ hugetlb_acct_memory(h, -gbl_resv);
+ }
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
diff --git a/mm/internal.h b/mm/internal.h
index e9695baa5922..5c7a2b43ad76 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -248,11 +248,9 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
bool *any_writable, bool *any_young, bool *any_dirty)
{
- unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
- const pte_t *end_ptep = start_ptep + max_nr;
pte_t expected_pte, *ptep;
bool writable, young, dirty;
- int nr;
+ int nr, cur_nr;
if (any_writable)
*any_writable = false;
@@ -265,11 +263,15 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);
+ /* Limit max_nr to the actual remaining PFNs in the folio we could batch. */
+ max_nr = min_t(unsigned long, max_nr,
+ folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte));
+
nr = pte_batch_hint(start_ptep, pte);
expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
ptep = start_ptep + nr;
- while (ptep < end_ptep) {
+ while (nr < max_nr) {
pte = ptep_get(ptep);
if (any_writable)
writable = !!pte_write(pte);
@@ -282,14 +284,6 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
if (!pte_same(pte, expected_pte))
break;
- /*
- * Stop immediately once we reached the end of the folio. In
- * corner cases the next PFN might fall into a different
- * folio.
- */
- if (pte_pfn(pte) >= folio_end_pfn)
- break;
-
if (any_writable)
*any_writable |= writable;
if (any_young)
@@ -297,12 +291,13 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
if (any_dirty)
*any_dirty |= dirty;
- nr = pte_batch_hint(ptep, pte);
- expected_pte = pte_advance_pfn(expected_pte, nr);
- ptep += nr;
+ cur_nr = pte_batch_hint(ptep, pte);
+ expected_pte = pte_advance_pfn(expected_pte, cur_nr);
+ ptep += cur_nr;
+ nr += cur_nr;
}
- return min(ptep - start_ptep, max_nr);
+ return min(nr, max_nr);
}
/**
@@ -1595,7 +1590,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
#ifdef CONFIG_UNACCEPTED_MEMORY
void accept_page(struct page *page);
-void unaccepted_cleanup_work(struct work_struct *work);
#else /* CONFIG_UNACCEPTED_MEMORY */
static inline void accept_page(struct page *page)
{
diff --git a/mm/memblock.c b/mm/memblock.c
index d3509414b8c3..0e9ebb8aa7fe 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -457,7 +457,14 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
min(new_area_start, memblock.current_limit),
new_alloc_size, PAGE_SIZE);
- new_array = addr ? __va(addr) : NULL;
+ if (addr) {
+ /* The memory may not have been accepted, yet. */
+ accept_memory(addr, new_alloc_size);
+
+ new_array = __va(addr);
+ } else {
+ new_array = NULL;
+ }
}
if (!addr) {
pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
diff --git a/mm/memory.c b/mm/memory.c
index ba3ea0a82f7f..49199410805c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3751,7 +3751,7 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio,
/* Stabilize the mapcount vs. refcount and recheck. */
folio_lock_large_mapcount(folio);
- VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio));
+ VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_ref_count(folio), folio);
if (folio_test_large_maybe_mapped_shared(folio))
goto unlock;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 9659689b8ace..eedce9321e13 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifdef CONFIG_UNACCEPTED_MEMORY
INIT_LIST_HEAD(&zone->unaccepted_pages);
- INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
#endif
}
@@ -1786,7 +1785,7 @@ static bool arch_has_descending_max_zone_pfns(void)
return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
-static void set_high_memory(void)
+static void __init set_high_memory(void)
{
phys_addr_t highmem = memblock_end_of_DRAM();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5669baf2a6fe..8258349e49ac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -290,7 +290,8 @@ EXPORT_SYMBOL(nr_online_nodes);
#endif
static bool page_contains_unaccepted(struct page *page, unsigned int order);
-static bool cond_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+ int alloc_flags);
static bool __free_unaccepted(struct page *page);
int page_group_by_mobility_disabled __read_mostly;
@@ -1151,14 +1152,9 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
__pgalloc_tag_sub(page, nr);
}
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
+/* When tag is not NULL, assuming mem_alloc_profiling_enabled */
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
{
- struct alloc_tag *tag;
-
- if (!mem_alloc_profiling_enabled())
- return;
-
- tag = __pgalloc_tag_get(page);
if (tag)
this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
}
@@ -1168,7 +1164,7 @@ static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {}
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
#endif /* CONFIG_MEM_ALLOC_PROFILING */
@@ -3616,7 +3612,7 @@ retry:
}
}
- cond_accept_memory(zone, order);
+ cond_accept_memory(zone, order, alloc_flags);
/*
* Detect whether the number of free pages is below high
@@ -3643,7 +3639,7 @@ check_alloc_wmark:
gfp_mask)) {
int ret;
- if (cond_accept_memory(zone, order))
+ if (cond_accept_memory(zone, order, alloc_flags))
goto try_this_zone;
/*
@@ -3696,7 +3692,7 @@ try_this_zone:
return page;
} else {
- if (cond_accept_memory(zone, order))
+ if (cond_accept_memory(zone, order, alloc_flags))
goto try_this_zone;
/* Try again if zone has deferred pages */
@@ -4849,7 +4845,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
goto failed;
}
- cond_accept_memory(zone, 0);
+ cond_accept_memory(zone, 0, alloc_flags);
retry_this_zone:
mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
if (zone_watermark_fast(zone, 0, mark,
@@ -4858,7 +4854,7 @@ retry_this_zone:
break;
}
- if (cond_accept_memory(zone, 0))
+ if (cond_accept_memory(zone, 0, alloc_flags))
goto retry_this_zone;
/* Try again if zone has deferred pages */
@@ -5065,11 +5061,13 @@ static void ___free_pages(struct page *page, unsigned int order,
{
/* get PageHead before we drop reference */
int head = PageHead(page);
+ /* get alloc tag in case the page is released by others */
+ struct alloc_tag *tag = pgalloc_tag_get(page);
if (put_page_testzero(page))
__free_frozen_pages(page, order, fpi_flags);
else if (!head) {
- pgalloc_tag_sub_pages(page, (1 << order) - 1);
+ pgalloc_tag_sub_pages(tag, (1 << order) - 1);
while (order-- > 0)
__free_frozen_pages(page + (1 << order), order,
fpi_flags);
@@ -7174,16 +7172,8 @@ bool has_managed_dma(void)
#ifdef CONFIG_UNACCEPTED_MEMORY
-/* Counts number of zones with unaccepted pages. */
-static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
-
static bool lazy_accept = true;
-void unaccepted_cleanup_work(struct work_struct *work)
-{
- static_branch_dec(&zones_with_unaccepted_pages);
-}
-
static int __init accept_memory_parse(char *p)
{
if (!strcmp(p, "lazy")) {
@@ -7208,11 +7198,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
static void __accept_page(struct zone *zone, unsigned long *flags,
struct page *page)
{
- bool last;
-
list_del(&page->lru);
- last = list_empty(&zone->unaccepted_pages);
-
account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
__ClearPageUnaccepted(page);
@@ -7221,28 +7207,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
-
- if (last) {
- /*
- * There are two corner cases:
- *
- * - If allocation occurs during the CPU bring up,
- * static_branch_dec() cannot be used directly as
- * it causes a deadlock on cpu_hotplug_lock.
- *
- * Instead, use schedule_work() to prevent deadlock.
- *
- * - If allocation occurs before workqueues are initialized,
- * static_branch_dec() should be called directly.
- *
- * Workqueues are initialized before CPU bring up, so this
- * will not conflict with the first scenario.
- */
- if (system_wq)
- schedule_work(&zone->unaccepted_cleanup);
- else
- unaccepted_cleanup_work(&zone->unaccepted_cleanup);
- }
}
void accept_page(struct page *page)
@@ -7279,20 +7243,17 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
-static inline bool has_unaccepted_memory(void)
-{
- return static_branch_unlikely(&zones_with_unaccepted_pages);
-}
-
-static bool cond_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+ int alloc_flags)
{
long to_accept, wmark;
bool ret = false;
- if (!has_unaccepted_memory())
+ if (list_empty(&zone->unaccepted_pages))
return false;
- if (list_empty(&zone->unaccepted_pages))
+ /* Bailout, since try_to_accept_memory_one() needs to take a lock */
+ if (alloc_flags & ALLOC_TRYLOCK)
return false;
wmark = promo_wmark_pages(zone);
@@ -7325,22 +7286,17 @@ static bool __free_unaccepted(struct page *page)
{
struct zone *zone = page_zone(page);
unsigned long flags;
- bool first = false;
if (!lazy_accept)
return false;
spin_lock_irqsave(&zone->lock, flags);
- first = list_empty(&zone->unaccepted_pages);
list_add_tail(&page->lru, &zone->unaccepted_pages);
account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
__SetPageUnaccepted(page);
spin_unlock_irqrestore(&zone->lock, flags);
- if (first)
- static_branch_inc(&zones_with_unaccepted_pages);
-
return true;
}
@@ -7351,7 +7307,8 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
return false;
}
-static bool cond_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+ int alloc_flags)
{
return false;
}
@@ -7422,11 +7379,6 @@ struct page *try_alloc_pages_noprof(int nid, unsigned int order)
if (!pcp_allowed_order(order))
return NULL;
-#ifdef CONFIG_UNACCEPTED_MEMORY
- /* Bailout, since try_to_accept_memory_one() needs to take a lock */
- if (has_unaccepted_memory())
- return NULL;
-#endif
/* Bailout, since _deferred_grow_zone() needs to take a lock */
if (deferred_pages_enabled())
return NULL;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2eff8b51a945..412ccd6543b3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1272,13 +1272,22 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp)
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
- /*
- * Should not even be attempting large allocations when huge
- * page swap is disabled. Warn and fail the allocation.
- */
- if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) {
- VM_WARN_ON_ONCE(1);
- return -EINVAL;
+ if (order) {
+ /*
+ * Reject large allocation when THP_SWAP is disabled,
+ * the caller should split the folio and try again.
+ */
+ if (!IS_ENABLED(CONFIG_THP_SWAP))
+ return -EAGAIN;
+
+ /*
+ * Allocation size should never exceed cluster size
+ * (HPAGE_PMD_SIZE).
+ */
+ if (size > SWAPFILE_CLUSTER) {
+ VM_WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
}
local_lock(&percpu_swap_cluster.lock);
@@ -3323,6 +3332,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/*
+ * The swap subsystem needs a major overhaul to support this.
+ * It doesn't work yet so just disable it for now.
+ */
+ if (mapping_min_folio_order(mapping) > 0) {
+ error = -EINVAL;
+ goto bad_swap_unlock_inode;
+ }
+
+ /*
* Read the swap header.
*/
if (!mapping->a_ops->read_folio) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7d5d709cc838..e0db855c89b4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1064,8 +1064,13 @@ static int move_present_pte(struct mm_struct *mm,
src_folio->index = linear_page_index(dst_vma, dst_addr);
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
- /* Follow mremap() behavior and treat the entry dirty after the move */
- orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
+ /* Set soft dirty bit so userspace can notice the pte was moved */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
+#endif
+ if (pte_dirty(orig_src_pte))
+ orig_dst_pte = pte_mkdirty(orig_dst_pte);
+ orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte);
out:
@@ -1100,6 +1105,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
}
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+#endif
set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
double_pt_unlock(dst_ptl, src_ptl);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3ed720a787ec..2d7511654831 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm,
{
vm->flags = flags;
vm->addr = (void *)va->va_start;
- vm->size = va_size(va);
+ vm->size = vm->requested_size = va_size(va);
vm->caller = caller;
va->vm = vm;
}
@@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size,
area->flags = flags;
area->caller = caller;
+ area->requested_size = requested_size;
va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
if (IS_ERR(va)) {
@@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof);
*/
void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
{
+ struct vm_struct *vm = NULL;
+ size_t alloced_size = 0;
size_t old_size = 0;
void *n;
@@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
}
if (p) {
- struct vm_struct *vm;
-
vm = find_vm_area(p);
if (unlikely(!vm)) {
WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p);
return NULL;
}
- old_size = get_vm_area_size(vm);
+ alloced_size = get_vm_area_size(vm);
+ old_size = vm->requested_size;
+ if (WARN(alloced_size < old_size,
+ "vrealloc() has mismatched area vs requested sizes (%p)\n", p))
+ return NULL;
}
/*
@@ -4088,14 +4093,26 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
* would be a good heuristic for when to shrink the vm_area?
*/
if (size <= old_size) {
- /* Zero out spare memory. */
- if (want_init_on_alloc(flags))
+ /* Zero out "freed" memory. */
+ if (want_init_on_free())
memset((void *)p + size, 0, old_size - size);
+ vm->requested_size = size;
kasan_poison_vmalloc(p + size, old_size - size);
- kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL);
return (void *)p;
}
+ /*
+ * We already have the bytes available in the allocation; use them.
+ */
+ if (size <= alloced_size) {
+ kasan_unpoison_vmalloc(p + old_size, size - old_size,
+ KASAN_VMALLOC_PROT_NORMAL);
+ /* Zero out "alloced" memory. */
+ if (want_init_on_alloc(flags))
+ memset((void *)p + old_size, 0, size - old_size);
+ vm->requested_size = size;
+ }
+
/* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
n = __vmalloc_noprof(size, flags);
if (!n)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 961b270f023c..d14a7e317ac8 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1243,19 +1243,19 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
class = zspage_class(pool, zspage);
off = offset_in_page(class->size * obj_idx);
- if (off + class->size <= PAGE_SIZE) {
+ if (!ZsHugePage(zspage))
+ off += ZS_HANDLE_SIZE;
+
+ if (off + mem_len <= PAGE_SIZE) {
/* this object is contained entirely within a page */
void *dst = kmap_local_zpdesc(zpdesc);
- if (!ZsHugePage(zspage))
- off += ZS_HANDLE_SIZE;
memcpy(dst + off, handle_mem, mem_len);
kunmap_local(dst);
} else {
/* this object spans two pages */
size_t sizes[2];
- off += ZS_HANDLE_SIZE;
sizes[0] = PAGE_SIZE - off;
sizes[1] = mem_len - sizes[0];