Diffstat (limited to 'mm')
-rw-r--r--  mm/damon/vaddr.c      |   8
-rw-r--r--  mm/huge_memory.c      |  15
-rw-r--r--  mm/hugetlb.c          |   2
-rw-r--r--  mm/khugepaged.c       |   5
-rw-r--r--  mm/kmsan/hooks.c      |   3
-rw-r--r--  mm/ksm.c              |  27
-rw-r--r--  mm/madvise.c          |   4
-rw-r--r--  mm/memblock.c         |  68
-rw-r--r--  mm/memcontrol.c       |   7
-rw-r--r--  mm/memory_hotplug.c   |  32
-rw-r--r--  mm/migrate.c          |  23
-rw-r--r--  mm/mm_init.c          | 197
-rw-r--r--  mm/slub.c             |  17
-rw-r--r--  mm/util.c             |   3
14 files changed, 140 insertions(+), 271 deletions(-)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 8c048f9b129e..7e834467b2d8 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -328,10 +328,8 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
}
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!pte) {
- walk->action = ACTION_AGAIN;
+ if (!pte)
return 0;
- }
if (!pte_present(ptep_get(pte)))
goto out;
damon_ptep_mkold(pte, walk->vma, addr);
@@ -481,10 +479,8 @@ regular_page:
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!pte) {
- walk->action = ACTION_AGAIN;
+ if (!pte)
return 0;
- }
ptent = ptep_get(pte);
if (!pte_present(ptent))
goto out;
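
The two hunks above switch the DAMON walkers to the plain pte_offset_map_lock() convention: a NULL return means the PTE table was freed or split underneath us, so the callback simply returns 0 instead of asking the page walker to retry via ACTION_AGAIN. A minimal sketch of that convention (illustrative callback, not the DAMON code):

static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
                             struct mm_struct *mm)
{
        spinlock_t *ptl;
        pte_t *pte;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        if (!pte)               /* table vanished under us: nothing to do */
                return 0;
        if (pte_present(ptep_get(pte))) {
                /* operate on the mapped page here */
        }
        pte_unmap_unlock(pte, ptl);
        return 0;
}
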
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5acca24bbabb..1b81680b4225 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4104,32 +4104,23 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
static bool thp_underused(struct folio *folio)
{
int num_zero_pages = 0, num_filled_pages = 0;
- void *kaddr;
int i;
if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1)
return false;
for (i = 0; i < folio_nr_pages(folio); i++) {
- kaddr = kmap_local_folio(folio, i * PAGE_SIZE);
- if (!memchr_inv(kaddr, 0, PAGE_SIZE)) {
- num_zero_pages++;
- if (num_zero_pages > khugepaged_max_ptes_none) {
- kunmap_local(kaddr);
+ if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) {
+ if (++num_zero_pages > khugepaged_max_ptes_none)
return true;
- }
} else {
/*
* Another path for early exit once the number
* of non-zero filled pages exceeds threshold.
*/
- num_filled_pages++;
- if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) {
- kunmap_local(kaddr);
+ if (++num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none)
return false;
- }
}
- kunmap_local(kaddr);
}
return false;
}
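
With this change thp_underused() compares each subpage against the shared zero page via pages_identical() instead of mapping it and scanning with memchr_inv(), which removes the kmap_local()/kunmap_local() bookkeeping from every exit path. The two equivalent checks, as a sketch (helper names are illustrative):

static bool subpage_zero_filled_old(struct folio *folio, long i)
{
        void *kaddr = kmap_local_folio(folio, i * PAGE_SIZE);
        bool zero = !memchr_inv(kaddr, 0, PAGE_SIZE);

        kunmap_local(kaddr);
        return zero;
}

static bool subpage_zero_filled_new(struct folio *folio, long i)
{
        /* pages_identical() maps and compares the pages internally */
        return pages_identical(folio_page(folio, i), ZERO_PAGE(0));
}
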
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6cac826cb61f..795ee393eac0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7222,6 +7222,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
psize);
}
spin_unlock(ptl);
+
+ cond_resched();
}
/*
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7ab2d1a42df3..abe54f0043c7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -376,10 +376,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
int __init khugepaged_init(void)
{
- mm_slot_cache = kmem_cache_create("khugepaged_mm_slot",
- sizeof(struct mm_slot),
- __alignof__(struct mm_slot),
- 0, NULL);
+ mm_slot_cache = KMEM_CACHE(mm_slot, 0);
if (!mm_slot_cache)
return -ENOMEM;
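
The open-coded kmem_cache_create() call is replaced by the KMEM_CACHE() helper, which derives the cache name, object size and alignment from the struct type; note that the slabinfo name therefore changes from "khugepaged_mm_slot" to "mm_slot". Side by side, for illustration:

        /* before: everything spelled out by hand */
        mm_slot_cache = kmem_cache_create("khugepaged_mm_slot",
                                          sizeof(struct mm_slot),
                                          __alignof__(struct mm_slot),
                                          0, NULL);

        /* after: name ("mm_slot"), size and alignment come from the type */
        mm_slot_cache = KMEM_CACHE(mm_slot, 0);
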
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 90bee565b9bc..2cee59d89c80 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -339,13 +339,12 @@ static void kmsan_handle_dma_page(const void *addr, size_t size,
void kmsan_handle_dma(phys_addr_t phys, size_t size,
enum dma_data_direction dir)
{
- struct page *page = phys_to_page(phys);
u64 page_offset, to_go;
void *addr;
if (PhysHighMem(phys))
return;
- addr = page_to_virt(page);
+ addr = phys_to_virt(phys);
/*
* The kernel may occasionally give us adjacent DMA pages not belonging
* to the same allocation. Process them separately to avoid triggering
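
kmsan_handle_dma() now works on the physical address directly: highmem ranges are skipped, and lowmem ones are converted once with phys_to_virt() rather than via phys_to_page()/page_to_virt(). A sketch of the page-by-page splitting the surrounding loop performs, with a hypothetical handler callback:

static void example_handle_dma_range(phys_addr_t phys, size_t size,
                                     void (*handle)(void *addr, size_t len))
{
        void *addr = phys_to_virt(phys);

        while (size > 0) {
                u64 page_offset = offset_in_page(addr);
                u64 to_go = min_t(u64, PAGE_SIZE - page_offset, size);

                /* adjacent pages may belong to different allocations */
                handle(addr, to_go);
                addr += to_go;
                size -= to_go;
        }
}
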
diff --git a/mm/ksm.c b/mm/ksm.c
index 04019a15b25d..7bc726b50b2f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2921,7 +2921,7 @@ int __ksm_enter(struct mm_struct *mm)
void __ksm_exit(struct mm_struct *mm)
{
- struct ksm_mm_slot *mm_slot;
+ struct ksm_mm_slot *mm_slot = NULL;
struct mm_slot *slot;
int easy_to_free = 0;
@@ -2936,19 +2936,20 @@ void __ksm_exit(struct mm_struct *mm)
spin_lock(&ksm_mmlist_lock);
slot = mm_slot_lookup(mm_slots_hash, mm);
- if (slot) {
- mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
- if (ksm_scan.mm_slot != mm_slot) {
- if (!mm_slot->rmap_list) {
- hash_del(&slot->hash);
- list_del(&slot->mm_node);
- easy_to_free = 1;
- } else {
- list_move(&slot->mm_node,
- &ksm_scan.mm_slot->slot.mm_node);
- }
- }
+ if (!slot)
+ goto unlock;
+ mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
+ if (ksm_scan.mm_slot == mm_slot)
+ goto unlock;
+ if (!mm_slot->rmap_list) {
+ hash_del(&slot->hash);
+ list_del(&slot->mm_node);
+ easy_to_free = 1;
+ } else {
+ list_move(&slot->mm_node,
+ &ksm_scan.mm_slot->slot.mm_node);
}
+unlock:
spin_unlock(&ksm_mmlist_lock);
if (easy_to_free) {
diff --git a/mm/madvise.c b/mm/madvise.c
index 35ed4ab0d7c5..fb1c86e630b6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1071,8 +1071,8 @@ static bool is_valid_guard_vma(struct vm_area_struct *vma, bool allow_locked)
static bool is_guard_pte_marker(pte_t ptent)
{
- return is_pte_marker(ptent) &&
- is_guard_swp_entry(pte_to_swp_entry(ptent));
+ return is_swap_pte(ptent) &&
+ is_guard_swp_entry(pte_to_swp_entry(ptent));
}
static int guard_install_pud_entry(pud_t *pud, unsigned long addr,
diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c..e23e16618e9b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1445,70 +1445,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
return 0;
}
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-/**
- * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
- *
- * @idx: pointer to u64 loop variable
- * @zone: zone in which all of the memory blocks reside
- * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
- * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
- *
- * This function is meant to be a zone/pfn specific wrapper for the
- * for_each_mem_range type iterators. Specifically they are used in the
- * deferred memory init routines and as such we were duplicating much of
- * this logic throughout the code. So instead of having it in multiple
- * locations it seemed like it would make more sense to centralize this to
- * one new iterator that does everything they need.
- */
-void __init_memblock
-__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
- unsigned long *out_spfn, unsigned long *out_epfn)
-{
- int zone_nid = zone_to_nid(zone);
- phys_addr_t spa, epa;
-
- __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
- &memblock.memory, &memblock.reserved,
- &spa, &epa, NULL);
-
- while (*idx != U64_MAX) {
- unsigned long epfn = PFN_DOWN(epa);
- unsigned long spfn = PFN_UP(spa);
-
- /*
- * Verify the end is at least past the start of the zone and
- * that we have at least one PFN to initialize.
- */
- if (zone->zone_start_pfn < epfn && spfn < epfn) {
- /* if we went too far just stop searching */
- if (zone_end_pfn(zone) <= spfn) {
- *idx = U64_MAX;
- break;
- }
-
- if (out_spfn)
- *out_spfn = max(zone->zone_start_pfn, spfn);
- if (out_epfn)
- *out_epfn = min(zone_end_pfn(zone), epfn);
-
- return;
- }
-
- __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
- &memblock.memory, &memblock.reserved,
- &spa, &epa, NULL);
- }
-
- /* signal end of iteration */
- if (out_spfn)
- *out_spfn = ULONG_MAX;
- if (out_epfn)
- *out_epfn = 0;
-}
-
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
-
/**
* memblock_alloc_range_nid - allocate boot memory block
* @size: size of memory block to be allocated in bytes
@@ -2516,8 +2452,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
for (i = 0; i < reserved_mem_count; i++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
+ struct page *page = phys_to_page(map->start);
+ unsigned int nr_pages = map->size >> PAGE_SHIFT;
- err |= kho_preserve_phys(map->start, map->size);
+ err |= kho_preserve_pages(page, nr_pages);
}
err |= kho_preserve_folio(page_folio(kho_fdt));
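
With __next_mem_pfn_range_in_zone() removed, the deferred-init code (see the mm/mm_init.c changes below) walks the free memblock ranges itself and clamps them to the PFN window it is working on; the second hunk independently switches the KHO path from kho_preserve_phys() to kho_preserve_pages(). A minimal sketch of the clamping, assuming the zone spans a single node (function name is hypothetical):

static unsigned long example_count_free_pfns(struct zone *zone,
                                             unsigned long start_pfn,
                                             unsigned long end_pfn)
{
        int nid = zone_to_nid(zone);
        unsigned long nr_pfns = 0;
        phys_addr_t start, end;
        u64 i = 0;

        for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
                unsigned long spfn = PFN_UP(start);
                unsigned long epfn = PFN_DOWN(end);

                if (spfn >= end_pfn)
                        break;
                spfn = max(spfn, start_pfn);
                epfn = min(epfn, end_pfn);
                if (spfn < epfn)
                        nr_pfns += epfn - spfn;
        }
        return nr_pfns;
}
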
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e090f29eb03b..4deda33625f4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2307,12 +2307,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
bool drained = false;
bool raised_max_event = false;
unsigned long pflags;
+ bool allow_spinning = gfpflags_allow_spinning(gfp_mask);
retry:
if (consume_stock(memcg, nr_pages))
return 0;
- if (!gfpflags_allow_spinning(gfp_mask))
+ if (!allow_spinning)
/* Avoid the refill and flush of the older stock */
batch = nr_pages;
@@ -2348,7 +2349,7 @@ retry:
if (!gfpflags_allow_blocking(gfp_mask))
goto nomem;
- memcg_memory_event(mem_over_limit, MEMCG_MAX);
+ __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
raised_max_event = true;
psi_memstall_enter(&pflags);
@@ -2415,7 +2416,7 @@ force:
* a MEMCG_MAX event.
*/
if (!raised_max_event)
- memcg_memory_event(mem_over_limit, MEMCG_MAX);
+ __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
/*
* The allocation either can't fail or will lead to more memory
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e9f14de4a9c9..0be83039c3b5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1477,7 +1477,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
}
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(cur_start, memblock_size,
+ ret = create_memory_block_devices(cur_start, memblock_size, nid,
params.altmap, group);
if (ret) {
arch_remove_memory(cur_start, memblock_size, NULL);
@@ -1539,8 +1539,16 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
ret = __try_online_node(nid, false);
if (ret < 0)
- goto error;
- new_node = ret;
+ goto error_memblock_remove;
+ if (ret) {
+ node_set_online(nid);
+ ret = register_one_node(nid);
+ if (WARN_ON(ret)) {
+ node_set_offline(nid);
+ goto error_memblock_remove;
+ }
+ new_node = true;
+ }
/*
* Self hosted memmap array
@@ -1556,24 +1564,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
goto error;
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(start, size, NULL, group);
+ ret = create_memory_block_devices(start, size, nid, NULL, group);
if (ret) {
arch_remove_memory(start, size, params.altmap);
goto error;
}
}
- if (new_node) {
- /* If sysfs file of new node can't be created, cpu on the node
- * can't be hot-added. There is no rollback way now.
- * So, check by BUG_ON() to catch it reluctantly..
- * We online node here. We can't roll back from here.
- */
- node_set_online(nid);
- ret = register_one_node(nid);
- BUG_ON(ret);
- }
-
register_memory_blocks_under_node_hotplug(nid, PFN_DOWN(start),
PFN_UP(start + size - 1));
@@ -1597,6 +1594,11 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
return ret;
error:
+ if (new_node) {
+ node_set_offline(nid);
+ unregister_one_node(nid);
+ }
+error_memblock_remove:
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
memblock_remove(start, size);
error_mem_hotplug_end:
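
add_memory_resource() now brings the node online and registers it up front, and rolls both back on failure, instead of deferring registration and tolerating failure with a BUG_ON(). The error labels follow the usual reverse-order unwind pattern; a schematic, compile-only sketch with made-up step functions standing in for memblock_add_node(), register_one_node() and arch_add_memory():

static int example_setup_memblock(void)   { return 0; }
static int example_online_node(void)      { return 0; }
static int example_add_the_memory(void)   { return 0; }
static void example_offline_node(void)    { }
static void example_remove_memblock(void) { }

static int example_add_memory(void)
{
        int ret;

        ret = example_setup_memblock();
        if (ret)
                return ret;
        ret = example_online_node();
        if (ret)
                goto error_memblock_remove;
        ret = example_add_the_memory();
        if (ret)
                goto error;
        return 0;

error:
        example_offline_node();         /* undo in reverse order */
error_memblock_remove:
        example_remove_memblock();
        return ret;
}
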
diff --git a/mm/migrate.c b/mm/migrate.c
index aee61a980374..e3065c9edb55 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -296,19 +296,16 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
}
static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
- struct folio *folio,
- unsigned long idx)
+ struct folio *folio, pte_t old_pte, unsigned long idx)
{
struct page *page = folio_page(folio, idx);
- bool contains_data;
pte_t newpte;
- void *addr;
if (PageCompound(page))
return false;
VM_BUG_ON_PAGE(!PageAnon(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(pte_present(ptep_get(pvmw->pte)), page);
+ VM_BUG_ON_PAGE(pte_present(old_pte), page);
if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
mm_forbids_zeropage(pvmw->vma->vm_mm))
@@ -319,15 +316,17 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
* this subpage has been non present. If the subpage is only zero-filled
* then map it to the shared zeropage.
*/
- addr = kmap_local_page(page);
- contains_data = memchr_inv(addr, 0, PAGE_SIZE);
- kunmap_local(addr);
-
- if (contains_data)
+ if (!pages_identical(page, ZERO_PAGE(0)))
return false;
newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
pvmw->vma->vm_page_prot));
+
+ if (pte_swp_soft_dirty(old_pte))
+ newpte = pte_mksoft_dirty(newpte);
+ if (pte_swp_uffd_wp(old_pte))
+ newpte = pte_mkuffd_wp(newpte);
+
set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
@@ -370,13 +369,13 @@ static bool remove_migration_pte(struct folio *folio,
continue;
}
#endif
+ old_pte = ptep_get(pvmw.pte);
if (rmap_walk_arg->map_unused_to_zeropage &&
- try_to_map_unused_to_zeropage(&pvmw, folio, idx))
+ try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
continue;
folio_get(folio);
pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
- old_pte = ptep_get(pvmw.pte);
entry = pte_to_swp_entry(old_pte);
if (!is_migration_entry_young(entry))
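
try_to_map_unused_to_zeropage() now receives the caller's old_pte so the soft-dirty and userfaultfd write-protect bits of the migration entry survive the switch to the shared zeropage, and it reuses pages_identical() for the zero-fill test. The bit-carrying step in isolation (illustrative fragment; addr and vma stand for pvmw->address and pvmw->vma):

        pte_t newpte = pte_mkspecial(pfn_pte(my_zero_pfn(addr),
                                             vma->vm_page_prot));

        /* old_pte is a non-present swap pte, so use the *_swp_* accessors */
        if (pte_swp_soft_dirty(old_pte))
                newpte = pte_mksoft_dirty(newpte);
        if (pte_swp_uffd_wp(old_pte))
                newpte = pte_mkuffd_wp(newpte);
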
diff --git a/mm/mm_init.c b/mm/mm_init.c
index df614556741a..3db2dea7db4c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2045,112 +2045,63 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}
/*
- * This function is meant to pre-load the iterator for the zone init from
- * a given point.
- * Specifically it walks through the ranges starting with initial index
- * passed to it until we are caught up to the first_init_pfn value and
- * exits there. If we never encounter the value we return false indicating
- * there are no valid ranges left.
- */
-static bool __init
-deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
- unsigned long *spfn, unsigned long *epfn,
- unsigned long first_init_pfn)
-{
- u64 j = *i;
-
- if (j == 0)
- __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
-
- /*
- * Start out by walking through the ranges in this zone that have
- * already been initialized. We don't need to do anything with them
- * so we just need to flush them out of the system.
- */
- for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
- if (*epfn <= first_init_pfn)
- continue;
- if (*spfn < first_init_pfn)
- *spfn = first_init_pfn;
- *i = j;
- return true;
- }
-
- return false;
-}
-
-/*
- * Initialize and free pages. We do it in two loops: first we initialize
- * struct page, then free to buddy allocator, because while we are
- * freeing pages we can access pages that are ahead (computing buddy
- * page in __free_one_page()).
+ * Initialize and free pages.
+ *
+ * At this point reserved pages and struct pages that correspond to holes in
* memblock.memory are already initialized so every free range has a valid
+ * memory map around it.
+ * This ensures that access of pages that are ahead of the range being
+ * initialized (computing buddy page in __free_one_page()) always reads a valid
+ * struct page.
*
- * In order to try and keep some memory in the cache we have the loop
- * broken along max page order boundaries. This way we will not cause
- * any issues with the buddy page computation.
+ * In order to try and improve CPU cache locality we have the loop broken along
+ * max page order boundaries.
*/
static unsigned long __init
-deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
- unsigned long *end_pfn)
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+ struct zone *zone)
{
- unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES);
- unsigned long spfn = *start_pfn, epfn = *end_pfn;
+ int nid = zone_to_nid(zone);
unsigned long nr_pages = 0;
- u64 j = *i;
-
- /* First we loop through and initialize the page values */
- for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) {
- unsigned long t;
-
- if (mo_pfn <= *start_pfn)
- break;
+ phys_addr_t start, end;
+ u64 i = 0;
- t = min(mo_pfn, *end_pfn);
- nr_pages += deferred_init_pages(zone, *start_pfn, t);
+ for_each_free_mem_range(i, nid, 0, &start, &end, NULL) {
+ unsigned long spfn = PFN_UP(start);
+ unsigned long epfn = PFN_DOWN(end);
- if (mo_pfn < *end_pfn) {
- *start_pfn = mo_pfn;
+ if (spfn >= end_pfn)
break;
- }
- }
- /* Reset values and now loop through freeing pages as needed */
- swap(j, *i);
+ spfn = max(spfn, start_pfn);
+ epfn = min(epfn, end_pfn);
- for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) {
- unsigned long t;
+ while (spfn < epfn) {
+ unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
+ unsigned long chunk_end = min(mo_pfn, epfn);
- if (mo_pfn <= spfn)
- break;
+ nr_pages += deferred_init_pages(zone, spfn, chunk_end);
+ deferred_free_pages(spfn, chunk_end - spfn);
- t = min(mo_pfn, epfn);
- deferred_free_pages(spfn, t - spfn);
+ spfn = chunk_end;
- if (mo_pfn <= epfn)
- break;
+ if (irqs_disabled())
+ touch_nmi_watchdog();
+ else
+ cond_resched();
+ }
}
return nr_pages;
}
static void __init
-deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
- void *arg)
+deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn,
+ void *arg)
{
- unsigned long spfn, epfn;
struct zone *zone = arg;
- u64 i = 0;
-
- deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
- /*
- * Initialize and free pages in MAX_PAGE_ORDER sized increments so that
- * we can avoid introducing any issues with the buddy allocator.
- */
- while (spfn < end_pfn) {
- deferred_init_maxorder(&i, zone, &spfn, &epfn);
- cond_resched();
- }
+ deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
}
static unsigned int __init
@@ -2164,12 +2115,10 @@ static int __init deferred_init_memmap(void *data)
{
pg_data_t *pgdat = data;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
- unsigned long spfn = 0, epfn = 0;
- unsigned long first_init_pfn, flags;
+ int max_threads = deferred_page_init_max_threads(cpumask);
+ unsigned long first_init_pfn, last_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
- int max_threads;
- u64 i = 0;
/* Bind memory initialisation thread to a local node if possible */
if (!cpumask_empty(cpumask))
@@ -2197,24 +2146,20 @@ static int __init deferred_init_memmap(void *data)
/* Only the highest zone is deferred */
zone = pgdat->node_zones + pgdat->nr_zones - 1;
+ last_pfn = SECTION_ALIGN_UP(zone_end_pfn(zone));
- max_threads = deferred_page_init_max_threads(cpumask);
+ struct padata_mt_job job = {
+ .thread_fn = deferred_init_memmap_job,
+ .fn_arg = zone,
+ .start = first_init_pfn,
+ .size = last_pfn - first_init_pfn,
+ .align = PAGES_PER_SECTION,
+ .min_chunk = PAGES_PER_SECTION,
+ .max_threads = max_threads,
+ .numa_aware = false,
+ };
- while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
- first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
- struct padata_mt_job job = {
- .thread_fn = deferred_init_memmap_chunk,
- .fn_arg = zone,
- .start = spfn,
- .size = first_init_pfn - spfn,
- .align = PAGES_PER_SECTION,
- .min_chunk = PAGES_PER_SECTION,
- .max_threads = max_threads,
- .numa_aware = false,
- };
-
- padata_do_multithreaded(&job);
- }
+ padata_do_multithreaded(&job);
/* Sanity check that the next zone really is unpopulated */
WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
@@ -2239,12 +2184,11 @@ static int __init deferred_init_memmap(void *data)
*/
bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
{
- unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+ unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
pg_data_t *pgdat = zone->zone_pgdat;
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
unsigned long spfn, epfn, flags;
unsigned long nr_pages = 0;
- u64 i = 0;
/* Only the last zone may have deferred pages */
if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -2261,37 +2205,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
return true;
}
- /* If the zone is empty somebody else may have cleared out the zone */
- if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- first_deferred_pfn)) {
- pgdat->first_deferred_pfn = ULONG_MAX;
- pgdat_resize_unlock(pgdat, &flags);
- /* Retry only once. */
- return first_deferred_pfn != ULONG_MAX;
+ /*
+ * Initialize at least nr_pages_needed in section chunks.
+ * If a section has less free memory than nr_pages_needed, the next
+ * section will be also initialized.
+ * Note, that it still does not guarantee that allocation of order can
+ * be satisfied if the sections are fragmented because of memblock
+ * allocations.
+ */
+ for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
+ nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
+ spfn = epfn, epfn += PAGES_PER_SECTION) {
+ nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
}
/*
- * Initialize and free pages in MAX_PAGE_ORDER sized increments so
- * that we can avoid introducing any issues with the buddy
- * allocator.
+ * There were no pages to initialize and free which means the zone's
+ * memory map is completely initialized.
*/
- while (spfn < epfn) {
- /* update our first deferred PFN for this section */
- first_deferred_pfn = spfn;
-
- nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
- touch_nmi_watchdog();
-
- /* We should only stop along section boundaries */
- if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
- continue;
-
- /* If our quota has been met we can stop here */
- if (nr_pages >= nr_pages_needed)
- break;
- }
+ pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;
- pgdat->first_deferred_pfn = spfn;
pgdat_resize_unlock(pgdat, &flags);
return nr_pages > 0;
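
deferred_init_memmap() now hands the whole [first_init_pfn, last_pfn) span to padata as a single job instead of looping over zone ranges itself; padata splits the span on PAGES_PER_SECTION boundaries and fans the pieces out to up to max_threads workers. A sketch of that usage pattern, mirroring what deferred_init_memmap_job() does with the new chunk helper:

static void example_thread_fn(unsigned long start_pfn, unsigned long end_pfn,
                              void *arg)
{
        struct zone *zone = arg;

        /* init and free the struct pages of [start_pfn, end_pfn) in zone */
        deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
}

static void example_run(struct zone *zone, unsigned long first_pfn,
                        unsigned long last_pfn, int max_threads)
{
        struct padata_mt_job job = {
                .thread_fn   = example_thread_fn,
                .fn_arg      = zone,
                .start       = first_pfn,
                .size        = last_pfn - first_pfn,
                .align       = PAGES_PER_SECTION,
                .min_chunk   = PAGES_PER_SECTION,
                .max_threads = max_threads,
                .numa_aware  = false,
        };

        padata_do_multithreaded(&job);
}
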
diff --git a/mm/slub.c b/mm/slub.c
index 584a5ff1828b..135c408e0515 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2152,7 +2152,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
return 0;
}
- kmemleak_not_leak(vec);
+ if (allow_spin)
+ kmemleak_not_leak(vec);
return 0;
}
@@ -6431,17 +6432,24 @@ static void free_deferred_objects(struct irq_work *work)
static void defer_free(struct kmem_cache *s, void *head)
{
- struct defer_free *df = this_cpu_ptr(&defer_free_objects);
+ struct defer_free *df;
+
+ guard(preempt)();
+ df = this_cpu_ptr(&defer_free_objects);
if (llist_add(head + s->offset, &df->objects))
irq_work_queue(&df->work);
}
static void defer_deactivate_slab(struct slab *slab, void *flush_freelist)
{
- struct defer_free *df = this_cpu_ptr(&defer_free_objects);
+ struct defer_free *df;
slab->flush_freelist = flush_freelist;
+
+ guard(preempt)();
+
+ df = this_cpu_ptr(&defer_free_objects);
if (llist_add(&slab->llnode, &df->slabs))
irq_work_queue(&df->work);
}
@@ -7693,7 +7701,8 @@ void __kmem_cache_release(struct kmem_cache *s)
pcs_destroy(s);
#ifndef CONFIG_SLUB_TINY
#ifdef CONFIG_PREEMPT_RT
- lockdep_unregister_key(&s->lock_key);
+ if (s->cpu_slab)
+ lockdep_unregister_key(&s->lock_key);
#endif
free_percpu(s->cpu_slab);
#endif
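
defer_free() and defer_deactivate_slab() now take the per-CPU pointer under guard(preempt)(), the scope-based guard from <linux/cleanup.h>: preemption is disabled at the guard statement and re-enabled automatically when the enclosing scope ends, so this_cpu_ptr() cannot race with migration to another CPU. A self-contained sketch of the pattern (the per-CPU variable is made up):

#include <linux/cleanup.h>
#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, example_counter);

static void example_bump(void)
{
        unsigned long *p;

        guard(preempt)();               /* preempt_disable() now ...    */
        p = this_cpu_ptr(&example_counter);
        (*p)++;
}                                       /* ... preempt_enable() on exit */
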
diff --git a/mm/util.c b/mm/util.c
index 6c1d64ed0221..8989d5767528 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -566,6 +566,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flag, unsigned long pgoff)
{
+ loff_t off = (loff_t)pgoff << PAGE_SHIFT;
unsigned long ret;
struct mm_struct *mm = current->mm;
unsigned long populate;
@@ -573,7 +574,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
ret = security_mmap_file(file, prot, flag);
if (!ret)
- ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
+ ret = fsnotify_mmap_perm(file, prot, off, len);
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
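
fsnotify_mmap_perm() expects a byte offset, but the old call shifted pgoff the wrong way (">>" instead of "<<"). The fix computes the offset once up front, widening to loff_t before shifting so that a large pgoff does not overflow unsigned long on 32-bit. In isolation:

        /* page offset -> byte offset: widen first, then shift left */
        loff_t off = (loff_t)pgoff << PAGE_SHIFT;
        /* e.g. pgoff == 0x100000 with 4 KiB pages gives off == 4 GiB */
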