Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  169
1 file changed, 94 insertions(+), 75 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6cfe0b43ab8f..6cac826cb61f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1473,17 +1473,14 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
#ifdef CONFIG_CONTIG_ALLOC
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
int nid, nodemask_t *nodemask)
{
struct folio *folio;
- int order = huge_page_order(h);
bool retried = false;
- if (nid == NUMA_NO_NODE)
- nid = numa_mem_id();
retry:
- folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
+ folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
if (!folio) {
if (hugetlb_cma_exclusive_alloc())
return NULL;
@@ -1506,16 +1503,16 @@ retry:
}
#else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+ nodemask_t *nodemask)
{
return NULL;
}
#endif /* CONFIG_CONTIG_ALLOC */
#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+ nodemask_t *nodemask)
{
return NULL;
}
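
For reference, a minimal caller sketch (an illustration only, not part of the patch; the helper name is an assumption) of the new order-based interface: with the hstate argument gone, the caller derives the order itself and resolves NUMA_NO_NODE before calling down, which is what only_alloc_fresh_hugetlb_folio() does further below.

/* Hypothetical helper, for illustration only. */
static struct folio *example_alloc_gigantic(struct hstate *h, gfp_t gfp_mask,
					    int nid, nodemask_t *nodemask)
{
	int order = huge_page_order(h);

	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	return alloc_gigantic_folio(order, gfp_mask, nid, nodemask);
}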
@@ -1890,14 +1887,14 @@ void free_huge_folio(struct folio *folio)
/*
* Must be called with the hugetlb lock held
*/
-static void __prep_account_new_huge_page(struct hstate *h, int nid)
+static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
{
lockdep_assert_held(&hugetlb_lock);
h->nr_huge_pages++;
- h->nr_huge_pages_node[nid]++;
+ h->nr_huge_pages_node[folio_nid(folio)]++;
}
-static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
+static void init_new_hugetlb_folio(struct folio *folio)
{
__folio_set_hugetlb(folio);
INIT_LIST_HEAD(&folio->lru);
@@ -1906,20 +1903,6 @@ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
set_hugetlb_cgroup_rsvd(folio, NULL);
}
-static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
-{
- init_new_hugetlb_folio(h, folio);
- hugetlb_vmemmap_optimize_folio(h, folio);
-}
-
-static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
-{
- __prep_new_hugetlb_folio(h, folio);
- spin_lock_irq(&hugetlb_lock);
- __prep_account_new_huge_page(h, nid);
- spin_unlock_irq(&hugetlb_lock);
-}
-
/*
* Find and lock address space (mapping) in write mode.
*
@@ -1940,11 +1923,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
return NULL;
}
-static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
- gfp_t gfp_mask, int nid, nodemask_t *nmask,
- nodemask_t *node_alloc_noretry)
+static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
{
- int order = huge_page_order(h);
struct folio *folio;
bool alloc_try_hard = true;
@@ -1959,8 +1940,6 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
alloc_try_hard = false;
if (alloc_try_hard)
gfp_mask |= __GFP_RETRY_MAYFAIL;
- if (nid == NUMA_NO_NODE)
- nid = numa_mem_id();
folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
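
A note on the frozen allocation above, with a small sketch (assumed, not from the patch): __alloc_frozen_pages() hands back a compound page whose refcount is zero, so any path that wants to expose the folio must unfreeze it first, as alloc_migrate_hugetlb_folio() does later in this diff.

/* Hypothetical wrapper, for illustration only. */
static struct folio *example_alloc_unfrozen(int order, gfp_t gfp_mask,
					    int nid, nodemask_t *nmask)
{
	struct folio *folio;

	folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask, NULL);
	if (folio)
		folio_ref_unfreeze(folio, 1);	/* refcount 0 -> 1 */
	return folio;
}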
@@ -1994,36 +1973,36 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
nodemask_t *node_alloc_noretry)
{
struct folio *folio;
+ int order = huge_page_order(h);
- if (hstate_is_gigantic(h))
- folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
+ if (nid == NUMA_NO_NODE)
+ nid = numa_mem_id();
+
+ if (order_is_gigantic(order))
+ folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
else
- folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
+ folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
+ node_alloc_noretry);
if (folio)
- init_new_hugetlb_folio(h, folio);
+ init_new_hugetlb_folio(folio);
return folio;
}
/*
- * Common helper to allocate a fresh hugetlb page. All specific allocators
- * should use this function to get new hugetlb pages
+ * Common helper to allocate a fresh hugetlb folio. All specific allocators
+ * should use this function to get a new hugetlb folio
*
- * Note that returned page is 'frozen': ref count of head page and all tail
- * pages is zero.
+ * Note that the returned folio is 'frozen': the ref count of the head page
+ * and all tail pages is zero, and the accounting must be done by the caller.
*/
static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
gfp_t gfp_mask, int nid, nodemask_t *nmask)
{
struct folio *folio;
- if (hstate_is_gigantic(h))
- folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
- else
- folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
- if (!folio)
- return NULL;
-
- prep_new_hugetlb_folio(h, folio, folio_nid(folio));
+ folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+ if (folio)
+ hugetlb_vmemmap_optimize_folio(h, folio);
return folio;
}
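
To make the new split of responsibilities concrete, here is a sketch (an illustration under stated assumptions, not code from the patch) of what the removed prep_new_hugetlb_folio() amounts to after this change: allocation and vmemmap optimization happen first, and the caller performs the accounting under hugetlb_lock itself.

/* Hypothetical caller pattern; the function name is an assumption. */
static struct folio *example_alloc_and_account(struct hstate *h,
		gfp_t gfp_mask, int nid, nodemask_t *nmask)
{
	struct folio *folio;

	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
	if (!folio)
		return NULL;

	spin_lock_irq(&hugetlb_lock);
	account_new_hugetlb_folio(h, folio);	/* nr_huge_pages{,_node}++ */
	spin_unlock_irq(&hugetlb_lock);

	return folio;
}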
@@ -2039,7 +2018,7 @@ static void prep_and_add_allocated_folios(struct hstate *h,
/* Add all new pool pages to free lists in one lock cycle */
spin_lock_irqsave(&hugetlb_lock, flags);
list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
- __prep_account_new_huge_page(h, folio_nid(folio));
+ account_new_hugetlb_folio(h, folio);
enqueue_hugetlb_folio(h, folio);
}
spin_unlock_irqrestore(&hugetlb_lock, flags);
@@ -2241,19 +2220,17 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
goto out_unlock;
spin_unlock_irq(&hugetlb_lock);
- folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
if (!folio)
return NULL;
- hugetlb_vmemmap_optimize_folio(h, folio);
-
spin_lock_irq(&hugetlb_lock);
/*
* nr_huge_pages needs to be adjusted within the same lock cycle
* as surplus_pages, otherwise it might confuse
* persistent_huge_pages() momentarily.
*/
- __prep_account_new_huge_page(h, folio_nid(folio));
+ account_new_hugetlb_folio(h, folio);
/*
* We could have raced with the pool size change.
@@ -2290,6 +2267,10 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
if (!folio)
return NULL;
+ spin_lock_irq(&hugetlb_lock);
+ account_new_hugetlb_folio(h, folio);
+ spin_unlock_irq(&hugetlb_lock);
+
/* fresh huge pages are frozen */
folio_ref_unfreeze(folio, 1);
/*
@@ -2836,18 +2817,17 @@ retry:
if (!new_folio) {
spin_unlock_irq(&hugetlb_lock);
gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
- new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
- NULL, NULL);
+ new_folio = alloc_fresh_hugetlb_folio(h, gfp_mask,
+ nid, NULL);
if (!new_folio)
return -ENOMEM;
- __prep_new_hugetlb_folio(h, new_folio);
goto retry;
}
/*
* Ok, old_folio is still a genuine free hugepage. Remove it from
* the freelist and decrease the counters. These will be
- * incremented again when calling __prep_account_new_huge_page()
+ * incremented again when calling account_new_hugetlb_folio()
* and enqueue_hugetlb_folio() for new_folio. The counters will
* remain stable since this happens under the lock.
*/
@@ -2857,7 +2837,7 @@ retry:
* Ref count on new_folio is already zero as it was dropped
* earlier. It can be directly added to the pool free list.
*/
- __prep_account_new_huge_page(h, nid);
+ account_new_hugetlb_folio(h, new_folio);
enqueue_hugetlb_folio(h, new_folio);
/*
@@ -2890,7 +2870,7 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
* alloc_contig_range and them. Return -ENOMEM as this has the effect
* of bailing out right away without further retrying.
*/
- if (folio_order(folio) > MAX_PAGE_ORDER)
+ if (order_is_gigantic(folio_order(folio)))
return -ENOMEM;
if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list))
@@ -3237,17 +3217,18 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
{
enum zone_type zone = zone_idx(folio_zone(folio));
int nid = folio_nid(folio);
+ struct page *page = folio_page(folio, start_page_number);
unsigned long head_pfn = folio_pfn(folio);
unsigned long pfn, end_pfn = head_pfn + end_page_number;
- int ret;
-
- for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
- struct page *page = pfn_to_page(pfn);
+ /*
+ * As we marked all tail pages with memblock_reserved_mark_noinit(),
+ * we must initialize them ourselves here.
+ */
+ for (pfn = head_pfn + start_page_number; pfn < end_pfn; page++, pfn++) {
__init_single_page(page, pfn, zone, nid);
prep_compound_tail((struct page *)folio, pfn - head_pfn);
- ret = page_ref_freeze(page, 1);
- VM_BUG_ON(!ret);
+ set_page_count(page, 0);
}
}
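
The pointer-based walk above relies on the struct pages of one folio being virtually contiguous in the vmemmap, so advancing the pointer tracks the pfn one-to-one. A small sketch (assumed, for illustration only) of that invariant:

/* Hypothetical check; not part of the patch. */
static void __init example_check_tail_walk(struct folio *folio,
		unsigned long start, unsigned long end)
{
	struct page *page = folio_page(folio, start);
	unsigned long head_pfn = folio_pfn(folio);
	unsigned long pfn;

	for (pfn = head_pfn + start; pfn < head_pfn + end; page++, pfn++)
		VM_WARN_ON_ONCE(page != pfn_to_page(pfn));
}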
@@ -3257,12 +3238,15 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
{
int ret;
- /* Prepare folio head */
+ /*
+ * This is an open-coded prep_compound_page() whereby we avoid
+ * walking pages twice by initializing/preparing+freezing them in the
+ * same go.
+ */
__folio_clear_reserved(folio);
__folio_set_head(folio);
ret = folio_ref_freeze(folio, 1);
VM_BUG_ON(!ret);
- /* Initialize the necessary tail struct pages */
hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
prep_compound_head((struct page *)folio, huge_page_order(h));
}
@@ -3327,7 +3311,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
hugetlb_bootmem_init_migratetype(folio, h);
/* Subdivide locks to achieve better parallel performance */
spin_lock_irqsave(&hugetlb_lock, flags);
- __prep_account_new_huge_page(h, folio_nid(folio));
+ account_new_hugetlb_folio(h, folio);
enqueue_hugetlb_folio(h, folio);
spin_unlock_irqrestore(&hugetlb_lock, flags);
}
@@ -3423,7 +3407,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
hugetlb_folio_init_vmemmap(folio, h,
HUGETLB_VMEMMAP_RESERVE_PAGES);
- init_new_hugetlb_folio(h, folio);
+ init_new_hugetlb_folio(folio);
if (hugetlb_bootmem_page_prehvo(m))
/*
@@ -3554,7 +3538,14 @@ static void __init hugetlb_pages_alloc_boot_node(unsigned long start, unsigned l
nodes_clear(node_alloc_noretry);
for (i = 0; i < num; ++i) {
- struct folio *folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
+ struct folio *folio;
+
+ if (hugetlb_vmemmap_optimizable_size(h) &&
+ (si_mem_available() == 0) && !list_empty(&folio_list)) {
+ prep_and_add_allocated_folios(h, &folio_list);
+ INIT_LIST_HEAD(&folio_list);
+ }
+ folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
&node_alloc_noretry, &next_node);
if (!folio)
break;
@@ -3589,10 +3580,9 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h)
unsigned long jiffies_start;
unsigned long jiffies_end;
+ unsigned long remaining;
job.thread_fn = hugetlb_pages_alloc_boot_node;
- job.start = 0;
- job.size = h->max_huge_pages;
/*
* job.max_threads is 25% of the available cpu threads by default.
@@ -3616,10 +3606,29 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h)
}
job.max_threads = hugepage_allocation_threads;
- job.min_chunk = h->max_huge_pages / hugepage_allocation_threads;
jiffies_start = jiffies;
- padata_do_multithreaded(&job);
+ do {
+ remaining = h->max_huge_pages - h->nr_huge_pages;
+
+ job.start = h->nr_huge_pages;
+ job.size = remaining;
+ job.min_chunk = remaining / hugepage_allocation_threads;
+ padata_do_multithreaded(&job);
+
+ if (h->nr_huge_pages == h->max_huge_pages)
+ break;
+
+ /*
+ * Retry only if the vmemmap optimization might have been able to free
+ * some memory back to the system.
+ */
+ if (!hugetlb_vmemmap_optimizable(h))
+ break;
+
+ /* Continue if progress was made in last iteration */
+ } while (remaining != (h->max_huge_pages - h->nr_huge_pages));
+
jiffies_end = jiffies;
pr_info("HugeTLB: allocation took %dms with hugepage_allocation_threads=%ld\n",
@@ -3654,6 +3663,9 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
return;
}
+ if (!h->max_huge_pages)
+ return;
+
/* do node specific alloc */
if (hugetlb_hstate_alloc_pages_specific_nodes(h))
return;
@@ -4035,7 +4047,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
prep_compound_page(page, dst->order);
new_folio->mapping = NULL;
- init_new_hugetlb_folio(dst, new_folio);
+ init_new_hugetlb_folio(new_folio);
/* Copy the CMA flag so that it is freed correctly */
if (cma)
folio_set_hugetlb_cma(new_folio);
@@ -4654,6 +4666,7 @@ static int __init hugetlb_init(void)
BUILD_BUG_ON(sizeof_field(struct page, private) * BITS_PER_BYTE <
__NR_HPAGEFLAGS);
+ BUILD_BUG_ON(HUGETLB_PAGE_ORDER > MAX_FOLIO_ORDER);
if (!hugepages_supported()) {
if (hugetlb_max_hstate || default_hstate_max_huge_pages)
@@ -4737,6 +4750,7 @@ void __init hugetlb_add_hstate(unsigned int order)
}
BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
BUG_ON(order < order_base_2(__NR_USED_SUBPAGE));
+ WARN_ON(order > MAX_FOLIO_ORDER);
h = &hstates[hugetlb_max_hstate++];
__mutex_init(&h->resize_lock, "resize mutex", &h->resize_key);
h->order = order;
@@ -6927,6 +6941,11 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
folio = alloc_hugetlb_folio(dst_vma, dst_addr, false);
if (IS_ERR(folio)) {
+ pte_t *actual_pte = hugetlb_walk(dst_vma, dst_addr, huge_page_size(h));
+ if (actual_pte) {
+ ret = -EEXIST;
+ goto out;
+ }
ret = -ENOMEM;
goto out;
}
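
From user space, the distinction introduced by this last hunk matters to a userfaultfd monitor: -EEXIST from the copy path means another thread already resolved the fault and can simply be skipped, while -ENOMEM is a hard failure. A hedged user-space sketch (assumed, not part of the patch):

#include <err.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

/* uffd, dst, src and len are assumed to be set up elsewhere. */
static void example_resolve_fault(int uffd, unsigned long dst,
				  unsigned long src, unsigned long len)
{
	struct uffdio_copy copy = {
		.dst = dst, .src = src, .len = len, .mode = 0,
	};

	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1) {
		if (errno == EEXIST)
			return;	/* already mapped: treat as resolved */
		err(1, "UFFDIO_COPY");
	}
}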