Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c | 169
1 file changed, 94 insertions(+), 75 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6cfe0b43ab8f..6cac826cb61f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1473,17 +1473,14 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 #ifdef CONFIG_CONTIG_ALLOC
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	int order = huge_page_order(h);
 	bool retried = false;
 
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
 retry:
-	folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
+	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
 	if (!folio) {
 		if (hugetlb_cma_exclusive_alloc())
 			return NULL;
@@ -1506,16 +1503,16 @@ retry:
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+		nodemask_t *nodemask)
 {
 	return NULL;
 }
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+		nodemask_t *nodemask)
 {
 	return NULL;
 }
@@ -1890,14 +1887,14 @@ void free_huge_folio(struct folio *folio)
 /*
  * Must be called with the hugetlb lock held
  */
-static void __prep_account_new_huge_page(struct hstate *h, int nid)
+static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
 	lockdep_assert_held(&hugetlb_lock);
 	h->nr_huge_pages++;
-	h->nr_huge_pages_node[nid]++;
+	h->nr_huge_pages_node[folio_nid(folio)]++;
 }
 
-static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
+static void init_new_hugetlb_folio(struct folio *folio)
 {
 	__folio_set_hugetlb(folio);
 	INIT_LIST_HEAD(&folio->lru);
@@ -1906,20 +1903,6 @@ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 	set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
-static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
-{
-	init_new_hugetlb_folio(h, folio);
-	hugetlb_vmemmap_optimize_folio(h, folio);
-}
-
-static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
-{
-	__prep_new_hugetlb_folio(h, folio);
-	spin_lock_irq(&hugetlb_lock);
-	__prep_account_new_huge_page(h, nid);
-	spin_unlock_irq(&hugetlb_lock);
-}
-
 /*
  * Find and lock address space (mapping) in write mode.
  *
@@ -1940,11 +1923,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
 	return NULL;
 }
 
-static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
-		gfp_t gfp_mask, int nid, nodemask_t *nmask,
-		nodemask_t *node_alloc_noretry)
+static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
 {
-	int order = huge_page_order(h);
 	struct folio *folio;
 	bool alloc_try_hard = true;
@@ -1959,8 +1940,6 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
 		alloc_try_hard = false;
 	if (alloc_try_hard)
 		gfp_mask |= __GFP_RETRY_MAYFAIL;
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
 
 	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
@@ -1994,36 +1973,36 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 		nodemask_t *node_alloc_noretry)
 {
 	struct folio *folio;
+	int order = huge_page_order(h);
 
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	if (order_is_gigantic(order))
+		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
 	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
+		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
+				node_alloc_noretry);
 	if (folio)
-		init_new_hugetlb_folio(h, folio);
+		init_new_hugetlb_folio(folio);
 	return folio;
 }
 
 /*
- * Common helper to allocate a fresh hugetlb page. All specific allocators
- * should use this function to get new hugetlb pages
+ * Common helper to allocate a fresh hugetlb folio. All specific allocators
+ * should use this function to get new hugetlb folio
  *
- * Note that returned page is 'frozen': ref count of head page and all tail
- * pages is zero.
+ * Note that returned folio is 'frozen': ref count of head page and all tail
+ * pages is zero, and the accounting must be done in the caller.
 */
 static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask)
 {
 	struct folio *folio;
 
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
-	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
-	if (!folio)
-		return NULL;
-
-	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
+	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (folio)
+		hugetlb_vmemmap_optimize_folio(h, folio);
 	return folio;
 }
@@ -2039,7 +2018,7 @@ static void prep_and_add_allocated_folios(struct hstate *h,
 	/* Add all new pool pages to free lists in one lock cycle */
 	spin_lock_irqsave(&hugetlb_lock, flags);
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
-		__prep_account_new_huge_page(h, folio_nid(folio));
+		account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
 	}
 	spin_unlock_irqrestore(&hugetlb_lock, flags);
@@ -2241,19 +2220,17 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
-	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
 	if (!folio)
 		return NULL;
 
-	hugetlb_vmemmap_optimize_folio(h, folio);
-
 	spin_lock_irq(&hugetlb_lock);
 	/*
 	 * nr_huge_pages needs to be adjusted within the same lock cycle
 	 * as surplus_pages, otherwise it might confuse
 	 * persistent_huge_pages() momentarily.
 	 */
-	__prep_account_new_huge_page(h, folio_nid(folio));
+	account_new_hugetlb_folio(h, folio);
 
 	/*
 	 * We could have raced with the pool size change.
@@ -2290,6 +2267,10 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
 	if (!folio)
 		return NULL;
 
+	spin_lock_irq(&hugetlb_lock);
+	account_new_hugetlb_folio(h, folio);
+	spin_unlock_irq(&hugetlb_lock);
+
 	/* fresh huge pages are frozen */
 	folio_ref_unfreeze(folio, 1);
 	/*
@@ -2836,18 +2817,17 @@ retry:
 	if (!new_folio) {
 		spin_unlock_irq(&hugetlb_lock);
 		gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-		new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
-				NULL, NULL);
+		new_folio = alloc_fresh_hugetlb_folio(h, gfp_mask,
+				nid, NULL);
 		if (!new_folio)
 			return -ENOMEM;
-		__prep_new_hugetlb_folio(h, new_folio);
 		goto retry;
 	}
 
 	/*
 	 * Ok, old_folio is still a genuine free hugepage. Remove it from
 	 * the freelist and decrease the counters. These will be
-	 * incremented again when calling __prep_account_new_huge_page()
+	 * incremented again when calling account_new_hugetlb_folio()
 	 * and enqueue_hugetlb_folio() for new_folio. The counters will
 	 * remain stable since this happens under the lock.
 	 */
@@ -2857,7 +2837,7 @@ retry:
 	 * Ref count on new_folio is already zero as it was dropped
 	 * earlier. It can be directly added to the pool free list.
 	 */
-	__prep_account_new_huge_page(h, nid);
+	account_new_hugetlb_folio(h, new_folio);
 	enqueue_hugetlb_folio(h, new_folio);
 
 	/*
@@ -2890,7 +2870,7 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
 	 * alloc_contig_range and them. Return -ENOMEM as this has the effect
 	 * of bailing out right away without further retrying.
 	 */
-	if (folio_order(folio) > MAX_PAGE_ORDER)
+	if (order_is_gigantic(folio_order(folio)))
 		return -ENOMEM;
 
 	if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list))
@@ -3237,17 +3217,18 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 {
 	enum zone_type zone = zone_idx(folio_zone(folio));
 	int nid = folio_nid(folio);
+	struct page *page = folio_page(folio, start_page_number);
 	unsigned long head_pfn = folio_pfn(folio);
 	unsigned long pfn, end_pfn = head_pfn + end_page_number;
-	int ret;
-
-	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
-		struct page *page = pfn_to_page(pfn);
 
+	/*
+	 * As we marked all tail pages with memblock_reserved_mark_noinit(),
+	 * we must initialize them ourselves here.
+	 */
+	for (pfn = head_pfn + start_page_number; pfn < end_pfn; page++, pfn++) {
 		__init_single_page(page, pfn, zone, nid);
 		prep_compound_tail((struct page *)folio, pfn - head_pfn);
-		ret = page_ref_freeze(page, 1);
-		VM_BUG_ON(!ret);
+		set_page_count(page, 0);
 	}
 }
@@ -3257,12 +3238,15 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
 {
 	int ret;
 
-	/* Prepare folio head */
+	/*
+	 * This is an open-coded prep_compound_page() whereby we avoid
+	 * walking pages twice by initializing/preparing+freezing them in the
+	 * same go.
+	 */
 	__folio_clear_reserved(folio);
 	__folio_set_head(folio);
 	ret = folio_ref_freeze(folio, 1);
 	VM_BUG_ON(!ret);
-	/* Initialize the necessary tail struct pages */
 	hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
 	prep_compound_head((struct page *)folio, huge_page_order(h));
 }
@@ -3327,7 +3311,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
 		hugetlb_bootmem_init_migratetype(folio, h);
 		/* Subdivide locks to achieve better parallel performance */
 		spin_lock_irqsave(&hugetlb_lock, flags);
-		__prep_account_new_huge_page(h, folio_nid(folio));
+		account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 	}
@@ -3423,7 +3407,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
 		hugetlb_folio_init_vmemmap(folio, h, HUGETLB_VMEMMAP_RESERVE_PAGES);
-		init_new_hugetlb_folio(h, folio);
+		init_new_hugetlb_folio(folio);
 
 		if (hugetlb_bootmem_page_prehvo(m))
 			/*
@@ -3554,7 +3538,14 @@ static void __init hugetlb_pages_alloc_boot_node(unsigned long start, unsigned l
 	nodes_clear(node_alloc_noretry);
 
 	for (i = 0; i < num; ++i) {
-		struct folio *folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
+		struct folio *folio;
+
+		if (hugetlb_vmemmap_optimizable_size(h) &&
+		    (si_mem_available() == 0) && !list_empty(&folio_list)) {
+			prep_and_add_allocated_folios(h, &folio_list);
+			INIT_LIST_HEAD(&folio_list);
+		}
+		folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
 				&node_alloc_noretry, &next_node);
 		if (!folio)
 			break;
@@ -3589,10 +3580,9 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h)
 	unsigned long jiffies_start;
 	unsigned long jiffies_end;
+	unsigned long remaining;
 
 	job.thread_fn = hugetlb_pages_alloc_boot_node;
-	job.start = 0;
-	job.size = h->max_huge_pages;
 
 	/*
 	 * job.max_threads is 25% of the available cpu threads by default.
@@ -3616,10 +3606,29 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h)
 	}
 
 	job.max_threads = hugepage_allocation_threads;
-	job.min_chunk = h->max_huge_pages / hugepage_allocation_threads;
 
 	jiffies_start = jiffies;
-	padata_do_multithreaded(&job);
+	do {
+		remaining = h->max_huge_pages - h->nr_huge_pages;
+
+		job.start = h->nr_huge_pages;
+		job.size = remaining;
+		job.min_chunk = remaining / hugepage_allocation_threads;
+		padata_do_multithreaded(&job);
+
+		if (h->nr_huge_pages == h->max_huge_pages)
+			break;
+
+		/*
+		 * Retry only if the vmemmap optimization might have been able to free
+		 * some memory back to the system.
+		 */
+		if (!hugetlb_vmemmap_optimizable(h))
+			break;
+
+		/* Continue if progress was made in last iteration */
+	} while (remaining != (h->max_huge_pages - h->nr_huge_pages));
+
 	jiffies_end = jiffies;
 
 	pr_info("HugeTLB: allocation took %dms with hugepage_allocation_threads=%ld\n",
@@ -3654,6 +3663,9 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 		return;
 	}
 
+	if (!h->max_huge_pages)
+		return;
+
 	/* do node specific alloc */
 	if (hugetlb_hstate_alloc_pages_specific_nodes(h))
 		return;
@@ -4035,7 +4047,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
 		prep_compound_page(page, dst->order);
 		new_folio->mapping = NULL;
-		init_new_hugetlb_folio(dst, new_folio);
+		init_new_hugetlb_folio(new_folio);
 
 		/* Copy the CMA flag so that it is freed correctly */
 		if (cma)
 			folio_set_hugetlb_cma(new_folio);
@@ -4654,6 +4666,7 @@ static int __init hugetlb_init(void)
 	BUILD_BUG_ON(sizeof_field(struct page, private) * BITS_PER_BYTE < __NR_HPAGEFLAGS);
+	BUILD_BUG_ON_INVALID(HUGETLB_PAGE_ORDER > MAX_FOLIO_ORDER);
 
 	if (!hugepages_supported()) {
 		if (hugetlb_max_hstate || default_hstate_max_huge_pages)
@@ -4737,6 +4750,7 @@ void __init hugetlb_add_hstate(unsigned int order)
 	}
 	BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
 	BUG_ON(order < order_base_2(__NR_USED_SUBPAGE));
+	WARN_ON(order > MAX_FOLIO_ORDER);
 	h = &hstates[hugetlb_max_hstate++];
 	__mutex_init(&h->resize_lock, "resize mutex", &h->resize_key);
 	h->order = order;
@@ -6927,6 +6941,11 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 		folio = alloc_hugetlb_folio(dst_vma, dst_addr, false);
 		if (IS_ERR(folio)) {
+			pte_t *actual_pte = hugetlb_walk(dst_vma, dst_addr, PMD_SIZE);
+			if (actual_pte) {
+				ret = -EEXIST;
+				goto out;
+			}
 			ret = -ENOMEM;
 			goto out;
 		}
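The updated comment above only_alloc_fresh_hugetlb_folio() and alloc_fresh_hugetlb_folio() states that the returned folio is frozen and not yet accounted, and that accounting is now the caller's responsibility. A minimal caller sketch, using only the helpers visible in the hunks above; the function name example_alloc_and_account is hypothetical and not part of the patch:

static struct folio *example_alloc_and_account(struct hstate *h, gfp_t gfp_mask,
		int nid, nodemask_t *nmask)
{
	struct folio *folio;

	/* Allocates, runs init_new_hugetlb_folio() and the vmemmap optimization. */
	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
	if (!folio)
		return NULL;

	/* Accounting is done by the caller, in a single hugetlb_lock cycle. */
	spin_lock_irq(&hugetlb_lock);
	account_new_hugetlb_folio(h, folio);
	spin_unlock_irq(&hugetlb_lock);

	/* The folio is still frozen; unfreeze before handing it out. */
	folio_ref_unfreeze(folio, 1);
	return folio;
}

This mirrors the alloc_migrate_hugetlb_folio() hunk, which now takes hugetlb_lock to account the new folio before unfreezing its reference count.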