summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Hildenbrand <david@redhat.com> 2025-08-15 14:54:55 +0100
committer Andrew Morton <akpm@linux-foundation.org> 2025-09-13 16:55:05 -0700
commit 8cdc4d27019356b0304308eb799484c899b62a87 (patch)
tree 414259af7af5c19763f6f7fd98ee5cb5d6130a87
parent 1f1c061089dcd274befa0c76fb9f6e253a8368c0 (diff)
mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED
Let's allow for making MADV_COLLAPSE succeed on areas that neither have
VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled unless explicitly
advised (PR_THP_DISABLE_EXCEPT_ADVISED).

MADV_COLLAPSE is a clear advice that we want to collapse.

Note that we still respect the VM_NOHUGEPAGE flag, just like MADV_COLLAPSE
always does. So consequently, MADV_COLLAPSE is now only refused on
VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED, including for shmem.

Link: https://lkml.kernel.org/r/20250815135549.130506-4-usamaarif642@gmail.com
Co-developed-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yafang <laoar.shao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- include/linux/huge_mm.h 8
-rw-r--r-- include/uapi/linux/prctl.h 2
-rw-r--r-- mm/huge_memory.c 5
-rw-r--r-- mm/memory.c 6
-rw-r--r-- mm/shmem.c 2
5 files changed, 16 insertions, 7 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 92ea0b9771fa..1ac0d06fb3c1 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -329,7 +329,7 @@ struct thpsize {
* through madvise or prctl.
*/
static inline bool vma_thp_disabled(struct vm_area_struct *vma,
- vm_flags_t vm_flags)
+ vm_flags_t vm_flags, bool forced_collapse)
{
/* Are THPs disabled for this VMA? */
if (vm_flags & VM_NOHUGEPAGE)
@@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma,
*/
if (vm_flags & VM_HUGEPAGE)
return false;
+ /*
+ * Forcing a collapse (e.g., madv_collapse), is a clear advice to
+ * use THPs.
+ */
+ if (forced_collapse)
+ return false;
return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm);
}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 150b6deebfb1..51c4e8c82b1e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -185,7 +185,7 @@ struct prctl_mm_map {
#define PR_SET_THP_DISABLE 41
/*
* Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE /
- * VM_HUGEPAGE).
+ * VM_HUGEPAGE, MADV_COLLAPSE).
*/
# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
#define PR_GET_THP_DISABLE 42
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 899d9ac86ecd..d89992b65acc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -104,7 +104,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
{
const bool smaps = type == TVA_SMAPS;
const bool in_pf = type == TVA_PAGEFAULT;
- const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE;
+ const bool forced_collapse = type == TVA_FORCED_COLLAPSE;
+ const bool enforce_sysfs = !forced_collapse;
unsigned long supported_orders;
/* Check the intersection of requested and supported orders. */
@@ -122,7 +123,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
if (!vma->vm_mm) /* vdso */
return 0;
- if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags))
+ if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags, forced_collapse))
return 0;
/* khugepaged doesn't collapse DAX vma, but page fault is fine. */
diff --git a/mm/memory.c b/mm/memory.c
index 7b1e8f137fa3..d9de6c056179 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5332,9 +5332,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa
* It is too late to allocate a small folio, we already have a large
* folio in the pagecache: especially s390 KVM cannot tolerate any
* PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any
- * PMD mappings if THPs are disabled.
+ * PMD mappings if THPs are disabled. As we already have a THP,
+ * behave as if we are forcing a collapse.
*/
- if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))
+ if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags,
+ /* forced_collapse=*/ true))
return ret;
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
diff --git a/mm/shmem.c b/mm/shmem.c
index e2c76a30802b..d945de3a7f0e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1817,7 +1817,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
unsigned int global_orders;
- if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
+ if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
return 0;
global_orders = shmem_huge_global_enabled(inode, index, write_end,