summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/huge_mm.h24
-rw-r--r--mm/huge_memory.c52
-rw-r--r--mm/mprotect.c39
3 files changed, 107 insertions, 8 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ce44caa40eed..6370026689e0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -342,6 +342,17 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
unsigned long address);
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pud_t *pudp, unsigned long addr, pgprot_t newprot,
+ unsigned long cp_flags);
+#else
+static inline int
+change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pud_t *pudp, unsigned long addr, pgprot_t newprot,
+ unsigned long cp_flags) { return 0; }
+#endif
+
#define split_huge_pud(__vma, __pud, __address) \
do { \
pud_t *____pud = (__pud); \
@@ -585,6 +596,19 @@ static inline int next_order(unsigned long *orders, int prev)
{
return 0;
}
+
+static inline void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long address)
+{
+}
+
+static inline int change_huge_pud(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pud_t *pudp,
+ unsigned long addr, pgprot_t newprot,
+ unsigned long cp_flags)
+{
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int split_folio_to_list_to_order(struct folio *folio,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 608ea551da16..ac9e8a77059d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2104,6 +2104,53 @@ unlock:
return ret;
}
+/*
+ * Returns:
+ *
+ * - 0: if pud leaf changed from under us
+ * - 1: if pud can be skipped
+ * - HPAGE_PUD_NR: if pud was successfully processed
+ */
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pud_t *pudp, unsigned long addr, pgprot_t newprot,
+ unsigned long cp_flags)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pud_t oldpud, entry;
+ spinlock_t *ptl;
+
+ tlb_change_page_size(tlb, HPAGE_PUD_SIZE);
+
+ /* NUMA balancing doesn't apply to dax */
+ if (cp_flags & MM_CP_PROT_NUMA)
+ return 1;
+
+ /*
+ * Huge entries on userfault-wp only works with anonymous, while we
+ * don't have anonymous PUDs yet.
+ */
+ if (WARN_ON_ONCE(cp_flags & MM_CP_UFFD_WP_ALL))
+ return 1;
+
+ ptl = __pud_trans_huge_lock(pudp, vma);
+ if (!ptl)
+ return 0;
+
+ /*
+ * Can't clear PUD or it can race with concurrent zapping. See
+ * change_huge_pmd().
+ */
+ oldpud = pudp_invalidate(vma, addr, pudp);
+ entry = pud_modify(oldpud, newprot);
+ set_pud_at(mm, addr, pudp, entry);
+ tlb_flush_pud_range(tlb, addr, HPAGE_PUD_SIZE);
+
+ spin_unlock(ptl);
+ return HPAGE_PUD_NR;
+}
+#endif
+
#ifdef CONFIG_USERFAULTFD
/*
* The PT lock for src_pmd and dst_vma/src_vma (for reading) are locked by
@@ -2334,6 +2381,11 @@ out:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(&range);
}
+#else
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long address)
+{
+}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index d423080e6509..446f8e5f10d9 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -302,8 +302,9 @@ pgtable_split_needed(struct vm_area_struct *vma, unsigned long cp_flags)
{
/*
* pte markers only resides in pte level, if we need pte markers,
- * we need to split. We cannot wr-protect shmem thp because file
- * thp is handled differently when split by erasing the pmd so far.
+ * we need to split. For example, we cannot wr-protect a file thp
+ * (e.g. 2M shmem) because file thp is handled differently when
+ * split by erasing the pmd so far.
*/
return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma);
}
@@ -430,31 +431,53 @@ static inline long change_pud_range(struct mmu_gather *tlb,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
struct mmu_notifier_range range;
- pud_t *pud;
+ pud_t *pudp, pud;
unsigned long next;
long pages = 0, ret;
range.start = 0;
- pud = pud_offset(p4d, addr);
+ pudp = pud_offset(p4d, addr);
do {
+again:
next = pud_addr_end(addr, end);
- ret = change_prepare(vma, pud, pmd, addr, cp_flags);
+ ret = change_prepare(vma, pudp, pmd, addr, cp_flags);
if (ret) {
pages = ret;
break;
}
- if (pud_none_or_clear_bad(pud))
+
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
continue;
+
if (!range.start) {
mmu_notifier_range_init(&range,
MMU_NOTIFY_PROTECTION_VMA, 0,
vma->vm_mm, addr, end);
mmu_notifier_invalidate_range_start(&range);
}
- pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
+
+ if (pud_leaf(pud)) {
+ if ((next - addr != PUD_SIZE) ||
+ pgtable_split_needed(vma, cp_flags)) {
+ __split_huge_pud(vma, pudp, addr);
+ goto again;
+ } else {
+ ret = change_huge_pud(tlb, vma, pudp,
+ addr, newprot, cp_flags);
+ if (ret == 0)
+ goto again;
+ /* huge pud was handled */
+ if (ret == HPAGE_PUD_NR)
+ pages += HPAGE_PUD_NR;
+ continue;
+ }
+ }
+
+ pages += change_pmd_range(tlb, vma, pudp, addr, next, newprot,
cp_flags);
- } while (pud++, addr = next, addr != end);
+ } while (pudp++, addr = next, addr != end);
if (range.start)
mmu_notifier_invalidate_range_end(&range);