summaryrefslogtreecommitdiff
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c170
1 files changed, 87 insertions, 83 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 5bcb334cd6f2..93d5a6f793d2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -93,7 +93,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
if (anon_vma) {
atomic_set(&anon_vma->refcount, 1);
- anon_vma->degree = 1; /* Reference for first vma */
+ anon_vma->num_children = 0;
+ anon_vma->num_active_vmas = 0;
anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
@@ -201,6 +202,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
+ anon_vma->num_children++; /* self-parent link for new root */
allocated = anon_vma;
}
@@ -210,8 +212,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
- /* vma reference or self-parent link for new root */
- anon_vma->degree++;
+ anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
}
@@ -296,19 +297,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
anon_vma_chain_link(dst, avc, anon_vma);
/*
- * Reuse existing anon_vma if its degree lower than two,
- * that means it has no vma and only one anon_vma child.
+ * Reuse existing anon_vma if it has no vma and only one
+ * anon_vma child.
*
- * Do not choose parent anon_vma, otherwise first child
- * will always reuse it. Root anon_vma is never reused:
+ * Root anon_vma is never reused:
* it has self-parent reference and at least one child.
*/
if (!dst->anon_vma && src->anon_vma &&
- anon_vma != src->anon_vma && anon_vma->degree < 2)
+ anon_vma->num_children < 2 &&
+ anon_vma->num_active_vmas == 0)
dst->anon_vma = anon_vma;
}
if (dst->anon_vma)
- dst->anon_vma->degree++;
+ dst->anon_vma->num_active_vmas++;
unlock_anon_vma_root(root);
return 0;
@@ -358,6 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
anon_vma = anon_vma_alloc();
if (!anon_vma)
goto out_error;
+ anon_vma->num_active_vmas++;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto out_error_free_anon_vma;
@@ -378,7 +380,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->parent->degree++;
+ anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
return 0;
@@ -410,7 +412,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
* to free them outside the lock.
*/
if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
- anon_vma->parent->degree--;
+ anon_vma->parent->num_children--;
continue;
}
@@ -418,7 +420,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
anon_vma_chain_free(avc);
}
if (vma->anon_vma) {
- vma->anon_vma->degree--;
+ vma->anon_vma->num_active_vmas--;
/*
* vma would still be needed after unlink, and anon_vma will be prepared
@@ -436,7 +438,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
- VM_WARN_ON(anon_vma->degree);
+ VM_WARN_ON(anon_vma->num_children);
+ VM_WARN_ON(anon_vma->num_active_vmas);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
@@ -999,7 +1002,7 @@ static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw)
* downgrading page table protection not changing it to point
* to a new page.
*
- * See Documentation/vm/mmu_notifier.rst
+ * See Documentation/mm/mmu_notifier.rst
*/
if (ret)
cleaned++;
@@ -1537,6 +1540,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
PageAnonExclusive(subpage);
if (folio_test_hugetlb(folio)) {
+ bool anon = folio_test_anon(folio);
+
/*
* The try_to_unmap() is only passed a hugetlb page
* in the case where the hugetlb page is poisoned.
@@ -1551,31 +1556,28 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*/
flush_cache_range(vma, range.start, range.end);
- if (!folio_test_anon(folio)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!anon && !(flags & TTU_RMAP_LOCKED));
+ if (!anon && huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);
+
/*
- * To call huge_pmd_unshare, i_mmap_rwsem must be
- * held in write mode. Caller needs to explicitly
- * do this outside rmap routines.
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
*/
- VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
-
- if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
- flush_tlb_range(vma, range.start, range.end);
- mmu_notifier_invalidate_range(mm, range.start,
- range.end);
-
- /*
- * The ref count of the PMD page was dropped
- * which is part of the way map counting
- * is done for shared PMDs. Return 'true'
- * here. When there is no other sharing,
- * huge_pmd_unshare returns false and we will
- * unmap the actual page and drop map count
- * to zero.
- */
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
+ page_vma_mapped_walk_done(&pvmw);
+ break;
}
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
} else {
@@ -1619,9 +1621,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
hugetlb_count_sub(folio_nr_pages(folio), mm);
- set_huge_swap_pte_at(mm, address,
- pvmw.pte, pteval,
- vma_mmu_pagesize(vma));
+ set_huge_pte_at(mm, address, pvmw.pte, pteval);
} else {
dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
@@ -1765,7 +1765,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* to point at a new folio while a device is
* still using this folio.
*
- * See Documentation/vm/mmu_notifier.rst
+ * See Documentation/mm/mmu_notifier.rst
*/
dec_mm_counter(mm, mm_counter_file(&folio->page));
}
@@ -1775,7 +1775,7 @@ discard:
* done above for all cases requiring it to happen under page
* table lock before mmu_notifier_invalidate_range_end()
*
- * See Documentation/vm/mmu_notifier.rst
+ * See Documentation/mm/mmu_notifier.rst
*/
page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
if (vma->vm_flags & VM_LOCKED)
@@ -1899,13 +1899,30 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_FOLIO(!pvmw.pte, folio);
- subpage = folio_page(folio,
- pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ if (folio_is_zone_device(folio)) {
+ /*
+ * Our PTE is a non-present device exclusive entry and
+ * calculating the subpage as for the common case would
+ * result in an invalid pointer.
+ *
+ * Since only PAGE_SIZE pages can currently be
+ * migrated, just set it to page. This will need to be
+ * changed when hugepage migrations to device private
+ * memory are supported.
+ */
+ VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio);
+ subpage = &folio->page;
+ } else {
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ }
address = pvmw.address;
anon_exclusive = folio_test_anon(folio) &&
PageAnonExclusive(subpage);
if (folio_test_hugetlb(folio)) {
+ bool anon = folio_test_anon(folio);
+
/*
* huge_pmd_unshare may unmap an entire PMD page.
* There is no way of knowing exactly which PMDs may
@@ -1915,31 +1932,28 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
*/
flush_cache_range(vma, range.start, range.end);
- if (!folio_test_anon(folio)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!anon && !(flags & TTU_RMAP_LOCKED));
+ if (!anon && huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);
+
/*
- * To call huge_pmd_unshare, i_mmap_rwsem must be
- * held in write mode. Caller needs to explicitly
- * do this outside rmap routines.
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
*/
- VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
-
- if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
- flush_tlb_range(vma, range.start, range.end);
- mmu_notifier_invalidate_range(mm, range.start,
- range.end);
-
- /*
- * The ref count of the PMD page was dropped
- * which is part of the way map counting
- * is done for shared PMDs. Return 'true'
- * here. When there is no other sharing,
- * huge_pmd_unshare returns false and we will
- * unmap the actual page and drop map count
- * to zero.
- */
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
+ page_vma_mapped_walk_done(&pvmw);
+ break;
}
/* Nuke the hugetlb page table entry */
@@ -1957,7 +1971,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
- if (folio_is_zone_device(folio)) {
+ if (folio_is_device_private(folio)) {
unsigned long pfn = folio_pfn(folio);
swp_entry_t entry;
pte_t swp_pte;
@@ -1993,22 +2007,12 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/*
* No need to invalidate here it will synchronize on
* against the special swap migration pte.
- *
- * The assignment to subpage above was computed from a
- * swap PTE which results in an invalid pointer.
- * Since only PAGE_SIZE pages can currently be
- * migrated, just set it to page. This will need to be
- * changed when hugepage migrations to device private
- * memory are supported.
*/
- subpage = &folio->page;
} else if (PageHWPoison(subpage)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
hugetlb_count_sub(folio_nr_pages(folio), mm);
- set_huge_swap_pte_at(mm, address,
- pvmw.pte, pteval,
- vma_mmu_pagesize(vma));
+ set_huge_pte_at(mm, address, pvmw.pte, pteval);
} else {
dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
@@ -2076,8 +2080,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
if (folio_test_hugetlb(folio))
- set_huge_swap_pte_at(mm, address, pvmw.pte,
- swp_pte, vma_mmu_pagesize(vma));
+ set_huge_pte_at(mm, address, pvmw.pte, swp_pte);
else
set_pte_at(mm, address, pvmw.pte, swp_pte);
trace_set_migration_pte(address, pte_val(swp_pte),
@@ -2093,7 +2096,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
* done above for all cases requiring it to happen under page
* table lock before mmu_notifier_invalidate_range_end()
*
- * See Documentation/vm/mmu_notifier.rst
+ * See Documentation/mm/mmu_notifier.rst
*/
page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
if (vma->vm_flags & VM_LOCKED)
@@ -2131,7 +2134,8 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
TTU_SYNC)))
return;
- if (folio_is_zone_device(folio) && !folio_is_device_private(folio))
+ if (folio_is_zone_device(folio) &&
+ (!folio_is_device_private(folio) && !folio_is_device_coherent(folio)))
return;
/*