Diffstat (limited to 'mm/gup.c')
-rw-r--r--  mm/gup.c  126
1 file changed, 53 insertions, 73 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 0bc4d140fc07..a8ba5112e4d0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -28,11 +28,6 @@
#include "internal.h"
#include "swap.h"
-struct follow_page_context {
- struct dev_pagemap *pgmap;
- unsigned int page_mask;
-};
-
static inline void sanity_check_pinned_pages(struct page **pages,
unsigned long npages)
{
@@ -148,7 +143,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs,
if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
return -ENOMEM;
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && folio_is_pci_p2pdma(folio)))
return -EREMOTEIO;
if (flags & FOLL_GET)
@@ -237,7 +232,7 @@ void folio_add_pin(struct folio *folio)
static inline struct folio *gup_folio_range_next(struct page *start,
unsigned long npages, unsigned long i, unsigned int *ntails)
{
- struct page *next = nth_page(start, i);
+ struct page *next = start + i;
struct folio *folio = page_folio(next);
unsigned int nr = 1;
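Context for this change: on SPARSEMEM without VMEMMAP, nth_page() expands to pfn_to_page(page_to_pfn(page) + n) because the memmap may be discontiguous; elsewhere it is plain pointer arithmetic. A sketch of the equivalence this hunk relies on (annotation, not part of the patch):

	/* nth_page() vs. pointer arithmetic (sketch):
	 *   nth_page(start, i) == pfn_to_page(page_to_pfn(start) + i)
	 * which collapses to "start + i" whenever the page range sits in one
	 * physically contiguous stretch of the memmap.
	 */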
@@ -342,6 +337,10 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
* "gup-pinned page range" refers to a range of pages that has had one of the
* pin_user_pages() variants called on that page.
*
+ * The page range must be truly physically contiguous: the page range
+ * corresponds to a contiguous PFN range and all pages can be iterated
+ * naturally.
+ *
* For the page ranges defined by [page .. page+npages], make that range (or
* its head pages, if a compound page) dirty, if @make_dirty is true, and if the
* page range was previously listed as clean.
@@ -359,6 +358,8 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
struct folio *folio;
unsigned int nr;
+ VM_WARN_ON_ONCE(!page_range_contiguous(page, npages));
+
for (i = 0; i < npages; i += nr) {
folio = gup_folio_range_next(page, npages, i, &nr);
if (make_dirty && !folio_test_dirty(folio)) {
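The VM_WARN_ON_ONCE turns the documented contract into a debug-build check. A simplified, per-page sketch of what such a contiguity test can do (hypothetical helper; the in-tree page_range_contiguous() may check more cheaply, e.g. once per memory section):

	/* Hypothetical, simplified contiguity check: verify that struct page
	 * arithmetic and PFN arithmetic agree across the whole range. */
	static bool range_pages_contiguous(const struct page *page, unsigned long npages)
	{
		const unsigned long start_pfn = page_to_pfn(page);
		unsigned long i;

		for (i = 1; i < npages; i++)
			if (page + i != pfn_to_page(start_pfn + i))
				return false;
		return true;
	}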
@@ -475,29 +476,15 @@ EXPORT_SYMBOL_GPL(unpin_folios);
* lifecycle. Avoid setting the bit unless necessary, or it might cause write
* cache bouncing on large SMP machines for concurrent pinned gups.
*/
-static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
+static inline void mm_set_has_pinned_flag(struct mm_struct *mm)
{
- if (!test_bit(MMF_HAS_PINNED, mm_flags))
- set_bit(MMF_HAS_PINNED, mm_flags);
+ if (!mm_flags_test(MMF_HAS_PINNED, mm))
+ mm_flags_set(MMF_HAS_PINNED, mm);
}
#ifdef CONFIG_MMU
#ifdef CONFIG_HAVE_GUP_FAST
-static int record_subpages(struct page *page, unsigned long sz,
- unsigned long addr, unsigned long end,
- struct page **pages)
-{
- struct page *start_page;
- int nr;
-
- start_page = nth_page(page, (addr & (sz - 1)) >> PAGE_SHIFT);
- for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
- pages[nr] = nth_page(start_page, nr);
-
- return nr;
-}
-
/**
* try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
* @page: pointer to page to be grabbed
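Two changes meet in this hunk: mm_set_has_pinned_flag() now takes the mm_struct and uses the mm_flags_test()/mm_flags_set() accessors rather than raw bitops on mm->flags, and record_subpages() disappears because its work is open-coded at the gup_fast leaf call sites further down (see those hunks). The replaced arithmetic, sketched with stand-in names: sz for the leaf size (PMD_SIZE or PUD_SIZE), head for the leaf's first page:

	/* What record_subpages() computed, now open-coded (sketch): */
	refs = (end - addr) >> PAGE_SHIFT;               /* pages covered  */
	page = head + ((addr & (sz - 1)) >> PAGE_SHIFT); /* first subpage  */
	/* pages[] is now filled only after the folio grab succeeds, so the
	 * stores are not wasted when the grab fails. */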
@@ -661,7 +648,7 @@ static inline bool can_follow_write_pud(pud_t pud, struct page *page,
static struct page *follow_huge_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp,
- int flags, struct follow_page_context *ctx)
+ int flags, unsigned long *page_mask)
{
struct mm_struct *mm = vma->vm_mm;
struct page *page;
@@ -688,7 +675,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
if (ret)
page = ERR_PTR(ret);
else
- ctx->page_mask = HPAGE_PUD_NR - 1;
+ *page_mask = HPAGE_PUD_NR - 1;
return page;
}
@@ -714,7 +701,7 @@ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
static struct page *follow_huge_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd,
unsigned int flags,
- struct follow_page_context *ctx)
+ unsigned long *page_mask)
{
struct mm_struct *mm = vma->vm_mm;
pmd_t pmdval = *pmd;
@@ -751,7 +738,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
- ctx->page_mask = HPAGE_PMD_NR - 1;
+ *page_mask = HPAGE_PMD_NR - 1;
return page;
}
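Subpage offset and page_mask in follow_huge_pmd(), worked through under the usual x86-64 assumptions (4 KiB base pages, 2 MiB PMD leaves, so HPAGE_PMD_NR == 512):

	/* addr = huge page base + 0x7000 (7 base pages in):
	 *   page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;  -> head page + 7
	 *   *page_mask = HPAGE_PMD_NR - 1;                   -> 511
	 * The caller uses *page_mask to batch the remaining subpages without
	 * walking the page tables again.
	 */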
@@ -759,7 +746,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
#else /* CONFIG_PGTABLE_HAS_HUGE_LEAVES */
static struct page *follow_huge_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp,
- int flags, struct follow_page_context *ctx)
+ int flags, unsigned long *page_mask)
{
return NULL;
}
@@ -767,7 +754,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
static struct page *follow_huge_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd,
unsigned int flags,
- struct follow_page_context *ctx)
+ unsigned long *page_mask)
{
return NULL;
}
@@ -813,8 +800,7 @@ static inline bool can_follow_write_pte(pte_t pte, struct page *page,
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd, unsigned int flags,
- struct dev_pagemap **pgmap)
+ unsigned long address, pmd_t *pmd, unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
struct folio *folio;
@@ -912,7 +898,7 @@ no_page:
static struct page *follow_pmd_mask(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
unsigned int flags,
- struct follow_page_context *ctx)
+ unsigned long *page_mask)
{
pmd_t *pmd, pmdval;
spinlock_t *ptl;
@@ -926,7 +912,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (!pmd_present(pmdval))
return no_page_table(vma, flags, address);
if (likely(!pmd_leaf(pmdval)))
- return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
+ return follow_page_pte(vma, address, pmd, flags);
if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
return no_page_table(vma, flags, address);
@@ -939,16 +925,16 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
}
if (unlikely(!pmd_leaf(pmdval))) {
spin_unlock(ptl);
- return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
+ return follow_page_pte(vma, address, pmd, flags);
}
if (pmd_trans_huge(pmdval) && (flags & FOLL_SPLIT_PMD)) {
spin_unlock(ptl);
split_huge_pmd(vma, pmd, address);
/* If pmd was left empty, stuff a page table in there quickly */
return pte_alloc(mm, pmd) ? ERR_PTR(-ENOMEM) :
- follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
+ follow_page_pte(vma, address, pmd, flags);
}
- page = follow_huge_pmd(vma, address, pmd, flags, ctx);
+ page = follow_huge_pmd(vma, address, pmd, flags, page_mask);
spin_unlock(ptl);
return page;
}
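For orientation, the walk chain now threads the bare output pointer instead of the old two-member context, whose pgmap field had become dead:

	/* follow_page_mask() -> follow_p4d_mask() -> follow_pud_mask()
	 *   -> follow_pmd_mask() -> follow_page_pte() or follow_huge_pmd()
	 * with &page_mask handed down where &ctx used to be.
	 */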
@@ -956,7 +942,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
static struct page *follow_pud_mask(struct vm_area_struct *vma,
unsigned long address, p4d_t *p4dp,
unsigned int flags,
- struct follow_page_context *ctx)
+ unsigned long *page_mask)
{
pud_t *pudp, pud;
spinlock_t *ptl;
@@ -969,7 +955,7 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
return no_page_table(vma, flags, address);
if (pud_leaf(pud)) {
ptl = pud_lock(mm, pudp);
- page = follow_huge_pud(vma, address, pudp, flags, ctx);
+ page = follow_huge_pud(vma, address, pudp, flags, page_mask);
spin_unlock(ptl);
if (page)
return page;
@@ -978,13 +964,13 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
if (unlikely(pud_bad(pud)))
return no_page_table(vma, flags, address);
- return follow_pmd_mask(vma, address, pudp, flags, ctx);
+ return follow_pmd_mask(vma, address, pudp, flags, page_mask);
}
static struct page *follow_p4d_mask(struct vm_area_struct *vma,
unsigned long address, pgd_t *pgdp,
unsigned int flags,
- struct follow_page_context *ctx)
+ unsigned long *page_mask)
{
p4d_t *p4dp, p4d;
@@ -995,7 +981,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
if (!p4d_present(p4d) || p4d_bad(p4d))
return no_page_table(vma, flags, address);
- return follow_pud_mask(vma, address, p4dp, flags, ctx);
+ return follow_pud_mask(vma, address, p4dp, flags, page_mask);
}
/**
@@ -1003,20 +989,16 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
* @vma: vm_area_struct mapping @address
* @address: virtual address to look up
* @flags: flags modifying lookup behaviour
- * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
- * pointer to output page_mask
+ * @page_mask: a pointer to output page_mask
*
* @flags can have FOLL_ flags set, defined in <linux/mm.h>
*
- * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
- * the device's dev_pagemap metadata to avoid repeating expensive lookups.
- *
* When getting an anonymous page and the caller has to trigger unsharing
* of a shared anonymous page first, -EMLINK is returned. The caller should
* trigger a fault with FAULT_FLAG_UNSHARE set. Note that unsharing is only
* relevant with FOLL_PIN and !FOLL_WRITE.
*
- * On output, the @ctx->page_mask is set according to the size of the page.
+ * On output, @page_mask is set according to the size of the page.
*
* Return: the mapped (struct page *), %NULL if no mapping exists, or
* an error pointer if there is a mapping to something not represented
@@ -1024,7 +1006,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
*/
static struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
- struct follow_page_context *ctx)
+ unsigned long *page_mask)
{
pgd_t *pgd;
struct mm_struct *mm = vma->vm_mm;
@@ -1032,13 +1014,13 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
vma_pgtable_walk_begin(vma);
- ctx->page_mask = 0;
+ *page_mask = 0;
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
page = no_page_table(vma, flags, address);
else
- page = follow_p4d_mask(vma, address, pgd, flags, ctx);
+ page = follow_p4d_mask(vma, address, pgd, flags, page_mask);
vma_pgtable_walk_end(vma);
@@ -1376,7 +1358,7 @@ static long __get_user_pages(struct mm_struct *mm,
{
long ret = 0, i = 0;
struct vm_area_struct *vma = NULL;
- struct follow_page_context ctx = { NULL };
+ unsigned long page_mask = 0;
if (!nr_pages)
return 0;
@@ -1418,7 +1400,7 @@ static long __get_user_pages(struct mm_struct *mm,
pages ? &page : NULL);
if (ret)
goto out;
- ctx.page_mask = 0;
+ page_mask = 0;
goto next_page;
}
@@ -1441,7 +1423,7 @@ retry:
}
cond_resched();
- page = follow_page_mask(vma, start, gup_flags, &ctx);
+ page = follow_page_mask(vma, start, gup_flags, &page_mask);
if (!page || PTR_ERR(page) == -EMLINK) {
ret = faultin_page(vma, start, gup_flags,
PTR_ERR(page) == -EMLINK, locked);
@@ -1474,7 +1456,7 @@ retry:
goto out;
}
next_page:
- page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
+ page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
if (page_increm > nr_pages)
page_increm = nr_pages;
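The page_increm expression converts page_mask into "base pages left in the current leaf". Worked example, assuming 4 KiB pages and a PMD leaf (page_mask == 511):

	/* start is 5 pages into the leaf, so (start >> PAGE_SHIFT) & 511 == 5:
	 *   ~(start >> PAGE_SHIFT) & page_mask  == 511 - 5 == 506
	 *   page_increm = 1 + 506               == 507 == 512 - 5
	 * For a base page (page_mask == 0) this degenerates to page_increm == 1.
	 */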
@@ -1512,7 +1494,7 @@ next_page:
}
for (j = 0; j < page_increm; j++) {
- subpage = nth_page(page, j);
+ subpage = page + j;
pages[i + j] = subpage;
flush_anon_page(vma, subpage, start + j * PAGE_SIZE);
flush_dcache_page(subpage);
@@ -1524,8 +1506,6 @@ next_page:
nr_pages -= page_increm;
} while (nr_pages);
out:
- if (ctx.pgmap)
- put_dev_pagemap(ctx.pgmap);
return i ? i : ret;
}
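With follow_page_context gone there is no cached dev_pagemap left to release at out:. For reference, the removed pattern looked roughly like this (reconstructed sketch, not part of this hunk):

	/* Removed pattern (sketch): the PTE walker cached the pgmap so that
	 * repeated ZONE_DEVICE lookups within one GUP call paid for a single
	 * get_dev_pagemap(), released once at the end:
	 *
	 *	ctx->pgmap = get_dev_pagemap(pte_pfn(pte), ctx->pgmap);
	 *	...
	 *	if (ctx.pgmap)
	 *		put_dev_pagemap(ctx.pgmap);
	 */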
@@ -1693,7 +1673,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
mmap_assert_locked(mm);
if (flags & FOLL_PIN)
- mm_set_has_pinned_flag(&mm->flags);
+ mm_set_has_pinned_flag(mm);
/*
* FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
@@ -2861,7 +2841,6 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
{
- struct dev_pagemap *pgmap = NULL;
int ret = 0;
pte_t *ptep, *ptem;
@@ -2919,12 +2898,9 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
* see Documentation/core-api/pin_user_pages.rst for
* details.
*/
- if (flags & FOLL_PIN) {
- ret = arch_make_folio_accessible(folio);
- if (ret) {
- gup_put_folio(folio, 1, flags);
- goto pte_unmap;
- }
+ if ((flags & FOLL_PIN) && arch_make_folio_accessible(folio)) {
+ gup_put_folio(folio, 1, flags);
+ goto pte_unmap;
}
folio_set_referenced(folio);
pages[*nr] = page;
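Note the subtle behaviour shift in the collapsed condition: ret is no longer assigned, so an arch_make_folio_accessible() failure now returns 0 from gup_fast_pte_range(), i.e. a clean fall-back to the slow path (which performs the same check and can report the error), instead of a negative value that the boolean-testing fast-path callers could misinterpret. Spelled out:

	/* before: ret = arch_make_folio_accessible(folio);
	 *         if (ret) { gup_put_folio(...); goto pte_unmap; }  -> returns errno
	 * after:  ret stays 0 on this path                          -> returns 0,
	 *         gup_fast gives up on the range and the slow path retries it.
	 */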
@@ -2934,8 +2910,6 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
ret = 1;
pte_unmap:
- if (pgmap)
- put_dev_pagemap(pgmap);
pte_unmap(ptem);
return ret;
}
@@ -2972,8 +2946,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
if (pmd_special(orig))
return 0;
- page = pmd_page(orig);
- refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
+ refs = (end - addr) >> PAGE_SHIFT;
+ page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
@@ -2993,7 +2967,10 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
return 0;
}
+ pages += *nr;
*nr += refs;
+ for (; refs; refs--)
+ *(pages++) = page++;
folio_set_referenced(folio);
return 1;
}
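gup_fast_pmd_leaf() with record_subpages() open-coded, worked through (assuming 4 KiB pages, a 2 MiB leaf, and base 2 MiB-aligned):

	/* addr = base + 0x5000, end = base + 0x200000:
	 *   refs = (end - addr) >> PAGE_SHIFT                          == 507
	 *   page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT) == head + 5
	 * After try_grab_folio_fast() succeeds and the pmd recheck passes:
	 *   pages += *nr; *nr += refs; then 507 stores of *(pages++) = page++;
	 * Filling pages[] after the grab keeps the stores off the failure path.
	 */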
@@ -3012,8 +2989,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
if (pud_special(orig))
return 0;
- page = pud_page(orig);
- refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
+ refs = (end - addr) >> PAGE_SHIFT;
+ page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
@@ -3034,7 +3011,10 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
return 0;
}
+ pages += *nr;
*nr += refs;
+ for (; refs; refs--)
+ *(pages++) = page++;
folio_set_referenced(folio);
return 1;
}
@@ -3218,7 +3198,7 @@ static int gup_fast_fallback(unsigned long start, unsigned long nr_pages,
return -EINVAL;
if (gup_flags & FOLL_PIN)
- mm_set_has_pinned_flag(&current->mm->flags);
+ mm_set_has_pinned_flag(current->mm);
if (!(gup_flags & FOLL_FAST_ONLY))
might_lock_read(&current->mm->mmap_lock);