From 04769cb1c45a919cd94b931e6494d2a9afc32914 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 27 Nov 2020 17:41:19 +0100 Subject: mm/frame-vector: Use FOLL_LONGTERM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used by media/videbuf2 for persistent dma mappings, not just for a single dma operation and then freed again, so needs FOLL_LONGTERM. Unfortunately current pup_locked doesn't support FOLL_LONGTERM due to locking issues. Rework the code to pull the pup path out from the mmap_sem critical section as suggested by Jason. By relying entirely on the vma checks in pin_user_pages and follow_pfn (for vm_flags and vma_is_fsdax) we can also streamline the code a lot. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Signed-off-by: Daniel Vetter Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Pawel Osciak Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Tomasz Figa Cc: Mauro Carvalho Chehab Cc: Andrew Morton Cc: John Hubbard Cc: Jérôme Glisse Cc: Jan Kara Cc: Dan Williams Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-6-daniel.vetter@ffwll.ch --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdf8a8cd6ae..2e7b1d048df9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1768,7 +1768,7 @@ struct frame_vector { struct frame_vector *frame_vector_create(unsigned int nr_frames); void frame_vector_destroy(struct frame_vector *vec); int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, - unsigned int gup_flags, struct frame_vector *vec); + struct frame_vector *vec); void put_vaddr_frames(struct frame_vector *vec); int frame_vector_to_pages(struct frame_vector *vec); void frame_vector_to_pfns(struct frame_vector *vec); -- cgit v1.2.3 From eb83b8e3e6478845f8130622a2048ed4ec3b3be3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 27 Nov 2020 17:41:20 +0100 Subject: media: videobuf2: Move frame_vector into media subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's the only user. This also garbage collects the CONFIG_FRAME_VECTOR symbol from all over the tree (well just one place, somehow omap media driver still had this in its Kconfig, despite not using it). Reviewed-by: John Hubbard Acked-by: Hans Verkuil Acked-by: Mauro Carvalho Chehab Acked-by: Tomasz Figa Signed-off-by: Daniel Vetter Cc: Jason Gunthorpe Cc: Pawel Osciak Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Tomasz Figa Cc: Mauro Carvalho Chehab Cc: Andrew Morton Cc: John Hubbard Cc: Jérôme Glisse Cc: Jan Kara Cc: Dan Williams Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-7-daniel.vetter@ffwll.ch --- drivers/media/common/videobuf2/Kconfig | 1 - drivers/media/common/videobuf2/Makefile | 1 + drivers/media/common/videobuf2/frame_vector.c | 223 ++++++++++++++++++++++++++ drivers/media/platform/omap/Kconfig | 1 - include/linux/mm.h | 42 ----- include/media/frame_vector.h | 47 ++++++ include/media/videobuf2-core.h | 1 + mm/Kconfig | 3 - mm/Makefile | 1 - mm/frame_vector.c | 221 ------------------------- 10 files changed, 272 insertions(+), 269 deletions(-) create mode 100644 drivers/media/common/videobuf2/frame_vector.c create mode 100644 include/media/frame_vector.h delete mode 100644 mm/frame_vector.c (limited to 'include/linux') diff --git a/drivers/media/common/videobuf2/Kconfig b/drivers/media/common/videobuf2/Kconfig index edbc99ebba87..d2223a12c95f 100644 --- a/drivers/media/common/videobuf2/Kconfig +++ b/drivers/media/common/videobuf2/Kconfig @@ -9,7 +9,6 @@ config VIDEOBUF2_V4L2 config VIDEOBUF2_MEMOPS tristate - select FRAME_VECTOR config VIDEOBUF2_DMA_CONTIG tristate diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile index 77bebe8b202f..54306f8d096c 100644 --- a/drivers/media/common/videobuf2/Makefile +++ b/drivers/media/common/videobuf2/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 videobuf2-common-objs := videobuf2-core.o +videobuf2-common-objs += frame_vector.o ifeq ($(CONFIG_TRACEPOINTS),y) videobuf2-common-objs += vb2-trace.o diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c new file mode 100644 index 000000000000..a0e65481a201 --- /dev/null +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/** + * get_vaddr_frames() - map virtual addresses to pfns + * @start: starting user address + * @nr_frames: number of pages / pfns from start to map + * @gup_flags: flags modifying lookup behaviour + * @vec: structure which receives pages / pfns of the addresses mapped. + * It should have space for at least nr_frames entries. + * + * This function maps virtual addresses from @start and fills @vec structure + * with page frame numbers or page pointers to corresponding pages (choice + * depends on the type of the vma underlying the virtual address). If @start + * belongs to a normal vma, the function grabs reference to each of the pages + * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't + * touch page structures and the caller must make sure pfns aren't reused for + * anything else while he is using them. + * + * The function returns number of pages mapped which may be less than + * @nr_frames. In particular we stop mapping if there are more vmas of + * different type underlying the specified range of virtual addresses. + * When the function isn't able to map a single page, it returns error. + * + * This function takes care of grabbing mmap_lock as necessary. + */ +int get_vaddr_frames(unsigned long start, unsigned int nr_frames, + struct frame_vector *vec) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int ret = 0; + int err; + + if (nr_frames == 0) + return 0; + + if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) + nr_frames = vec->nr_allocated; + + start = untagged_addr(start); + + ret = pin_user_pages_fast(start, nr_frames, + FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, + (struct page **)(vec->ptrs)); + if (ret > 0) { + vec->got_ref = true; + vec->is_pfns = false; + goto out_unlocked; + } + + mmap_read_lock(mm); + vec->got_ref = false; + vec->is_pfns = true; + ret = 0; + do { + unsigned long *nums = frame_vector_pfns(vec); + + vma = find_vma_intersection(mm, start, start + 1); + if (!vma) + break; + + while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { + err = follow_pfn(vma, start, &nums[ret]); + if (err) { + if (ret == 0) + ret = err; + goto out; + } + start += PAGE_SIZE; + ret++; + } + /* Bail out if VMA doesn't completely cover the tail page. */ + if (start < vma->vm_end) + break; + } while (ret < nr_frames); +out: + mmap_read_unlock(mm); +out_unlocked: + if (!ret) + ret = -EFAULT; + if (ret > 0) + vec->nr_frames = ret; + return ret; +} +EXPORT_SYMBOL(get_vaddr_frames); + +/** + * put_vaddr_frames() - drop references to pages if get_vaddr_frames() acquired + * them + * @vec: frame vector to put + * + * Drop references to pages if get_vaddr_frames() acquired them. We also + * invalidate the frame vector so that it is prepared for the next call into + * get_vaddr_frames(). + */ +void put_vaddr_frames(struct frame_vector *vec) +{ + struct page **pages; + + if (!vec->got_ref) + goto out; + pages = frame_vector_pages(vec); + /* + * frame_vector_pages() might needed to do a conversion when + * get_vaddr_frames() got pages but vec was later converted to pfns. + * But it shouldn't really fail to convert pfns back... + */ + if (WARN_ON(IS_ERR(pages))) + goto out; + + unpin_user_pages(pages, vec->nr_frames); + vec->got_ref = false; +out: + vec->nr_frames = 0; +} +EXPORT_SYMBOL(put_vaddr_frames); + +/** + * frame_vector_to_pages - convert frame vector to contain page pointers + * @vec: frame vector to convert + * + * Convert @vec to contain array of page pointers. If the conversion is + * successful, return 0. Otherwise return an error. Note that we do not grab + * page references for the page structures. + */ +int frame_vector_to_pages(struct frame_vector *vec) +{ + int i; + unsigned long *nums; + struct page **pages; + + if (!vec->is_pfns) + return 0; + nums = frame_vector_pfns(vec); + for (i = 0; i < vec->nr_frames; i++) + if (!pfn_valid(nums[i])) + return -EINVAL; + pages = (struct page **)nums; + for (i = 0; i < vec->nr_frames; i++) + pages[i] = pfn_to_page(nums[i]); + vec->is_pfns = false; + return 0; +} +EXPORT_SYMBOL(frame_vector_to_pages); + +/** + * frame_vector_to_pfns - convert frame vector to contain pfns + * @vec: frame vector to convert + * + * Convert @vec to contain array of pfns. + */ +void frame_vector_to_pfns(struct frame_vector *vec) +{ + int i; + unsigned long *nums; + struct page **pages; + + if (vec->is_pfns) + return; + pages = (struct page **)(vec->ptrs); + nums = (unsigned long *)pages; + for (i = 0; i < vec->nr_frames; i++) + nums[i] = page_to_pfn(pages[i]); + vec->is_pfns = true; +} +EXPORT_SYMBOL(frame_vector_to_pfns); + +/** + * frame_vector_create() - allocate & initialize structure for pinned pfns + * @nr_frames: number of pfns slots we should reserve + * + * Allocate and initialize struct pinned_pfns to be able to hold @nr_pfns + * pfns. + */ +struct frame_vector *frame_vector_create(unsigned int nr_frames) +{ + struct frame_vector *vec; + int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames; + + if (WARN_ON_ONCE(nr_frames == 0)) + return NULL; + /* + * This is absurdly high. It's here just to avoid strange effects when + * arithmetics overflows. + */ + if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2)) + return NULL; + /* + * Avoid higher order allocations, use vmalloc instead. It should + * be rare anyway. + */ + vec = kvmalloc(size, GFP_KERNEL); + if (!vec) + return NULL; + vec->nr_allocated = nr_frames; + vec->nr_frames = 0; + return vec; +} +EXPORT_SYMBOL(frame_vector_create); + +/** + * frame_vector_destroy() - free memory allocated to carry frame vector + * @vec: Frame vector to free + * + * Free structure allocated by frame_vector_create() to carry frames. + */ +void frame_vector_destroy(struct frame_vector *vec) +{ + /* Make sure put_vaddr_frames() got called properly... */ + VM_BUG_ON(vec->nr_frames > 0); + kvfree(vec); +} +EXPORT_SYMBOL(frame_vector_destroy); diff --git a/drivers/media/platform/omap/Kconfig b/drivers/media/platform/omap/Kconfig index f73b5893220d..de16de46c0f4 100644 --- a/drivers/media/platform/omap/Kconfig +++ b/drivers/media/platform/omap/Kconfig @@ -12,6 +12,5 @@ config VIDEO_OMAP2_VOUT depends on VIDEO_V4L2 select VIDEOBUF2_DMA_CONTIG select OMAP2_VRFB if ARCH_OMAP2 || ARCH_OMAP3 - select FRAME_VECTOR help V4L2 Display driver support for OMAP2/3 based boards. diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e7b1d048df9..049439b65553 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1754,48 +1754,6 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim); -/* Container for pinned pfns / pages */ -struct frame_vector { - unsigned int nr_allocated; /* Number of frames we have space for */ - unsigned int nr_frames; /* Number of frames stored in ptrs array */ - bool got_ref; /* Did we pin pages by getting page ref? */ - bool is_pfns; /* Does array contain pages or pfns? */ - void *ptrs[]; /* Array of pinned pfns / pages. Use - * pfns_vector_pages() or pfns_vector_pfns() - * for access */ -}; - -struct frame_vector *frame_vector_create(unsigned int nr_frames); -void frame_vector_destroy(struct frame_vector *vec); -int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, - struct frame_vector *vec); -void put_vaddr_frames(struct frame_vector *vec); -int frame_vector_to_pages(struct frame_vector *vec); -void frame_vector_to_pfns(struct frame_vector *vec); - -static inline unsigned int frame_vector_count(struct frame_vector *vec) -{ - return vec->nr_frames; -} - -static inline struct page **frame_vector_pages(struct frame_vector *vec) -{ - if (vec->is_pfns) { - int err = frame_vector_to_pages(vec); - - if (err) - return ERR_PTR(err); - } - return (struct page **)(vec->ptrs); -} - -static inline unsigned long *frame_vector_pfns(struct frame_vector *vec) -{ - if (!vec->is_pfns) - frame_vector_to_pfns(vec); - return (unsigned long *)(vec->ptrs); -} - struct kvec; int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); diff --git a/include/media/frame_vector.h b/include/media/frame_vector.h new file mode 100644 index 000000000000..bfed1710dc24 --- /dev/null +++ b/include/media/frame_vector.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _MEDIA_FRAME_VECTOR_H +#define _MEDIA_FRAME_VECTOR_H + +/* Container for pinned pfns / pages in frame_vector.c */ +struct frame_vector { + unsigned int nr_allocated; /* Number of frames we have space for */ + unsigned int nr_frames; /* Number of frames stored in ptrs array */ + bool got_ref; /* Did we pin pages by getting page ref? */ + bool is_pfns; /* Does array contain pages or pfns? */ + void *ptrs[]; /* Array of pinned pfns / pages. Use + * pfns_vector_pages() or pfns_vector_pfns() + * for access */ +}; + +struct frame_vector *frame_vector_create(unsigned int nr_frames); +void frame_vector_destroy(struct frame_vector *vec); +int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, + struct frame_vector *vec); +void put_vaddr_frames(struct frame_vector *vec); +int frame_vector_to_pages(struct frame_vector *vec); +void frame_vector_to_pfns(struct frame_vector *vec); + +static inline unsigned int frame_vector_count(struct frame_vector *vec) +{ + return vec->nr_frames; +} + +static inline struct page **frame_vector_pages(struct frame_vector *vec) +{ + if (vec->is_pfns) { + int err = frame_vector_to_pages(vec); + + if (err) + return ERR_PTR(err); + } + return (struct page **)(vec->ptrs); +} + +static inline unsigned long *frame_vector_pfns(struct frame_vector *vec) +{ + if (!vec->is_pfns) + frame_vector_to_pfns(vec); + return (unsigned long *)(vec->ptrs); +} + +#endif /* _MEDIA_FRAME_VECTOR_H */ diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 61969402a0e3..799ba61b5b6f 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -18,6 +18,7 @@ #include #include #include +#include #define VB2_MAX_FRAME (32) #define VB2_MAX_PLANES (8) diff --git a/mm/Kconfig b/mm/Kconfig index f730605b8dcf..24c045b24b95 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -804,9 +804,6 @@ config DEVICE_PRIVATE config VMAP_PFN bool -config FRAME_VECTOR - bool - config ARCH_USES_HIGH_VMA_FLAGS bool config ARCH_HAS_PKEYS diff --git a/mm/Makefile b/mm/Makefile index b6cd2fffa492..135bbb65511a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -110,7 +110,6 @@ obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o -obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o diff --git a/mm/frame_vector.c b/mm/frame_vector.c deleted file mode 100644 index f8c34b895c76..000000000000 --- a/mm/frame_vector.c +++ /dev/null @@ -1,221 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * get_vaddr_frames() - map virtual addresses to pfns - * @start: starting user address - * @nr_frames: number of pages / pfns from start to map - * @gup_flags: flags modifying lookup behaviour - * @vec: structure which receives pages / pfns of the addresses mapped. - * It should have space for at least nr_frames entries. - * - * This function maps virtual addresses from @start and fills @vec structure - * with page frame numbers or page pointers to corresponding pages (choice - * depends on the type of the vma underlying the virtual address). If @start - * belongs to a normal vma, the function grabs reference to each of the pages - * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't - * touch page structures and the caller must make sure pfns aren't reused for - * anything else while he is using them. - * - * The function returns number of pages mapped which may be less than - * @nr_frames. In particular we stop mapping if there are more vmas of - * different type underlying the specified range of virtual addresses. - * When the function isn't able to map a single page, it returns error. - * - * This function takes care of grabbing mmap_lock as necessary. - */ -int get_vaddr_frames(unsigned long start, unsigned int nr_frames, - struct frame_vector *vec) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret = 0; - int err; - - if (nr_frames == 0) - return 0; - - if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) - nr_frames = vec->nr_allocated; - - start = untagged_addr(start); - - ret = pin_user_pages_fast(start, nr_frames, - FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, - (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); - - vma = find_vma_intersection(mm, start, start + 1); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret == 0) - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; -} -EXPORT_SYMBOL(get_vaddr_frames); - -/** - * put_vaddr_frames() - drop references to pages if get_vaddr_frames() acquired - * them - * @vec: frame vector to put - * - * Drop references to pages if get_vaddr_frames() acquired them. We also - * invalidate the frame vector so that it is prepared for the next call into - * get_vaddr_frames(). - */ -void put_vaddr_frames(struct frame_vector *vec) -{ - struct page **pages; - - if (!vec->got_ref) - goto out; - pages = frame_vector_pages(vec); - /* - * frame_vector_pages() might needed to do a conversion when - * get_vaddr_frames() got pages but vec was later converted to pfns. - * But it shouldn't really fail to convert pfns back... - */ - if (WARN_ON(IS_ERR(pages))) - goto out; - - unpin_user_pages(pages, vec->nr_frames); - vec->got_ref = false; -out: - vec->nr_frames = 0; -} -EXPORT_SYMBOL(put_vaddr_frames); - -/** - * frame_vector_to_pages - convert frame vector to contain page pointers - * @vec: frame vector to convert - * - * Convert @vec to contain array of page pointers. If the conversion is - * successful, return 0. Otherwise return an error. Note that we do not grab - * page references for the page structures. - */ -int frame_vector_to_pages(struct frame_vector *vec) -{ - int i; - unsigned long *nums; - struct page **pages; - - if (!vec->is_pfns) - return 0; - nums = frame_vector_pfns(vec); - for (i = 0; i < vec->nr_frames; i++) - if (!pfn_valid(nums[i])) - return -EINVAL; - pages = (struct page **)nums; - for (i = 0; i < vec->nr_frames; i++) - pages[i] = pfn_to_page(nums[i]); - vec->is_pfns = false; - return 0; -} -EXPORT_SYMBOL(frame_vector_to_pages); - -/** - * frame_vector_to_pfns - convert frame vector to contain pfns - * @vec: frame vector to convert - * - * Convert @vec to contain array of pfns. - */ -void frame_vector_to_pfns(struct frame_vector *vec) -{ - int i; - unsigned long *nums; - struct page **pages; - - if (vec->is_pfns) - return; - pages = (struct page **)(vec->ptrs); - nums = (unsigned long *)pages; - for (i = 0; i < vec->nr_frames; i++) - nums[i] = page_to_pfn(pages[i]); - vec->is_pfns = true; -} -EXPORT_SYMBOL(frame_vector_to_pfns); - -/** - * frame_vector_create() - allocate & initialize structure for pinned pfns - * @nr_frames: number of pfns slots we should reserve - * - * Allocate and initialize struct pinned_pfns to be able to hold @nr_pfns - * pfns. - */ -struct frame_vector *frame_vector_create(unsigned int nr_frames) -{ - struct frame_vector *vec; - int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames; - - if (WARN_ON_ONCE(nr_frames == 0)) - return NULL; - /* - * This is absurdly high. It's here just to avoid strange effects when - * arithmetics overflows. - */ - if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2)) - return NULL; - /* - * Avoid higher order allocations, use vmalloc instead. It should - * be rare anyway. - */ - vec = kvmalloc(size, GFP_KERNEL); - if (!vec) - return NULL; - vec->nr_allocated = nr_frames; - vec->nr_frames = 0; - return vec; -} -EXPORT_SYMBOL(frame_vector_create); - -/** - * frame_vector_destroy() - free memory allocated to carry frame vector - * @vec: Frame vector to free - * - * Free structure allocated by frame_vector_create() to carry frames. - */ -void frame_vector_destroy(struct frame_vector *vec) -{ - /* Make sure put_vaddr_frames() got called properly... */ - VM_BUG_ON(vec->nr_frames > 0); - kvfree(vec); -} -EXPORT_SYMBOL(frame_vector_destroy); -- cgit v1.2.3 From 96667f8a4382db9ed042332ca6ee165ae9b91307 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 27 Nov 2020 17:41:21 +0100 Subject: mm: Close race in generic_access_phys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Way back it was a reasonable assumptions that iomem mappings never change the pfn range they point at. But this has changed: - gpu drivers dynamically manage their memory nowadays, invalidating ptes with unmap_mapping_range when buffers get moved - contiguous dma allocations have moved from dedicated carvetouts to cma regions. This means if we miss the unmap the pfn might contain pagecache or anon memory (well anything allocated with GFP_MOVEABLE) - even /dev/mem now invalidates mappings when the kernel requests that iomem region when CONFIG_IO_STRICT_DEVMEM is set, see 3234ac664a87 ("/dev/mem: Revoke mappings when a driver claims the region") Accessing pfns obtained from ptes without holding all the locks is therefore no longer a good idea. Fix this. Since ioremap might need to manipulate pagetables too we need to drop the pt lock and have a retry loop if we raced. While at it, also add kerneldoc and improve the comment for the vma_ops->access function. It's for accessing, not for moving the memory from iomem to system memory, as the old comment seemed to suggest. References: 28b2ee20c7cb ("access_process_vm device memory infrastructure") Signed-off-by: Daniel Vetter Cc: Jason Gunthorpe Cc: Dan Williams Cc: Kees Cook Cc: Benjamin Herrensmidt Cc: Dave Airlie Cc: Andrew Morton Cc: John Hubbard Cc: Jérôme Glisse Cc: Jan Kara Cc: Dan Williams Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-8-daniel.vetter@ffwll.ch --- include/linux/mm.h | 3 ++- mm/memory.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 049439b65553..c9900aedc195 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -590,7 +590,8 @@ struct vm_operations_struct { vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically - * for use by special VMAs that can switch between memory and hardware + * for use by special VMAs. See also generic_access_phys() for a generic + * implementation useful for any iomem mapping. */ int (*access)(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); diff --git a/mm/memory.c b/mm/memory.c index feff48e1465a..8079c181efff 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4834,28 +4834,68 @@ out: return ret; } +/** + * generic_access_phys - generic implementation for iomem mmap access + * @vma: the vma to access + * @addr: userspace addres, not relative offset within @vma + * @buf: buffer to read/write + * @len: length of transfer + * @write: set to FOLL_WRITE when writing, otherwise reading + * + * This is a generic implementation for &vm_operations_struct.access for an + * iomem mapping. This callback is used by access_process_vm() when the @vma is + * not page based. + */ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { resource_size_t phys_addr; unsigned long prot = 0; void __iomem *maddr; - int offset = addr & (PAGE_SIZE-1); + pte_t *ptep, pte; + spinlock_t *ptl; + int offset = offset_in_page(addr); + int ret = -EINVAL; + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + return -EINVAL; + +retry: + if (follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl)) + return -EINVAL; + pte = *ptep; + pte_unmap_unlock(ptep, ptl); - if (follow_phys(vma, addr, write, &prot, &phys_addr)) + prot = pgprot_val(pte_pgprot(pte)); + phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; + + if ((write & FOLL_WRITE) && !pte_write(pte)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (!maddr) return -ENOMEM; + if (follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl)) + goto out_unmap; + + if (!pte_same(pte, *ptep)) { + pte_unmap_unlock(ptep, ptl); + iounmap(maddr); + + goto retry; + } + if (write) memcpy_toio(maddr + offset, buf, len); else memcpy_fromio(buf, maddr + offset, len); + ret = len; + pte_unmap_unlock(ptep, ptl); +out_unmap: iounmap(maddr); - return len; + return ret; } EXPORT_SYMBOL_GPL(generic_access_phys); #endif -- cgit v1.2.3 From 71a1d8ed900f8cf53151beff17e3e2ff8e9283a1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 27 Nov 2020 17:41:24 +0100 Subject: resource: Move devmem revoke code to resource framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want all iomem mmaps to consistently revoke ptes when the kernel takes over and CONFIG_IO_STRICT_DEVMEM is enabled. This includes the pci bar mmaps available through procfs and sysfs, which currently do not revoke mappings. To prepare for this, move the code from the /dev/kmem driver to kernel/resource.c. During review Jason spotted that barriers are used somewhat inconsistently. Fix that up while we shuffle this code, since it doesn't have an actual impact at runtime. Otherwise no semantic and behavioural changes intended, just code extraction and adjusting comments and names. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Cc: Jason Gunthorpe Cc: Kees Cook Cc: Dan Williams Cc: Andrew Morton Cc: John Hubbard Cc: Jérôme Glisse Cc: Jan Kara Cc: Dan Williams Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Daniel Vetter Cc: David Hildenbrand Cc: "Rafael J. Wysocki" Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-11-daniel.vetter@ffwll.ch --- drivers/char/mem.c | 85 +------------------------------------------ include/linux/ioport.h | 6 +--- kernel/resource.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 99 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 7dcf9e4ea79d..43c871dc7477 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -31,9 +31,6 @@ #include #include #include -#include -#include -#include #ifdef CONFIG_IA64 # include @@ -836,42 +833,6 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig) return ret; } -static struct inode *devmem_inode; - -#ifdef CONFIG_IO_STRICT_DEVMEM -void revoke_devmem(struct resource *res) -{ - /* pairs with smp_store_release() in devmem_init_inode() */ - struct inode *inode = smp_load_acquire(&devmem_inode); - - /* - * Check that the initialization has completed. Losing the race - * is ok because it means drivers are claiming resources before - * the fs_initcall level of init and prevent /dev/mem from - * establishing mappings. - */ - if (!inode) - return; - - /* - * The expectation is that the driver has successfully marked - * the resource busy by this point, so devmem_is_allowed() - * should start returning false, however for performance this - * does not iterate the entire resource range. - */ - if (devmem_is_allowed(PHYS_PFN(res->start)) && - devmem_is_allowed(PHYS_PFN(res->end))) { - /* - * *cringe* iomem=relaxed says "go ahead, what's the - * worst that can happen?" - */ - return; - } - - unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1); -} -#endif - static int open_port(struct inode *inode, struct file *filp) { int rc; @@ -891,7 +852,7 @@ static int open_port(struct inode *inode, struct file *filp) * revocations when drivers want to take over a /dev/mem mapped * range. */ - filp->f_mapping = inode->i_mapping; + filp->f_mapping = iomem_get_mapping(); return 0; } @@ -1023,48 +984,6 @@ static char *mem_devnode(struct device *dev, umode_t *mode) static struct class *mem_class; -static int devmem_fs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type devmem_fs_type = { - .name = "devmem", - .owner = THIS_MODULE, - .init_fs_context = devmem_fs_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static int devmem_init_inode(void) -{ - static struct vfsmount *devmem_vfs_mount; - static int devmem_fs_cnt; - struct inode *inode; - int rc; - - rc = simple_pin_fs(&devmem_fs_type, &devmem_vfs_mount, &devmem_fs_cnt); - if (rc < 0) { - pr_err("Cannot mount /dev/mem pseudo filesystem: %d\n", rc); - return rc; - } - - inode = alloc_anon_inode(devmem_vfs_mount->mnt_sb); - if (IS_ERR(inode)) { - rc = PTR_ERR(inode); - pr_err("Cannot allocate inode for /dev/mem: %d\n", rc); - simple_release_fs(&devmem_vfs_mount, &devmem_fs_cnt); - return rc; - } - - /* - * Publish /dev/mem initialized. - * Pairs with smp_load_acquire() in revoke_devmem(). - */ - smp_store_release(&devmem_inode, inode); - - return 0; -} - static int __init chr_dev_init(void) { int minor; @@ -1086,8 +1005,6 @@ static int __init chr_dev_init(void) */ if ((minor == DEVPORT_MINOR) && !arch_has_dev_port()) continue; - if ((minor == DEVMEM_MINOR) && devmem_init_inode() != 0) - continue; device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor), NULL, devlist[minor].name); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index fe48b7840665..55de385c839c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -334,11 +334,7 @@ static inline void irqresource_disabled(struct resource *res, u32 irq) res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET; } -#ifdef CONFIG_IO_STRICT_DEVMEM -void revoke_devmem(struct resource *res); -#else -static inline void revoke_devmem(struct resource *res) { }; -#endif +extern struct address_space *iomem_get_mapping(void); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/kernel/resource.c b/kernel/resource.c index 833394f9c608..627e61b0c124 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -18,12 +18,15 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include #include @@ -1119,6 +1122,55 @@ resource_size_t resource_alignment(struct resource *res) static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait); +static struct inode *iomem_inode; + +#ifdef CONFIG_IO_STRICT_DEVMEM +static void revoke_iomem(struct resource *res) +{ + /* pairs with smp_store_release() in iomem_init_inode() */ + struct inode *inode = smp_load_acquire(&iomem_inode); + + /* + * Check that the initialization has completed. Losing the race + * is ok because it means drivers are claiming resources before + * the fs_initcall level of init and prevent iomem_get_mapping users + * from establishing mappings. + */ + if (!inode) + return; + + /* + * The expectation is that the driver has successfully marked + * the resource busy by this point, so devmem_is_allowed() + * should start returning false, however for performance this + * does not iterate the entire resource range. + */ + if (devmem_is_allowed(PHYS_PFN(res->start)) && + devmem_is_allowed(PHYS_PFN(res->end))) { + /* + * *cringe* iomem=relaxed says "go ahead, what's the + * worst that can happen?" + */ + return; + } + + unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1); +} +#else +static void revoke_iomem(struct resource *res) {} +#endif + +struct address_space *iomem_get_mapping(void) +{ + /* + * This function is only called from file open paths, hence guaranteed + * that fs_initcalls have completed and no need to check for NULL. But + * since revoke_iomem can be called before the initcall we still need + * the barrier to appease checkers. + */ + return smp_load_acquire(&iomem_inode)->i_mapping; +} + /** * __request_region - create a new busy resource region * @parent: parent resource descriptor @@ -1186,7 +1238,7 @@ struct resource * __request_region(struct resource *parent, write_unlock(&resource_lock); if (res && orig_parent == &iomem_resource) - revoke_devmem(res); + revoke_iomem(res); return res; } @@ -1786,4 +1838,48 @@ static int __init strict_iomem(char *str) return 1; } +static int iomem_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type iomem_fs_type = { + .name = "iomem", + .owner = THIS_MODULE, + .init_fs_context = iomem_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static int __init iomem_init_inode(void) +{ + static struct vfsmount *iomem_vfs_mount; + static int iomem_fs_cnt; + struct inode *inode; + int rc; + + rc = simple_pin_fs(&iomem_fs_type, &iomem_vfs_mount, &iomem_fs_cnt); + if (rc < 0) { + pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc); + return rc; + } + + inode = alloc_anon_inode(iomem_vfs_mount->mnt_sb); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + pr_err("Cannot allocate inode for iomem: %d\n", rc); + simple_release_fs(&iomem_vfs_mount, &iomem_fs_cnt); + return rc; + } + + /* + * Publish iomem revocation inode initialized. + * Pairs with smp_load_acquire() in revoke_iomem(). + */ + smp_store_release(&iomem_inode, inode); + + return 0; +} + +fs_initcall(iomem_init_inode); + __setup("iomem=", strict_iomem); -- cgit v1.2.3 From 74b30195395c406c787280a77ae55aed82dbbfc7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 27 Nov 2020 17:41:25 +0100 Subject: sysfs: Support zapping of binary attr mmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to be able to revoke pci mmaps so that the same access rules applies as for /dev/kmem. Revoke support for devmem was added in 3234ac664a87 ("/dev/mem: Revoke mappings when a driver claims the region"). The simplest way to achieve this is by having the same filp->f_mapping for all mappings, so that unmap_mapping_range can find them all, no matter through which file they've been created. Since this must be set at open time we need sysfs support for this. Add an optional mapping parameter bin_attr, which is only consulted when there's also an mmap callback, since without mmap support allowing to adjust the ->f_mapping makes no sense. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Cc: Jason Gunthorpe Cc: Kees Cook Cc: Dan Williams Cc: Andrew Morton Cc: John Hubbard Cc: Jérôme Glisse Cc: Jan Kara Cc: Dan Williams Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Christian Brauner Cc: "David S. Miller" Cc: Michael Ellerman Cc: Sourabh Jain Cc: Daniel Vetter Cc: Mauro Carvalho Chehab Cc: Nayna Jain Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-12-daniel.vetter@ffwll.ch --- fs/sysfs/file.c | 11 +++++++++++ include/linux/sysfs.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 96d0da65e088..9aefa7779b29 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -170,6 +170,16 @@ static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, return battr->mmap(of->file, kobj, battr, vma); } +static int sysfs_kf_bin_open(struct kernfs_open_file *of) +{ + struct bin_attribute *battr = of->kn->priv; + + if (battr->mapping) + of->file->f_mapping = battr->mapping; + + return 0; +} + void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { struct kernfs_node *kn = kobj->sd, *tmp; @@ -241,6 +251,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { .read = sysfs_kf_bin_read, .write = sysfs_kf_bin_write, .mmap = sysfs_kf_bin_mmap, + .open = sysfs_kf_bin_open, }; int sysfs_add_file_mode_ns(struct kernfs_node *parent, diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2caa34c1ca1a..d76a1ddf83a3 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -164,11 +164,13 @@ __ATTRIBUTE_GROUPS(_name) struct file; struct vm_area_struct; +struct address_space; struct bin_attribute { struct attribute attr; size_t size; void *private; + struct address_space *mapping; ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, -- cgit v1.2.3