From 24d28e4f1271cb2f91613dada8f2acccd00eff56 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Tue, 5 Jun 2018 10:17:51 -0700 Subject: Input: synaptics-rmi4 - convert irq distribution to irq_domain Convert the RMI driver to use the standard mechanism for distributing IRQs to the various functions. Tested on: * S7300 (F11, F34, F54) * S7817 (F12, F34, F54) Signed-off-by: Nick Dyer Acked-by: Christopher Heiny Signed-off-by: Dmitry Torokhov --- include/linux/rmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmi.h b/include/linux/rmi.h index 64125443f8a6..5ef5c7c412a7 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -354,6 +354,8 @@ struct rmi_driver_data { struct mutex irq_mutex; struct input_dev *input; + struct irq_domain *irqdomain; + u8 pdt_props; u8 num_rx_electrodes; -- cgit v1.2.3 From bf6247a70f2b7569180304041b63b59ab14217bc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Jun 2018 10:08:44 -0700 Subject: Input: make input_report_slot_state() return boolean Let's make input_report_slot_state() return boolean representing whether the contact is active or not. This will allow writing code like: if (input_mt_report_slot_state(input, obj->mt_tool, obj->type != RMI_2D_OBJECT_NONE) { input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); ... } instead of: input_mt_report_slot_state(input, obj->mt_tool, obj->type != RMI_2D_OBJECT_NONE); if (obj->type != RMI_2D_OBJECT_NONE) { input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); ... } Reviewed-by: Henrik Rydberg Acked-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/input-mt.c | 10 +++++++--- include/linux/input/mt.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index a1bbec9cda8d..8de52e3c00c0 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -131,8 +131,10 @@ EXPORT_SYMBOL(input_mt_destroy_slots); * inactive, or if the tool type is changed, a new tracking id is * assigned to the slot. The tool type is only reported if the * corresponding absbit field is set. + * + * Returns true if contact is active. */ -void input_mt_report_slot_state(struct input_dev *dev, +bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active) { struct input_mt *mt = dev->mt; @@ -140,14 +142,14 @@ void input_mt_report_slot_state(struct input_dev *dev, int id; if (!mt) - return; + return false; slot = &mt->slots[mt->slot]; slot->frame = mt->frame; if (!active) { input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); - return; + return false; } id = input_mt_get_value(slot, ABS_MT_TRACKING_ID); @@ -156,6 +158,8 @@ void input_mt_report_slot_state(struct input_dev *dev, input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id); input_event(dev, EV_ABS, ABS_MT_TOOL_TYPE, tool_type); + + return true; } EXPORT_SYMBOL(input_mt_report_slot_state); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index d7188de4db96..3f4bf60b0bb5 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -100,7 +100,7 @@ static inline bool input_is_mt_axis(int axis) return axis == ABS_MT_SLOT || input_is_mt_value(axis); } -void input_mt_report_slot_state(struct input_dev *dev, +bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); void input_mt_report_finger_count(struct input_dev *dev, int count); -- cgit v1.2.3 From ccfbb5bed407053b27492a9adc06064d949a9aa6 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 12 Jun 2018 18:16:20 +0200 Subject: atomic: Add irqsave variant of atomic_dec_and_lock() There are in-tree users of atomic_dec_and_lock() which must acquire the spin lock with interrupts disabled. To workaround the lack of an irqsave variant of atomic_dec_and_lock() they use local_irq_save() at the call site. This causes extra code and creates in some places unneeded long interrupt disabled times. These places need also extra treatment for PREEMPT_RT due to the disconnect of the irq disabling and the lock function. Implement the missing irqsave variant of the function. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r20180612161621.22645-3-bigeasy@linutronix.de --- include/linux/spinlock.h | 5 +++++ lib/dec_and_lock.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1e8a46435838..fd57888d4942 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -427,6 +427,11 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags); +#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ + __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) + int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, size_t max_size, unsigned int cpu_mult, gfp_t gfp); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 347fa7ac2e8a..9555b68bb774 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) } EXPORT_SYMBOL(_atomic_dec_and_lock); + +int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); -- cgit v1.2.3 From 7ea959c45769612aa92557fb6464679f5fec7d9e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 12 Jun 2018 18:16:21 +0200 Subject: locking/refcounts: Implement refcount_dec_and_lock_irqsave() There are in-tree users of refcount_dec_and_lock() which must acquire the spin lock with interrupts disabled. To workaround the lack of an irqsave variant of refcount_dec_and_lock() they use local_irq_save() at the call site. This causes extra code and creates in some places unneeded long interrupt disabled times. These places need also extra treatment for PREEMPT_RT due to the disconnect of the irq disabling and the lock function. Implement the missing irqsave variant of the function. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r20180612161621.22645-4-bigeasy@linutronix.de [bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g] --- include/linux/refcount.h | 4 +++- lib/refcount.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 4193c41e383a..a685da2c4522 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); - +extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, + spinlock_t *lock, + unsigned long *flags); #endif /* _LINUX_REFCOUNT_H */ diff --git a/lib/refcount.c b/lib/refcount.c index 0eb48353abe3..d3b81cefce91 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) } EXPORT_SYMBOL(refcount_dec_and_lock); +/** + * refcount_dec_and_lock_irqsave - return holding spinlock with disabled + * interrupts if able to decrement refcount to 0 + * @r: the refcount + * @lock: the spinlock to be locked + * @flags: saved IRQ-flags if the is acquired + * + * Same as refcount_dec_and_lock() above except that the spinlock is acquired + * with disabled interupts. + * + * Return: true and hold spinlock if able to decrement refcount to 0, false + * otherwise + */ +bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, + unsigned long *flags) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock_irqsave(lock, *flags); + if (!refcount_dec_and_test(r)) { + spin_unlock_irqrestore(lock, *flags); + return false; + } + + return true; +} +EXPORT_SYMBOL(refcount_dec_and_lock_irqsave); -- cgit v1.2.3 From cf65a0f6f6ff7631ba0ac0513a14ca5b65320d80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Jun 2018 19:01:45 +0200 Subject: dma-mapping: move all DMA mapping code to kernel/dma Currently the code is split over various files with dma- prefixes in the lib/ and drives/base directories, and the number of files keeps growing. Move them into a single directory to keep the code together and remove the file name prefixes. To match the irq infrastructure this directory is placed under the kernel/ directory. Signed-off-by: Christoph Hellwig --- Documentation/driver-api/infrastructure.rst | 4 +- MAINTAINERS | 9 +- drivers/base/Makefile | 3 - drivers/base/dma-coherent.c | 434 ------- drivers/base/dma-contiguous.c | 278 ----- drivers/base/dma-mapping.c | 345 ------ include/linux/dma-contiguous.h | 2 +- init/Kconfig | 4 - kernel/Makefile | 1 + kernel/dma/Kconfig | 50 + kernel/dma/Makefile | 11 + kernel/dma/coherent.c | 434 +++++++ kernel/dma/contiguous.c | 278 +++++ kernel/dma/debug.c | 1773 +++++++++++++++++++++++++++ kernel/dma/direct.c | 204 +++ kernel/dma/mapping.c | 345 ++++++ kernel/dma/noncoherent.c | 102 ++ kernel/dma/swiotlb.c | 1087 ++++++++++++++++ kernel/dma/virt.c | 59 + lib/Kconfig | 47 +- lib/Makefile | 6 - lib/dma-debug.c | 1773 --------------------------- lib/dma-direct.c | 204 --- lib/dma-noncoherent.c | 102 -- lib/dma-virt.c | 61 - lib/swiotlb.c | 1087 ---------------- 26 files changed, 4350 insertions(+), 4353 deletions(-) delete mode 100644 drivers/base/dma-coherent.c delete mode 100644 drivers/base/dma-contiguous.c delete mode 100644 drivers/base/dma-mapping.c create mode 100644 kernel/dma/Kconfig create mode 100644 kernel/dma/Makefile create mode 100644 kernel/dma/coherent.c create mode 100644 kernel/dma/contiguous.c create mode 100644 kernel/dma/debug.c create mode 100644 kernel/dma/direct.c create mode 100644 kernel/dma/mapping.c create mode 100644 kernel/dma/noncoherent.c create mode 100644 kernel/dma/swiotlb.c create mode 100644 kernel/dma/virt.c delete mode 100644 lib/dma-debug.c delete mode 100644 lib/dma-direct.c delete mode 100644 lib/dma-noncoherent.c delete mode 100644 lib/dma-virt.c delete mode 100644 lib/swiotlb.c (limited to 'include/linux') diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst index bee1b9a1702f..6172f3cc3d0b 100644 --- a/Documentation/driver-api/infrastructure.rst +++ b/Documentation/driver-api/infrastructure.rst @@ -49,10 +49,10 @@ Device Drivers Base Device Drivers DMA Management ----------------------------- -.. kernel-doc:: drivers/base/dma-coherent.c +.. kernel-doc:: kernel/dma/coherent.c :export: -.. kernel-doc:: drivers/base/dma-mapping.c +.. kernel-doc:: kernel/dma/mapping.c :export: Device drivers PnP support diff --git a/MAINTAINERS b/MAINTAINERS index c13b9fb3be0b..a6844a9e2f64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4359,12 +4359,7 @@ L: iommu@lists.linux-foundation.org T: git git://git.infradead.org/users/hch/dma-mapping.git W: http://git.infradead.org/users/hch/dma-mapping.git S: Supported -F: lib/dma-debug.c -F: lib/dma-direct.c -F: lib/dma-noncoherent.c -F: lib/dma-virt.c -F: drivers/base/dma-mapping.c -F: drivers/base/dma-coherent.c +F: kernel/dma/ F: include/asm-generic/dma-mapping.h F: include/linux/dma-direct.h F: include/linux/dma-mapping.h @@ -13642,7 +13637,7 @@ M: Konrad Rzeszutek Wilk L: iommu@lists.linux-foundation.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git S: Supported -F: lib/swiotlb.c +F: kernel/dma/swiotlb.c F: arch/*/kernel/pci-swiotlb.c F: include/linux/swiotlb.h diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b074f242a435..704f44295810 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -8,10 +8,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ topology.o container.o property.o cacheinfo.o \ devcon.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o -obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ -obj-$(CONFIG_HAS_DMA) += dma-mapping.o -obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_ISA_BUS_API) += isa.o obj-y += firmware_loader/ obj-$(CONFIG_NUMA) += node.o diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c deleted file mode 100644 index 597d40893862..000000000000 --- a/drivers/base/dma-coherent.c +++ /dev/null @@ -1,434 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Coherent per-device memory handling. - * Borrowed from i386 - */ -#include -#include -#include -#include -#include - -struct dma_coherent_mem { - void *virt_base; - dma_addr_t device_base; - unsigned long pfn_base; - int size; - int flags; - unsigned long *bitmap; - spinlock_t spinlock; - bool use_dev_dma_pfn_offset; -}; - -static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init; - -static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev) -{ - if (dev && dev->dma_mem) - return dev->dma_mem; - return NULL; -} - -static inline dma_addr_t dma_get_device_base(struct device *dev, - struct dma_coherent_mem * mem) -{ - if (mem->use_dev_dma_pfn_offset) - return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT; - else - return mem->device_base; -} - -static int dma_init_coherent_memory( - phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags, - struct dma_coherent_mem **mem) -{ - struct dma_coherent_mem *dma_mem = NULL; - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - int ret; - - if (!size) { - ret = -EINVAL; - goto out; - } - - mem_base = memremap(phys_addr, size, MEMREMAP_WC); - if (!mem_base) { - ret = -EINVAL; - goto out; - } - dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dma_mem) { - ret = -ENOMEM; - goto out; - } - dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dma_mem->bitmap) { - ret = -ENOMEM; - goto out; - } - - dma_mem->virt_base = mem_base; - dma_mem->device_base = device_addr; - dma_mem->pfn_base = PFN_DOWN(phys_addr); - dma_mem->size = pages; - dma_mem->flags = flags; - spin_lock_init(&dma_mem->spinlock); - - *mem = dma_mem; - return 0; - -out: - kfree(dma_mem); - if (mem_base) - memunmap(mem_base); - return ret; -} - -static void dma_release_coherent_memory(struct dma_coherent_mem *mem) -{ - if (!mem) - return; - - memunmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} - -static int dma_assign_coherent_memory(struct device *dev, - struct dma_coherent_mem *mem) -{ - if (!dev) - return -ENODEV; - - if (dev->dma_mem) - return -EBUSY; - - dev->dma_mem = mem; - return 0; -} - -int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - struct dma_coherent_mem *mem; - int ret; - - ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem); - if (ret) - return ret; - - ret = dma_assign_coherent_memory(dev, mem); - if (ret) - dma_release_coherent_memory(mem); - return ret; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dma_release_coherent_memory(mem); - dev->dma_mem = NULL; -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - unsigned long flags; - int pos, err; - - size += device_addr & ~PAGE_MASK; - - if (!mem) - return ERR_PTR(-EINVAL); - - spin_lock_irqsave(&mem->spinlock, flags); - pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem)); - err = bitmap_allocate_region(mem->bitmap, pos, get_order(size)); - spin_unlock_irqrestore(&mem->spinlock, flags); - - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, - ssize_t size, dma_addr_t *dma_handle) -{ - int order = get_order(size); - unsigned long flags; - int pageno; - void *ret; - - spin_lock_irqsave(&mem->spinlock, flags); - - if (unlikely(size > (mem->size << PAGE_SHIFT))) - goto err; - - pageno = bitmap_find_free_region(mem->bitmap, mem->size, order); - if (unlikely(pageno < 0)) - goto err; - - /* - * Memory was found in the coherent area. - */ - *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); - ret = mem->virt_base + (pageno << PAGE_SHIFT); - spin_unlock_irqrestore(&mem->spinlock, flags); - memset(ret, 0, size); - return ret; -err: - spin_unlock_irqrestore(&mem->spinlock, flags); - return NULL; -} - -/** - * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool - * @dev: device from which we allocate memory - * @size: size of requested memory area - * @dma_handle: This will be filled with the correct dma handle - * @ret: This pointer will be filled with the virtual address - * to allocated area. - * - * This function should be only called from per-arch dma_alloc_coherent() - * to support allocation from per-device coherent memory pools. - * - * Returns 0 if dma_alloc_coherent should continue with allocating from - * generic memory areas, or !0 if dma_alloc_coherent should return @ret. - */ -int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); - - if (!mem) - return 0; - - *ret = __dma_alloc_from_coherent(mem, size, dma_handle); - if (*ret) - return 1; - - /* - * In the case where the allocation can not be satisfied from the - * per-device area, try to fall back to generic memory if the - * constraints allow it. - */ - return mem->flags & DMA_MEMORY_EXCLUSIVE; -} -EXPORT_SYMBOL(dma_alloc_from_dev_coherent); - -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle) -{ - if (!dma_coherent_default_memory) - return NULL; - - return __dma_alloc_from_coherent(dma_coherent_default_memory, size, - dma_handle); -} - -static int __dma_release_from_coherent(struct dma_coherent_mem *mem, - int order, void *vaddr) -{ - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - unsigned long flags; - - spin_lock_irqsave(&mem->spinlock, flags); - bitmap_release_region(mem->bitmap, page, order); - spin_unlock_irqrestore(&mem->spinlock, flags); - return 1; - } - return 0; -} - -/** - * dma_release_from_dev_coherent() - free memory to device coherent memory pool - * @dev: device from which the memory was allocated - * @order: the order of pages allocated - * @vaddr: virtual address of allocated pages - * - * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, releases that memory. - * - * Returns 1 if we correctly released the memory, or 0 if the caller should - * proceed with releasing memory from generic pools. - */ -int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); - - return __dma_release_from_coherent(mem, order, vaddr); -} -EXPORT_SYMBOL(dma_release_from_dev_coherent); - -int dma_release_from_global_coherent(int order, void *vaddr) -{ - if (!dma_coherent_default_memory) - return 0; - - return __dma_release_from_coherent(dma_coherent_default_memory, order, - vaddr); -} - -static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, - struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) -{ - if (mem && vaddr >= mem->virt_base && vaddr + size <= - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - unsigned long off = vma->vm_pgoff; - int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; - int user_count = vma_pages(vma); - int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - *ret = -ENXIO; - if (off < count && user_count <= count - off) { - unsigned long pfn = mem->pfn_base + start + off; - *ret = remap_pfn_range(vma, vma->vm_start, pfn, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - return 1; - } - return 0; -} - -/** - * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool - * @dev: device from which the memory was allocated - * @vma: vm_area for the userspace memory - * @vaddr: cpu address returned by dma_alloc_from_dev_coherent - * @size: size of the memory buffer allocated - * @ret: result from remap_pfn_range() - * - * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, maps that memory to the provided vma. - * - * Returns 1 if @vaddr belongs to the device coherent pool and the caller - * should return @ret, or 0 if they should proceed with mapping memory from - * generic areas. - */ -int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, - void *vaddr, size_t size, int *ret) -{ - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); - - return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); -} -EXPORT_SYMBOL(dma_mmap_from_dev_coherent); - -int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, - size_t size, int *ret) -{ - if (!dma_coherent_default_memory) - return 0; - - return __dma_mmap_from_coherent(dma_coherent_default_memory, vma, - vaddr, size, ret); -} - -/* - * Support for reserved memory regions defined in device tree - */ -#ifdef CONFIG_OF_RESERVED_MEM -#include -#include -#include - -static struct reserved_mem *dma_reserved_default_memory __initdata; - -static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) -{ - struct dma_coherent_mem *mem = rmem->priv; - int ret; - - if (!mem) { - ret = dma_init_coherent_memory(rmem->base, rmem->base, - rmem->size, - DMA_MEMORY_EXCLUSIVE, &mem); - if (ret) { - pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n", - &rmem->base, (unsigned long)rmem->size / SZ_1M); - return ret; - } - } - mem->use_dev_dma_pfn_offset = true; - rmem->priv = mem; - dma_assign_coherent_memory(dev, mem); - return 0; -} - -static void rmem_dma_device_release(struct reserved_mem *rmem, - struct device *dev) -{ - if (dev) - dev->dma_mem = NULL; -} - -static const struct reserved_mem_ops rmem_dma_ops = { - .device_init = rmem_dma_device_init, - .device_release = rmem_dma_device_release, -}; - -static int __init rmem_dma_setup(struct reserved_mem *rmem) -{ - unsigned long node = rmem->fdt_node; - - if (of_get_flat_dt_prop(node, "reusable", NULL)) - return -EINVAL; - -#ifdef CONFIG_ARM - if (!of_get_flat_dt_prop(node, "no-map", NULL)) { - pr_err("Reserved memory: regions without no-map are not yet supported\n"); - return -EINVAL; - } - - if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) { - WARN(dma_reserved_default_memory, - "Reserved memory: region for default DMA coherent area is redefined\n"); - dma_reserved_default_memory = rmem; - } -#endif - - rmem->ops = &rmem_dma_ops; - pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n", - &rmem->base, (unsigned long)rmem->size / SZ_1M); - return 0; -} - -static int __init dma_init_reserved_memory(void) -{ - const struct reserved_mem_ops *ops; - int ret; - - if (!dma_reserved_default_memory) - return -ENOMEM; - - ops = dma_reserved_default_memory->ops; - - /* - * We rely on rmem_dma_device_init() does not propagate error of - * dma_assign_coherent_memory() for "NULL" device. - */ - ret = ops->device_init(dma_reserved_default_memory, NULL); - - if (!ret) { - dma_coherent_default_memory = dma_reserved_default_memory->priv; - pr_info("DMA: default coherent area is set\n"); - } - - return ret; -} - -core_initcall(dma_init_reserved_memory); - -RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); -#endif diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c deleted file mode 100644 index d987dcd1bd56..000000000000 --- a/drivers/base/dma-contiguous.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - */ - -#define pr_fmt(fmt) "cma: " fmt - -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_CMA_SIZE_MBYTES -#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES -#else -#define CMA_SIZE_MBYTES 0 -#endif - -struct cma *dma_contiguous_default_area; - -/* - * Default global CMA area size can be defined in kernel's .config. - * This is useful mainly for distro maintainers to create a kernel - * that works correctly for most supported systems. - * The size can be set in bytes or as a percentage of the total memory - * in the system. - * - * Users, who want to set the size of global CMA area for their system - * should use cma= kernel parameter. - */ -static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; -static phys_addr_t size_cmdline = -1; -static phys_addr_t base_cmdline; -static phys_addr_t limit_cmdline; - -static int __init early_cma(char *p) -{ - pr_debug("%s(%s)\n", __func__, p); - size_cmdline = memparse(p, &p); - if (*p != '@') - return 0; - base_cmdline = memparse(p + 1, &p); - if (*p != '-') { - limit_cmdline = base_cmdline + size_cmdline; - return 0; - } - limit_cmdline = memparse(p + 1, &p); - - return 0; -} -early_param("cma", early_cma); - -#ifdef CONFIG_CMA_SIZE_PERCENTAGE - -static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) -{ - struct memblock_region *reg; - unsigned long total_pages = 0; - - /* - * We cannot use memblock_phys_mem_size() here, because - * memblock_analyze() has not been called yet. - */ - for_each_memblock(memory, reg) - total_pages += memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - - return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; -} - -#else - -static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) -{ - return 0; -} - -#endif - -/** - * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling - * @limit: End address of the reserved memory (optional, 0 for any). - * - * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) - * has been activated and all other subsystems have already allocated/reserved - * memory. - */ -void __init dma_contiguous_reserve(phys_addr_t limit) -{ - phys_addr_t selected_size = 0; - phys_addr_t selected_base = 0; - phys_addr_t selected_limit = limit; - bool fixed = false; - - pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); - - if (size_cmdline != -1) { - selected_size = size_cmdline; - selected_base = base_cmdline; - selected_limit = min_not_zero(limit_cmdline, limit); - if (base_cmdline + size_cmdline == limit_cmdline) - fixed = true; - } else { -#ifdef CONFIG_CMA_SIZE_SEL_MBYTES - selected_size = size_bytes; -#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) - selected_size = cma_early_percent_memory(); -#elif defined(CONFIG_CMA_SIZE_SEL_MIN) - selected_size = min(size_bytes, cma_early_percent_memory()); -#elif defined(CONFIG_CMA_SIZE_SEL_MAX) - selected_size = max(size_bytes, cma_early_percent_memory()); -#endif - } - - if (selected_size && !dma_contiguous_default_area) { - pr_debug("%s: reserving %ld MiB for global area\n", __func__, - (unsigned long)selected_size / SZ_1M); - - dma_contiguous_reserve_area(selected_size, selected_base, - selected_limit, - &dma_contiguous_default_area, - fixed); - } -} - -/** - * dma_contiguous_reserve_area() - reserve custom contiguous area - * @size: Size of the reserved area (in bytes), - * @base: Base address of the reserved area optional, use 0 for any - * @limit: End address of the reserved memory (optional, 0 for any). - * @res_cma: Pointer to store the created cma region. - * @fixed: hint about where to place the reserved area - * - * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) - * has been activated and all other subsystems have already allocated/reserved - * memory. This function allows to create custom reserved areas for specific - * devices. - * - * If @fixed is true, reserve contiguous area at exactly @base. If false, - * reserve in range from @base to @limit. - */ -int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed) -{ - int ret; - - ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, - "reserved", res_cma); - if (ret) - return ret; - - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(cma_get_base(*res_cma), - cma_get_size(*res_cma)); - - return 0; -} - -/** - * dma_alloc_from_contiguous() - allocate pages from contiguous area - * @dev: Pointer to device for which the allocation is performed. - * @count: Requested number of pages. - * @align: Requested alignment of pages (in PAGE_SIZE order). - * @gfp_mask: GFP flags to use for this allocation. - * - * This function allocates memory buffer for specified device. It uses - * device specific contiguous memory area if available or the default - * global one. Requires architecture specific dev_get_cma_area() helper - * function. - */ -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int align, gfp_t gfp_mask) -{ - if (align > CONFIG_CMA_ALIGNMENT) - align = CONFIG_CMA_ALIGNMENT; - - return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask); -} - -/** - * dma_release_from_contiguous() - release allocated pages - * @dev: Pointer to device for which the pages were allocated. - * @pages: Allocated pages. - * @count: Number of allocated pages. - * - * This function releases memory allocated by dma_alloc_from_contiguous(). - * It returns false when provided pages do not belong to contiguous area and - * true otherwise. - */ -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - return cma_release(dev_get_cma_area(dev), pages, count); -} - -/* - * Support for reserved memory regions defined in device tree - */ -#ifdef CONFIG_OF_RESERVED_MEM -#include -#include -#include - -#undef pr_fmt -#define pr_fmt(fmt) fmt - -static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev) -{ - dev_set_cma_area(dev, rmem->priv); - return 0; -} - -static void rmem_cma_device_release(struct reserved_mem *rmem, - struct device *dev) -{ - dev_set_cma_area(dev, NULL); -} - -static const struct reserved_mem_ops rmem_cma_ops = { - .device_init = rmem_cma_device_init, - .device_release = rmem_cma_device_release, -}; - -static int __init rmem_cma_setup(struct reserved_mem *rmem) -{ - phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); - phys_addr_t mask = align - 1; - unsigned long node = rmem->fdt_node; - struct cma *cma; - int err; - - if (!of_get_flat_dt_prop(node, "reusable", NULL) || - of_get_flat_dt_prop(node, "no-map", NULL)) - return -EINVAL; - - if ((rmem->base & mask) || (rmem->size & mask)) { - pr_err("Reserved memory: incorrect alignment of CMA region\n"); - return -EINVAL; - } - - err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); - if (err) { - pr_err("Reserved memory: unable to setup CMA region\n"); - return err; - } - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(rmem->base, rmem->size); - - if (of_get_flat_dt_prop(node, "linux,cma-default", NULL)) - dma_contiguous_set_default(cma); - - rmem->ops = &rmem_cma_ops; - rmem->priv = cma; - - pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", - &rmem->base, (unsigned long)rmem->size / SZ_1M); - - return 0; -} -RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup); -#endif diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c deleted file mode 100644 index f831a582209c..000000000000 --- a/drivers/base/dma-mapping.c +++ /dev/null @@ -1,345 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * drivers/base/dma-mapping.c - arch-independent dma-mapping routines - * - * Copyright (c) 2006 SUSE Linux Products GmbH - * Copyright (c) 2006 Tejun Heo - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * Managed DMA API - */ -struct dma_devres { - size_t size; - void *vaddr; - dma_addr_t dma_handle; - unsigned long attrs; -}; - -static void dmam_release(struct device *dev, void *res) -{ - struct dma_devres *this = res; - - dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle, - this->attrs); -} - -static int dmam_match(struct device *dev, void *res, void *match_data) -{ - struct dma_devres *this = res, *match = match_data; - - if (this->vaddr == match->vaddr) { - WARN_ON(this->size != match->size || - this->dma_handle != match->dma_handle); - return 1; - } - return 0; -} - -/** - * dmam_alloc_coherent - Managed dma_alloc_coherent() - * @dev: Device to allocate coherent memory for - * @size: Size of allocation - * @dma_handle: Out argument for allocated DMA handle - * @gfp: Allocation flags - * - * Managed dma_alloc_coherent(). Memory allocated using this function - * will be automatically released on driver detach. - * - * RETURNS: - * Pointer to allocated memory on success, NULL on failure. - */ -void *dmam_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - struct dma_devres *dr; - void *vaddr; - - dr = devres_alloc(dmam_release, sizeof(*dr), gfp); - if (!dr) - return NULL; - - vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp); - if (!vaddr) { - devres_free(dr); - return NULL; - } - - dr->vaddr = vaddr; - dr->dma_handle = *dma_handle; - dr->size = size; - - devres_add(dev, dr); - - return vaddr; -} -EXPORT_SYMBOL(dmam_alloc_coherent); - -/** - * dmam_free_coherent - Managed dma_free_coherent() - * @dev: Device to free coherent memory for - * @size: Size of allocation - * @vaddr: Virtual address of the memory to free - * @dma_handle: DMA handle of the memory to free - * - * Managed dma_free_coherent(). - */ -void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - struct dma_devres match_data = { size, vaddr, dma_handle }; - - dma_free_coherent(dev, size, vaddr, dma_handle); - WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data)); -} -EXPORT_SYMBOL(dmam_free_coherent); - -/** - * dmam_alloc_attrs - Managed dma_alloc_attrs() - * @dev: Device to allocate non_coherent memory for - * @size: Size of allocation - * @dma_handle: Out argument for allocated DMA handle - * @gfp: Allocation flags - * @attrs: Flags in the DMA_ATTR_* namespace. - * - * Managed dma_alloc_attrs(). Memory allocated using this function will be - * automatically released on driver detach. - * - * RETURNS: - * Pointer to allocated memory on success, NULL on failure. - */ -void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - struct dma_devres *dr; - void *vaddr; - - dr = devres_alloc(dmam_release, sizeof(*dr), gfp); - if (!dr) - return NULL; - - vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs); - if (!vaddr) { - devres_free(dr); - return NULL; - } - - dr->vaddr = vaddr; - dr->dma_handle = *dma_handle; - dr->size = size; - dr->attrs = attrs; - - devres_add(dev, dr); - - return vaddr; -} -EXPORT_SYMBOL(dmam_alloc_attrs); - -#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT - -static void dmam_coherent_decl_release(struct device *dev, void *res) -{ - dma_release_declared_memory(dev); -} - -/** - * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory() - * @dev: Device to declare coherent memory for - * @phys_addr: Physical address of coherent memory to be declared - * @device_addr: Device address of coherent memory to be declared - * @size: Size of coherent memory to be declared - * @flags: Flags - * - * Managed dma_declare_coherent_memory(). - * - * RETURNS: - * 0 on success, -errno on failure. - */ -int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void *res; - int rc; - - res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL); - if (!res) - return -ENOMEM; - - rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size, - flags); - if (!rc) - devres_add(dev, res); - else - devres_free(res); - - return rc; -} -EXPORT_SYMBOL(dmam_declare_coherent_memory); - -/** - * dmam_release_declared_memory - Managed dma_release_declared_memory(). - * @dev: Device to release declared coherent memory for - * - * Managed dmam_release_declared_memory(). - */ -void dmam_release_declared_memory(struct device *dev) -{ - WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL)); -} -EXPORT_SYMBOL(dmam_release_declared_memory); - -#endif - -/* - * Create scatter-list for the already allocated DMA buffer. - */ -int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size) -{ - struct page *page = virt_to_page(cpu_addr); - int ret; - - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (unlikely(ret)) - return ret; - - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - return 0; -} -EXPORT_SYMBOL(dma_common_get_sgtable); - -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - int ret = -ENXIO; -#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); -#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ - - return ret; -} -EXPORT_SYMBOL(dma_common_mmap); - -#ifdef CONFIG_MMU -static struct vm_struct *__dma_common_pages_remap(struct page **pages, - size_t size, unsigned long vm_flags, pgprot_t prot, - const void *caller) -{ - struct vm_struct *area; - - area = get_vm_area_caller(size, vm_flags, caller); - if (!area) - return NULL; - - if (map_vm_area(area, prot, pages)) { - vunmap(area->addr); - return NULL; - } - - return area; -} - -/* - * remaps an array of PAGE_SIZE pages into another vm_area - * Cannot be used in non-sleeping contexts - */ -void *dma_common_pages_remap(struct page **pages, size_t size, - unsigned long vm_flags, pgprot_t prot, - const void *caller) -{ - struct vm_struct *area; - - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); - if (!area) - return NULL; - - area->pages = pages; - - return area->addr; -} - -/* - * remaps an allocated contiguous region into another vm_area. - * Cannot be used in non-sleeping contexts - */ - -void *dma_common_contiguous_remap(struct page *page, size_t size, - unsigned long vm_flags, - pgprot_t prot, const void *caller) -{ - int i; - struct page **pages; - struct vm_struct *area; - - pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); - if (!pages) - return NULL; - - for (i = 0; i < (size >> PAGE_SHIFT); i++) - pages[i] = nth_page(page, i); - - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); - - kfree(pages); - - if (!area) - return NULL; - return area->addr; -} - -/* - * unmaps a range previously mapped by dma_common_*_remap - */ -void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) -{ - struct vm_struct *area = find_vm_area(cpu_addr); - - if (!area || (area->flags & vm_flags) != vm_flags) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); - return; - } - - unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); - vunmap(cpu_addr); -} -#endif - -/* - * enables DMA API use for a device - */ -int dma_configure(struct device *dev) -{ - if (dev->bus->dma_configure) - return dev->bus->dma_configure(dev); - return 0; -} - -void dma_deconfigure(struct device *dev) -{ - of_dma_deconfigure(dev); - acpi_dma_deconfigure(dev); -} diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index b67bf6ac907d..3c5a4cb3eb95 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -48,7 +48,7 @@ * CMA should not be used by the device drivers directly. It is * only a helper framework for dma-mapping subsystem. * - * For more information, see kernel-docs in drivers/base/dma-contiguous.c + * For more information, see kernel-docs in kernel/dma/contiguous.c */ #ifdef __KERNEL__ diff --git a/init/Kconfig b/init/Kconfig index 5a52f07259a2..fde3d09e8b27 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1719,10 +1719,6 @@ source "arch/Kconfig" endmenu # General setup -config HAVE_GENERIC_DMA_COHERENT - bool - default n - config RT_MUTEXES bool diff --git a/kernel/Makefile b/kernel/Makefile index d2001624fe7a..04bc07c2b42a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,6 +41,7 @@ obj-y += printk/ obj-y += irq/ obj-y += rcu/ obj-y += livepatch/ +obj-y += dma/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig new file mode 100644 index 000000000000..9bd54304446f --- /dev/null +++ b/kernel/dma/Kconfig @@ -0,0 +1,50 @@ + +config HAS_DMA + bool + depends on !NO_DMA + default y + +config NEED_SG_DMA_LENGTH + bool + +config NEED_DMA_MAP_STATE + bool + +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT || PHYS_ADDR_T_64BIT + +config HAVE_GENERIC_DMA_COHERENT + bool + +config ARCH_HAS_SYNC_DMA_FOR_DEVICE + bool + +config ARCH_HAS_SYNC_DMA_FOR_CPU + bool + select NEED_DMA_MAP_STATE + +config DMA_DIRECT_OPS + bool + depends on HAS_DMA + +config DMA_NONCOHERENT_OPS + bool + depends on HAS_DMA + select DMA_DIRECT_OPS + +config DMA_NONCOHERENT_MMAP + bool + depends on DMA_NONCOHERENT_OPS + +config DMA_NONCOHERENT_CACHE_SYNC + bool + depends on DMA_NONCOHERENT_OPS + +config DMA_VIRT_OPS + bool + depends on HAS_DMA + +config SWIOTLB + bool + select DMA_DIRECT_OPS + select NEED_DMA_MAP_STATE diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile new file mode 100644 index 000000000000..6de44e4eb454 --- /dev/null +++ b/kernel/dma/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_HAS_DMA) += mapping.o +obj-$(CONFIG_DMA_CMA) += contiguous.o +obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o +obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o +obj-$(CONFIG_DMA_NONCOHERENT_OPS) += noncoherent.o +obj-$(CONFIG_DMA_VIRT_OPS) += virt.o +obj-$(CONFIG_DMA_API_DEBUG) += debug.o +obj-$(CONFIG_SWIOTLB) += swiotlb.o + diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c new file mode 100644 index 000000000000..597d40893862 --- /dev/null +++ b/kernel/dma/coherent.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Coherent per-device memory handling. + * Borrowed from i386 + */ +#include +#include +#include +#include +#include + +struct dma_coherent_mem { + void *virt_base; + dma_addr_t device_base; + unsigned long pfn_base; + int size; + int flags; + unsigned long *bitmap; + spinlock_t spinlock; + bool use_dev_dma_pfn_offset; +}; + +static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init; + +static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev) +{ + if (dev && dev->dma_mem) + return dev->dma_mem; + return NULL; +} + +static inline dma_addr_t dma_get_device_base(struct device *dev, + struct dma_coherent_mem * mem) +{ + if (mem->use_dev_dma_pfn_offset) + return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT; + else + return mem->device_base; +} + +static int dma_init_coherent_memory( + phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags, + struct dma_coherent_mem **mem) +{ + struct dma_coherent_mem *dma_mem = NULL; + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + int ret; + + if (!size) { + ret = -EINVAL; + goto out; + } + + mem_base = memremap(phys_addr, size, MEMREMAP_WC); + if (!mem_base) { + ret = -EINVAL; + goto out; + } + dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dma_mem) { + ret = -ENOMEM; + goto out; + } + dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dma_mem->bitmap) { + ret = -ENOMEM; + goto out; + } + + dma_mem->virt_base = mem_base; + dma_mem->device_base = device_addr; + dma_mem->pfn_base = PFN_DOWN(phys_addr); + dma_mem->size = pages; + dma_mem->flags = flags; + spin_lock_init(&dma_mem->spinlock); + + *mem = dma_mem; + return 0; + +out: + kfree(dma_mem); + if (mem_base) + memunmap(mem_base); + return ret; +} + +static void dma_release_coherent_memory(struct dma_coherent_mem *mem) +{ + if (!mem) + return; + + memunmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} + +static int dma_assign_coherent_memory(struct device *dev, + struct dma_coherent_mem *mem) +{ + if (!dev) + return -ENODEV; + + if (dev->dma_mem) + return -EBUSY; + + dev->dma_mem = mem; + return 0; +} + +int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + struct dma_coherent_mem *mem; + int ret; + + ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem); + if (ret) + return ret; + + ret = dma_assign_coherent_memory(dev, mem); + if (ret) + dma_release_coherent_memory(mem); + return ret; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dma_release_coherent_memory(mem); + dev->dma_mem = NULL; +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + unsigned long flags; + int pos, err; + + size += device_addr & ~PAGE_MASK; + + if (!mem) + return ERR_PTR(-EINVAL); + + spin_lock_irqsave(&mem->spinlock, flags); + pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem)); + err = bitmap_allocate_region(mem->bitmap, pos, get_order(size)); + spin_unlock_irqrestore(&mem->spinlock, flags); + + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, + ssize_t size, dma_addr_t *dma_handle) +{ + int order = get_order(size); + unsigned long flags; + int pageno; + void *ret; + + spin_lock_irqsave(&mem->spinlock, flags); + + if (unlikely(size > (mem->size << PAGE_SHIFT))) + goto err; + + pageno = bitmap_find_free_region(mem->bitmap, mem->size, order); + if (unlikely(pageno < 0)) + goto err; + + /* + * Memory was found in the coherent area. + */ + *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); + ret = mem->virt_base + (pageno << PAGE_SHIFT); + spin_unlock_irqrestore(&mem->spinlock, flags); + memset(ret, 0, size); + return ret; +err: + spin_unlock_irqrestore(&mem->spinlock, flags); + return NULL; +} + +/** + * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool + * @dev: device from which we allocate memory + * @size: size of requested memory area + * @dma_handle: This will be filled with the correct dma handle + * @ret: This pointer will be filled with the virtual address + * to allocated area. + * + * This function should be only called from per-arch dma_alloc_coherent() + * to support allocation from per-device coherent memory pools. + * + * Returns 0 if dma_alloc_coherent should continue with allocating from + * generic memory areas, or !0 if dma_alloc_coherent should return @ret. + */ +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + if (!mem) + return 0; + + *ret = __dma_alloc_from_coherent(mem, size, dma_handle); + if (*ret) + return 1; + + /* + * In the case where the allocation can not be satisfied from the + * per-device area, try to fall back to generic memory if the + * constraints allow it. + */ + return mem->flags & DMA_MEMORY_EXCLUSIVE; +} +EXPORT_SYMBOL(dma_alloc_from_dev_coherent); + +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle) +{ + if (!dma_coherent_default_memory) + return NULL; + + return __dma_alloc_from_coherent(dma_coherent_default_memory, size, + dma_handle); +} + +static int __dma_release_from_coherent(struct dma_coherent_mem *mem, + int order, void *vaddr) +{ + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + unsigned long flags; + + spin_lock_irqsave(&mem->spinlock, flags); + bitmap_release_region(mem->bitmap, page, order); + spin_unlock_irqrestore(&mem->spinlock, flags); + return 1; + } + return 0; +} + +/** + * dma_release_from_dev_coherent() - free memory to device coherent memory pool + * @dev: device from which the memory was allocated + * @order: the order of pages allocated + * @vaddr: virtual address of allocated pages + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, releases that memory. + * + * Returns 1 if we correctly released the memory, or 0 if the caller should + * proceed with releasing memory from generic pools. + */ +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + return __dma_release_from_coherent(mem, order, vaddr); +} +EXPORT_SYMBOL(dma_release_from_dev_coherent); + +int dma_release_from_global_coherent(int order, void *vaddr) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_release_from_coherent(dma_coherent_default_memory, order, + vaddr); +} + +static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, + struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) +{ + if (mem && vaddr >= mem->virt_base && vaddr + size <= + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + unsigned long off = vma->vm_pgoff; + int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; + int user_count = vma_pages(vma); + int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + *ret = -ENXIO; + if (off < count && user_count <= count - off) { + unsigned long pfn = mem->pfn_base + start + off; + *ret = remap_pfn_range(vma, vma->vm_start, pfn, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + } + return 1; + } + return 0; +} + +/** + * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_dev_coherent + * @size: size of the memory buffer allocated + * @ret: result from remap_pfn_range() + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if @vaddr belongs to the device coherent pool and the caller + * should return @ret, or 0 if they should proceed with mapping memory from + * generic areas. + */ +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); +} +EXPORT_SYMBOL(dma_mmap_from_dev_coherent); + +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, + size_t size, int *ret) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_mmap_from_coherent(dma_coherent_default_memory, vma, + vaddr, size, ret); +} + +/* + * Support for reserved memory regions defined in device tree + */ +#ifdef CONFIG_OF_RESERVED_MEM +#include +#include +#include + +static struct reserved_mem *dma_reserved_default_memory __initdata; + +static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) +{ + struct dma_coherent_mem *mem = rmem->priv; + int ret; + + if (!mem) { + ret = dma_init_coherent_memory(rmem->base, rmem->base, + rmem->size, + DMA_MEMORY_EXCLUSIVE, &mem); + if (ret) { + pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + return ret; + } + } + mem->use_dev_dma_pfn_offset = true; + rmem->priv = mem; + dma_assign_coherent_memory(dev, mem); + return 0; +} + +static void rmem_dma_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + if (dev) + dev->dma_mem = NULL; +} + +static const struct reserved_mem_ops rmem_dma_ops = { + .device_init = rmem_dma_device_init, + .device_release = rmem_dma_device_release, +}; + +static int __init rmem_dma_setup(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (of_get_flat_dt_prop(node, "reusable", NULL)) + return -EINVAL; + +#ifdef CONFIG_ARM + if (!of_get_flat_dt_prop(node, "no-map", NULL)) { + pr_err("Reserved memory: regions without no-map are not yet supported\n"); + return -EINVAL; + } + + if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) { + WARN(dma_reserved_default_memory, + "Reserved memory: region for default DMA coherent area is redefined\n"); + dma_reserved_default_memory = rmem; + } +#endif + + rmem->ops = &rmem_dma_ops; + pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + return 0; +} + +static int __init dma_init_reserved_memory(void) +{ + const struct reserved_mem_ops *ops; + int ret; + + if (!dma_reserved_default_memory) + return -ENOMEM; + + ops = dma_reserved_default_memory->ops; + + /* + * We rely on rmem_dma_device_init() does not propagate error of + * dma_assign_coherent_memory() for "NULL" device. + */ + ret = ops->device_init(dma_reserved_default_memory, NULL); + + if (!ret) { + dma_coherent_default_memory = dma_reserved_default_memory->priv; + pr_info("DMA: default coherent area is set\n"); + } + + return ret; +} + +core_initcall(dma_init_reserved_memory); + +RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); +#endif diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c new file mode 100644 index 000000000000..d987dcd1bd56 --- /dev/null +++ b/kernel/dma/contiguous.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Contiguous Memory Allocator for DMA mapping framework + * Copyright (c) 2010-2011 by Samsung Electronics. + * Written by: + * Marek Szyprowski + * Michal Nazarewicz + */ + +#define pr_fmt(fmt) "cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_CMA_SIZE_MBYTES +#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES +#else +#define CMA_SIZE_MBYTES 0 +#endif + +struct cma *dma_contiguous_default_area; + +/* + * Default global CMA area size can be defined in kernel's .config. + * This is useful mainly for distro maintainers to create a kernel + * that works correctly for most supported systems. + * The size can be set in bytes or as a percentage of the total memory + * in the system. + * + * Users, who want to set the size of global CMA area for their system + * should use cma= kernel parameter. + */ +static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; +static phys_addr_t size_cmdline = -1; +static phys_addr_t base_cmdline; +static phys_addr_t limit_cmdline; + +static int __init early_cma(char *p) +{ + pr_debug("%s(%s)\n", __func__, p); + size_cmdline = memparse(p, &p); + if (*p != '@') + return 0; + base_cmdline = memparse(p + 1, &p); + if (*p != '-') { + limit_cmdline = base_cmdline + size_cmdline; + return 0; + } + limit_cmdline = memparse(p + 1, &p); + + return 0; +} +early_param("cma", early_cma); + +#ifdef CONFIG_CMA_SIZE_PERCENTAGE + +static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) +{ + struct memblock_region *reg; + unsigned long total_pages = 0; + + /* + * We cannot use memblock_phys_mem_size() here, because + * memblock_analyze() has not been called yet. + */ + for_each_memblock(memory, reg) + total_pages += memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + + return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; +} + +#else + +static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) +{ + return 0; +} + +#endif + +/** + * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. + */ +void __init dma_contiguous_reserve(phys_addr_t limit) +{ + phys_addr_t selected_size = 0; + phys_addr_t selected_base = 0; + phys_addr_t selected_limit = limit; + bool fixed = false; + + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); + + if (size_cmdline != -1) { + selected_size = size_cmdline; + selected_base = base_cmdline; + selected_limit = min_not_zero(limit_cmdline, limit); + if (base_cmdline + size_cmdline == limit_cmdline) + fixed = true; + } else { +#ifdef CONFIG_CMA_SIZE_SEL_MBYTES + selected_size = size_bytes; +#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) + selected_size = cma_early_percent_memory(); +#elif defined(CONFIG_CMA_SIZE_SEL_MIN) + selected_size = min(size_bytes, cma_early_percent_memory()); +#elif defined(CONFIG_CMA_SIZE_SEL_MAX) + selected_size = max(size_bytes, cma_early_percent_memory()); +#endif + } + + if (selected_size && !dma_contiguous_default_area) { + pr_debug("%s: reserving %ld MiB for global area\n", __func__, + (unsigned long)selected_size / SZ_1M); + + dma_contiguous_reserve_area(selected_size, selected_base, + selected_limit, + &dma_contiguous_default_area, + fixed); + } +} + +/** + * dma_contiguous_reserve_area() - reserve custom contiguous area + * @size: Size of the reserved area (in bytes), + * @base: Base address of the reserved area optional, use 0 for any + * @limit: End address of the reserved memory (optional, 0 for any). + * @res_cma: Pointer to store the created cma region. + * @fixed: hint about where to place the reserved area + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas for specific + * devices. + * + * If @fixed is true, reserve contiguous area at exactly @base. If false, + * reserve in range from @base to @limit. + */ +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ + int ret; + + ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, + "reserved", res_cma); + if (ret) + return ret; + + /* Architecture specific contiguous memory fixup. */ + dma_contiguous_early_fixup(cma_get_base(*res_cma), + cma_get_size(*res_cma)); + + return 0; +} + +/** + * dma_alloc_from_contiguous() - allocate pages from contiguous area + * @dev: Pointer to device for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). + * @gfp_mask: GFP flags to use for this allocation. + * + * This function allocates memory buffer for specified device. It uses + * device specific contiguous memory area if available or the default + * global one. Requires architecture specific dev_get_cma_area() helper + * function. + */ +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, + unsigned int align, gfp_t gfp_mask) +{ + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask); +} + +/** + * dma_release_from_contiguous() - release allocated pages + * @dev: Pointer to device for which the pages were allocated. + * @pages: Allocated pages. + * @count: Number of allocated pages. + * + * This function releases memory allocated by dma_alloc_from_contiguous(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. + */ +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count) +{ + return cma_release(dev_get_cma_area(dev), pages, count); +} + +/* + * Support for reserved memory regions defined in device tree + */ +#ifdef CONFIG_OF_RESERVED_MEM +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) fmt + +static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev) +{ + dev_set_cma_area(dev, rmem->priv); + return 0; +} + +static void rmem_cma_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev_set_cma_area(dev, NULL); +} + +static const struct reserved_mem_ops rmem_cma_ops = { + .device_init = rmem_cma_device_init, + .device_release = rmem_cma_device_release, +}; + +static int __init rmem_cma_setup(struct reserved_mem *rmem) +{ + phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); + phys_addr_t mask = align - 1; + unsigned long node = rmem->fdt_node; + struct cma *cma; + int err; + + if (!of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + if ((rmem->base & mask) || (rmem->size & mask)) { + pr_err("Reserved memory: incorrect alignment of CMA region\n"); + return -EINVAL; + } + + err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); + if (err) { + pr_err("Reserved memory: unable to setup CMA region\n"); + return err; + } + /* Architecture specific contiguous memory fixup. */ + dma_contiguous_early_fixup(rmem->base, rmem->size); + + if (of_get_flat_dt_prop(node, "linux,cma-default", NULL)) + dma_contiguous_set_default(cma); + + rmem->ops = &rmem_cma_ops; + rmem->priv = cma; + + pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + + return 0; +} +RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup); +#endif diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c new file mode 100644 index 000000000000..c007d25bee09 --- /dev/null +++ b/kernel/dma/debug.c @@ -0,0 +1,1773 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define HASH_SIZE 1024ULL +#define HASH_FN_SHIFT 13 +#define HASH_FN_MASK (HASH_SIZE - 1) + +/* allow architectures to override this if absolutely required */ +#ifndef PREALLOC_DMA_DEBUG_ENTRIES +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +#endif + +enum { + dma_debug_single, + dma_debug_page, + dma_debug_sg, + dma_debug_coherent, + dma_debug_resource, +}; + +enum map_err_types { + MAP_ERR_CHECK_NOT_APPLICABLE, + MAP_ERR_NOT_CHECKED, + MAP_ERR_CHECKED, +}; + +#define DMA_DEBUG_STACKTRACE_ENTRIES 5 + +/** + * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping + * @list: node on pre-allocated free_entries list + * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @type: single, page, sg, coherent + * @pfn: page frame of the start address + * @offset: offset of mapping relative to pfn + * @size: length of the mapping + * @direction: enum dma_data_direction + * @sg_call_ents: 'nents' from dma_map_sg + * @sg_mapped_ents: 'mapped_ents' from dma_map_sg + * @map_err_type: track whether dma_mapping_error() was checked + * @stacktrace: support backtraces when a violation is detected + */ +struct dma_debug_entry { + struct list_head list; + struct device *dev; + int type; + unsigned long pfn; + size_t offset; + u64 dev_addr; + u64 size; + int direction; + int sg_call_ents; + int sg_mapped_ents; + enum map_err_types map_err_type; +#ifdef CONFIG_STACKTRACE + struct stack_trace stacktrace; + unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; +#endif +}; + +typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *); + +struct hash_bucket { + struct list_head list; + spinlock_t lock; +} ____cacheline_aligned_in_smp; + +/* Hash list to save the allocated dma addresses */ +static struct hash_bucket dma_entry_hash[HASH_SIZE]; +/* List of pre-allocated dma_debug_entry's */ +static LIST_HEAD(free_entries); +/* Lock for the list above */ +static DEFINE_SPINLOCK(free_entries_lock); + +/* Global disable flag - will be set in case of an error */ +static bool global_disable __read_mostly; + +/* Early initialization disable flag, set at the end of dma_debug_init */ +static bool dma_debug_initialized __read_mostly; + +static inline bool dma_debug_disabled(void) +{ + return global_disable || !dma_debug_initialized; +} + +/* Global error count */ +static u32 error_count; + +/* Global error show enable*/ +static u32 show_all_errors __read_mostly; +/* Number of errors to show */ +static u32 show_num_errors = 1; + +static u32 num_free_entries; +static u32 min_free_entries; +static u32 nr_total_entries; + +/* number of preallocated entries requested by kernel cmdline */ +static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; + +/* debugfs dentry's for the stuff above */ +static struct dentry *dma_debug_dent __read_mostly; +static struct dentry *global_disable_dent __read_mostly; +static struct dentry *error_count_dent __read_mostly; +static struct dentry *show_all_errors_dent __read_mostly; +static struct dentry *show_num_errors_dent __read_mostly; +static struct dentry *num_free_entries_dent __read_mostly; +static struct dentry *min_free_entries_dent __read_mostly; +static struct dentry *filter_dent __read_mostly; + +/* per-driver filter related state */ + +#define NAME_MAX_LEN 64 + +static char current_driver_name[NAME_MAX_LEN] __read_mostly; +static struct device_driver *current_driver __read_mostly; + +static DEFINE_RWLOCK(driver_name_lock); + +static const char *const maperr2str[] = { + [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", + [MAP_ERR_NOT_CHECKED] = "dma map error not checked", + [MAP_ERR_CHECKED] = "dma map error checked", +}; + +static const char *type2name[5] = { "single", "page", + "scather-gather", "coherent", + "resource" }; + +static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", + "DMA_FROM_DEVICE", "DMA_NONE" }; + +/* + * The access to some variables in this macro is racy. We can't use atomic_t + * here because all these variables are exported to debugfs. Some of them even + * writeable. This is also the reason why a lock won't help much. But anyway, + * the races are no big deal. Here is why: + * + * error_count: the addition is racy, but the worst thing that can happen is + * that we don't count some errors + * show_num_errors: the subtraction is racy. Also no big deal because in + * worst case this will result in one warning more in the + * system log than the user configured. This variable is + * writeable via debugfs. + */ +static inline void dump_entry_trace(struct dma_debug_entry *entry) +{ +#ifdef CONFIG_STACKTRACE + if (entry) { + pr_warning("Mapped at:\n"); + print_stack_trace(&entry->stacktrace, 0); + } +#endif +} + +static bool driver_filter(struct device *dev) +{ + struct device_driver *drv; + unsigned long flags; + bool ret; + + /* driver filter off */ + if (likely(!current_driver_name[0])) + return true; + + /* driver filter on and initialized */ + if (current_driver && dev && dev->driver == current_driver) + return true; + + /* driver filter on, but we can't filter on a NULL device... */ + if (!dev) + return false; + + if (current_driver || !current_driver_name[0]) + return false; + + /* driver filter on but not yet initialized */ + drv = dev->driver; + if (!drv) + return false; + + /* lock to protect against change of current_driver_name */ + read_lock_irqsave(&driver_name_lock, flags); + + ret = false; + if (drv->name && + strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { + current_driver = drv; + ret = true; + } + + read_unlock_irqrestore(&driver_name_lock, flags); + + return ret; +} + +#define err_printk(dev, entry, format, arg...) do { \ + error_count += 1; \ + if (driver_filter(dev) && \ + (show_all_errors || show_num_errors > 0)) { \ + WARN(1, "%s %s: " format, \ + dev ? dev_driver_string(dev) : "NULL", \ + dev ? dev_name(dev) : "NULL", ## arg); \ + dump_entry_trace(entry); \ + } \ + if (!show_all_errors && show_num_errors > 0) \ + show_num_errors -= 1; \ + } while (0); + +/* + * Hash related functions + * + * Every DMA-API request is saved into a struct dma_debug_entry. To + * have quick access to these structs they are stored into a hash. + */ +static int hash_fn(struct dma_debug_entry *entry) +{ + /* + * Hash function is based on the dma address. + * We use bits 20-27 here as the index into the hash + */ + return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; +} + +/* + * Request exclusive access to a hash bucket for a given dma_debug_entry. + */ +static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, + unsigned long *flags) + __acquires(&dma_entry_hash[idx].lock) +{ + int idx = hash_fn(entry); + unsigned long __flags; + + spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); + *flags = __flags; + return &dma_entry_hash[idx]; +} + +/* + * Give up exclusive access to the hash bucket + */ +static void put_hash_bucket(struct hash_bucket *bucket, + unsigned long *flags) + __releases(&bucket->lock) +{ + unsigned long __flags = *flags; + + spin_unlock_irqrestore(&bucket->lock, __flags); +} + +static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b) +{ + return ((a->dev_addr == b->dev_addr) && + (a->dev == b->dev)) ? true : false; +} + +static bool containing_match(struct dma_debug_entry *a, + struct dma_debug_entry *b) +{ + if (a->dev != b->dev) + return false; + + if ((b->dev_addr <= a->dev_addr) && + ((b->dev_addr + b->size) >= (a->dev_addr + a->size))) + return true; + + return false; +} + +/* + * Search a given entry in the hash bucket list + */ +static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket, + struct dma_debug_entry *ref, + match_fn match) +{ + struct dma_debug_entry *entry, *ret = NULL; + int matches = 0, match_lvl, last_lvl = -1; + + list_for_each_entry(entry, &bucket->list, list) { + if (!match(ref, entry)) + continue; + + /* + * Some drivers map the same physical address multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therefore we implement a + * best-fit algorithm here which returns the entry from + * the hash which fits best to the reference value + * instead of the first-fit. + */ + matches += 1; + match_lvl = 0; + entry->size == ref->size ? ++match_lvl : 0; + entry->type == ref->type ? ++match_lvl : 0; + entry->direction == ref->direction ? ++match_lvl : 0; + entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0; + + if (match_lvl == 4) { + /* perfect-fit - return the result */ + return entry; + } else if (match_lvl > last_lvl) { + /* + * We found an entry that fits better then the + * previous one or it is the 1st match. + */ + last_lvl = match_lvl; + ret = entry; + } + } + + /* + * If we have multiple matches but no perfect-fit, just return + * NULL. + */ + ret = (matches == 1) ? ret : NULL; + + return ret; +} + +static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket, + struct dma_debug_entry *ref) +{ + return __hash_bucket_find(bucket, ref, exact_match); +} + +static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, + struct dma_debug_entry *ref, + unsigned long *flags) +{ + + unsigned int max_range = dma_get_max_seg_size(ref->dev); + struct dma_debug_entry *entry, index = *ref; + unsigned int range = 0; + + while (range <= max_range) { + entry = __hash_bucket_find(*bucket, ref, containing_match); + + if (entry) + return entry; + + /* + * Nothing found, go back a hash bucket + */ + put_hash_bucket(*bucket, flags); + range += (1 << HASH_FN_SHIFT); + index.dev_addr -= (1 << HASH_FN_SHIFT); + *bucket = get_hash_bucket(&index, flags); + } + + return NULL; +} + +/* + * Add an entry to a hash bucket + */ +static void hash_bucket_add(struct hash_bucket *bucket, + struct dma_debug_entry *entry) +{ + list_add_tail(&entry->list, &bucket->list); +} + +/* + * Remove entry from a hash bucket list + */ +static void hash_bucket_del(struct dma_debug_entry *entry) +{ + list_del(&entry->list); +} + +static unsigned long long phys_addr(struct dma_debug_entry *entry) +{ + if (entry->type == dma_debug_resource) + return __pfn_to_phys(entry->pfn) + entry->offset; + + return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; +} + +/* + * Dump mapping entries for debugging purposes + */ +void debug_dma_dump_mappings(struct device *dev) +{ + int idx; + + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + + list_for_each_entry(entry, &bucket->list, list) { + if (!dev || dev == entry->dev) { + dev_info(entry->dev, + "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", + type2name[entry->type], idx, + phys_addr(entry), entry->pfn, + entry->dev_addr, entry->size, + dir2name[entry->direction], + maperr2str[entry->map_err_type]); + } + } + + spin_unlock_irqrestore(&bucket->lock, flags); + } +} + +/* + * For each mapping (initial cacheline in the case of + * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a + * scatterlist, or the cacheline specified in dma_map_single) insert + * into this tree using the cacheline as the key. At + * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If + * the entry already exists at insertion time add a tag as a reference + * count for the overlapping mappings. For now, the overlap tracking + * just ensures that 'unmaps' balance 'maps' before marking the + * cacheline idle, but we should also be flagging overlaps as an API + * violation. + * + * Memory usage is mostly constrained by the maximum number of available + * dma-debug entries in that we need a free dma_debug_entry before + * inserting into the tree. In the case of dma_map_page and + * dma_alloc_coherent there is only one dma_debug_entry and one + * dma_active_cacheline entry to track per event. dma_map_sg(), on the + * other hand, consumes a single dma_debug_entry, but inserts 'nents' + * entries into the tree. + * + * At any time debug_dma_assert_idle() can be called to trigger a + * warning if any cachelines in the given page are in the active set. + */ +static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static DEFINE_SPINLOCK(radix_lock); +#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) +#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) +#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) + +static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) +{ + return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + + (entry->offset >> L1_CACHE_SHIFT); +} + +static int active_cacheline_read_overlap(phys_addr_t cln) +{ + int overlap = 0, i; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) + overlap |= 1 << i; + return overlap; +} + +static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) +{ + int i; + + if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) + return overlap; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (overlap & 1 << i) + radix_tree_tag_set(&dma_active_cacheline, cln, i); + else + radix_tree_tag_clear(&dma_active_cacheline, cln, i); + + return overlap; +} + +static void active_cacheline_inc_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + overlap = active_cacheline_set_overlap(cln, ++overlap); + + /* If we overflowed the overlap counter then we're potentially + * leaking dma-mappings. Otherwise, if maps and unmaps are + * balanced then this overflow may cause false negatives in + * debug_dma_assert_idle() as the cacheline may be marked idle + * prematurely. + */ + WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", + ACTIVE_CACHELINE_MAX_OVERLAP, &cln); +} + +static int active_cacheline_dec_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + return active_cacheline_set_overlap(cln, --overlap); +} + +static int active_cacheline_insert(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + int rc; + + /* If the device is not writing memory then we don't have any + * concerns about the cpu consuming stale data. This mitigates + * legitimate usages of overlapping mappings. + */ + if (entry->direction == DMA_TO_DEVICE) + return 0; + + spin_lock_irqsave(&radix_lock, flags); + rc = radix_tree_insert(&dma_active_cacheline, cln, entry); + if (rc == -EEXIST) + active_cacheline_inc_overlap(cln); + spin_unlock_irqrestore(&radix_lock, flags); + + return rc; +} + +static void active_cacheline_remove(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + + /* ...mirror the insert case */ + if (entry->direction == DMA_TO_DEVICE) + return; + + spin_lock_irqsave(&radix_lock, flags); + /* since we are counting overlaps the final put of the + * cacheline will occur when the overlap count is 0. + * active_cacheline_dec_overlap() returns -1 in that case + */ + if (active_cacheline_dec_overlap(cln) < 0) + radix_tree_delete(&dma_active_cacheline, cln); + spin_unlock_irqrestore(&radix_lock, flags); +} + +/** + * debug_dma_assert_idle() - assert that a page is not undergoing dma + * @page: page to lookup in the dma_active_cacheline tree + * + * Place a call to this routine in cases where the cpu touching the page + * before the dma completes (page is dma_unmapped) will lead to data + * corruption. + */ +void debug_dma_assert_idle(struct page *page) +{ + static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; + struct dma_debug_entry *entry = NULL; + void **results = (void **) &ents; + unsigned int nents, i; + unsigned long flags; + phys_addr_t cln; + + if (dma_debug_disabled()) + return; + + if (!page) + return; + + cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; + spin_lock_irqsave(&radix_lock, flags); + nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, + CACHELINES_PER_PAGE); + for (i = 0; i < nents; i++) { + phys_addr_t ent_cln = to_cacheline_number(ents[i]); + + if (ent_cln == cln) { + entry = ents[i]; + break; + } else if (ent_cln >= cln + CACHELINES_PER_PAGE) + break; + } + spin_unlock_irqrestore(&radix_lock, flags); + + if (!entry) + return; + + cln = to_cacheline_number(entry); + err_printk(entry->dev, entry, + "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", + &cln); +} + +/* + * Wrapper function for adding an entry to the hash. + * This function takes care of locking itself. + */ +static void add_dma_entry(struct dma_debug_entry *entry) +{ + struct hash_bucket *bucket; + unsigned long flags; + int rc; + + bucket = get_hash_bucket(entry, &flags); + hash_bucket_add(bucket, entry); + put_hash_bucket(bucket, &flags); + + rc = active_cacheline_insert(entry); + if (rc == -ENOMEM) { + pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; + } + + /* TODO: report -EEXIST errors here as overlapping mappings are + * not supported by the DMA API + */ +} + +static struct dma_debug_entry *__dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + + return entry; +} + +/* struct dma_entry allocator + * + * The next two functions implement the allocator for + * struct dma_debug_entries. + */ +static struct dma_debug_entry *dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&free_entries_lock, flags); + + if (list_empty(&free_entries)) { + global_disable = true; + spin_unlock_irqrestore(&free_entries_lock, flags); + pr_err("DMA-API: debugging out of memory - disabling\n"); + return NULL; + } + + entry = __dma_entry_alloc(); + + spin_unlock_irqrestore(&free_entries_lock, flags); + +#ifdef CONFIG_STACKTRACE + entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; + entry->stacktrace.entries = entry->st_entries; + entry->stacktrace.skip = 2; + save_stack_trace(&entry->stacktrace); +#endif + + return entry; +} + +static void dma_entry_free(struct dma_debug_entry *entry) +{ + unsigned long flags; + + active_cacheline_remove(entry); + + /* + * add to beginning of the list - this way the entries are + * more likely cache hot when they are reallocated. + */ + spin_lock_irqsave(&free_entries_lock, flags); + list_add(&entry->list, &free_entries); + num_free_entries += 1; + spin_unlock_irqrestore(&free_entries_lock, flags); +} + +int dma_debug_resize_entries(u32 num_entries) +{ + int i, delta, ret = 0; + unsigned long flags; + struct dma_debug_entry *entry; + LIST_HEAD(tmp); + + spin_lock_irqsave(&free_entries_lock, flags); + + if (nr_total_entries < num_entries) { + delta = num_entries - nr_total_entries; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + for (i = 0; i < delta; i++) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + break; + + list_add_tail(&entry->list, &tmp); + } + + spin_lock_irqsave(&free_entries_lock, flags); + + list_splice(&tmp, &free_entries); + nr_total_entries += i; + num_free_entries += i; + } else { + delta = nr_total_entries - num_entries; + + for (i = 0; i < delta && !list_empty(&free_entries); i++) { + entry = __dma_entry_alloc(); + kfree(entry); + } + + nr_total_entries -= i; + } + + if (nr_total_entries != num_entries) + ret = 1; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + return ret; +} + +/* + * DMA-API debugging init code + * + * The init code does two things: + * 1. Initialize core data structures + * 2. Preallocate a given number of dma_debug_entry structs + */ + +static int prealloc_memory(u32 num_entries) +{ + struct dma_debug_entry *entry, *next_entry; + int i; + + for (i = 0; i < num_entries; ++i) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_err; + + list_add_tail(&entry->list, &free_entries); + } + + num_free_entries = num_entries; + min_free_entries = num_entries; + + pr_info("DMA-API: preallocated %d debug entries\n", num_entries); + + return 0; + +out_err: + + list_for_each_entry_safe(entry, next_entry, &free_entries, list) { + list_del(&entry->list); + kfree(entry); + } + + return -ENOMEM; +} + +static ssize_t filter_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[NAME_MAX_LEN + 1]; + unsigned long flags; + int len; + + if (!current_driver_name[0]) + return 0; + + /* + * We can't copy to userspace directly because current_driver_name can + * only be read under the driver_name_lock with irqs disabled. So + * create a temporary copy first. + */ + read_lock_irqsave(&driver_name_lock, flags); + len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); + read_unlock_irqrestore(&driver_name_lock, flags); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +static ssize_t filter_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + char buf[NAME_MAX_LEN]; + unsigned long flags; + size_t len; + int i; + + /* + * We can't copy from userspace directly. Access to + * current_driver_name is protected with a write_lock with irqs + * disabled. Since copy_from_user can fault and may sleep we + * need to copy to temporary buffer first + */ + len = min(count, (size_t)(NAME_MAX_LEN - 1)); + if (copy_from_user(buf, userbuf, len)) + return -EFAULT; + + buf[len] = 0; + + write_lock_irqsave(&driver_name_lock, flags); + + /* + * Now handle the string we got from userspace very carefully. + * The rules are: + * - only use the first token we got + * - token delimiter is everything looking like a space + * character (' ', '\n', '\t' ...) + * + */ + if (!isalnum(buf[0])) { + /* + * If the first character userspace gave us is not + * alphanumerical then assume the filter should be + * switched off. + */ + if (current_driver_name[0]) + pr_info("DMA-API: switching off dma-debug driver filter\n"); + current_driver_name[0] = 0; + current_driver = NULL; + goto out_unlock; + } + + /* + * Now parse out the first token and use it as the name for the + * driver to filter for. + */ + for (i = 0; i < NAME_MAX_LEN - 1; ++i) { + current_driver_name[i] = buf[i]; + if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) + break; + } + current_driver_name[i] = 0; + current_driver = NULL; + + pr_info("DMA-API: enable driver filter for driver [%s]\n", + current_driver_name); + +out_unlock: + write_unlock_irqrestore(&driver_name_lock, flags); + + return count; +} + +static const struct file_operations filter_fops = { + .read = filter_read, + .write = filter_write, + .llseek = default_llseek, +}; + +static int dma_debug_fs_init(void) +{ + dma_debug_dent = debugfs_create_dir("dma-api", NULL); + if (!dma_debug_dent) { + pr_err("DMA-API: can not create debugfs directory\n"); + return -ENOMEM; + } + + global_disable_dent = debugfs_create_bool("disabled", 0444, + dma_debug_dent, + &global_disable); + if (!global_disable_dent) + goto out_err; + + error_count_dent = debugfs_create_u32("error_count", 0444, + dma_debug_dent, &error_count); + if (!error_count_dent) + goto out_err; + + show_all_errors_dent = debugfs_create_u32("all_errors", 0644, + dma_debug_dent, + &show_all_errors); + if (!show_all_errors_dent) + goto out_err; + + show_num_errors_dent = debugfs_create_u32("num_errors", 0644, + dma_debug_dent, + &show_num_errors); + if (!show_num_errors_dent) + goto out_err; + + num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, + dma_debug_dent, + &num_free_entries); + if (!num_free_entries_dent) + goto out_err; + + min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, + dma_debug_dent, + &min_free_entries); + if (!min_free_entries_dent) + goto out_err; + + filter_dent = debugfs_create_file("driver_filter", 0644, + dma_debug_dent, NULL, &filter_fops); + if (!filter_dent) + goto out_err; + + return 0; + +out_err: + debugfs_remove_recursive(dma_debug_dent); + + return -ENOMEM; +} + +static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) +{ + struct dma_debug_entry *entry; + unsigned long flags; + int count = 0, i; + + for (i = 0; i < HASH_SIZE; ++i) { + spin_lock_irqsave(&dma_entry_hash[i].lock, flags); + list_for_each_entry(entry, &dma_entry_hash[i].list, list) { + if (entry->dev == dev) { + count += 1; + *out_entry = entry; + } + } + spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); + } + + return count; +} + +static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) +{ + struct device *dev = data; + struct dma_debug_entry *uninitialized_var(entry); + int count; + + if (dma_debug_disabled()) + return 0; + + switch (action) { + case BUS_NOTIFY_UNBOUND_DRIVER: + count = device_dma_allocations(dev, &entry); + if (count == 0) + break; + err_printk(dev, entry, "DMA-API: device driver has pending " + "DMA allocations while released from device " + "[count=%d]\n" + "One of leaked entries details: " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [mapped as %s]\n", + count, entry->dev_addr, entry->size, + dir2name[entry->direction], type2name[entry->type]); + break; + default: + break; + } + + return 0; +} + +void dma_debug_add_bus(struct bus_type *bus) +{ + struct notifier_block *nb; + + if (dma_debug_disabled()) + return; + + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); + if (nb == NULL) { + pr_err("dma_debug_add_bus: out of memory\n"); + return; + } + + nb->notifier_call = dma_debug_device_change; + + bus_register_notifier(bus, nb); +} + +static int dma_debug_init(void) +{ + int i; + + /* Do not use dma_debug_initialized here, since we really want to be + * called to set dma_debug_initialized + */ + if (global_disable) + return 0; + + for (i = 0; i < HASH_SIZE; ++i) { + INIT_LIST_HEAD(&dma_entry_hash[i].list); + spin_lock_init(&dma_entry_hash[i].lock); + } + + if (dma_debug_fs_init() != 0) { + pr_err("DMA-API: error creating debugfs entries - disabling\n"); + global_disable = true; + + return 0; + } + + if (prealloc_memory(nr_prealloc_entries) != 0) { + pr_err("DMA-API: debugging out of memory error - disabled\n"); + global_disable = true; + + return 0; + } + + nr_total_entries = num_free_entries; + + dma_debug_initialized = true; + + pr_info("DMA-API: debugging enabled by kernel config\n"); + return 0; +} +core_initcall(dma_debug_init); + +static __init int dma_debug_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (strncmp(str, "off", 3) == 0) { + pr_info("DMA-API: debugging disabled on kernel command line\n"); + global_disable = true; + } + + return 0; +} + +static __init int dma_debug_entries_cmdline(char *str) +{ + if (!str) + return -EINVAL; + if (!get_option(&str, &nr_prealloc_entries)) + nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; + return 0; +} + +__setup("dma_debug=", dma_debug_cmdline); +__setup("dma_debug_entries=", dma_debug_entries_cmdline); + +static void check_unmap(struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(ref, &flags); + entry = bucket_find_exact(bucket, ref); + + if (!entry) { + /* must drop lock before calling dma_mapping_error */ + put_hash_bucket(bucket, &flags); + + if (dma_mapping_error(ref->dev, ref->dev_addr)) { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries to free an " + "invalid DMA memory address\n"); + } else { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries to free DMA " + "memory it has not allocated [device " + "address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); + } + return; + } + + if (ref->size != entry->size) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different size " + "[device address=0x%016llx] [map size=%llu bytes] " + "[unmap size=%llu bytes]\n", + ref->dev_addr, entry->size, ref->size); + } + + if (ref->type != entry->type) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with wrong function " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s] [unmapped as %s]\n", + ref->dev_addr, ref->size, + type2name[entry->type], type2name[ref->type]); + } else if ((entry->type == dma_debug_coherent) && + (phys_addr(ref) != phys_addr(entry))) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different CPU address " + "[device address=0x%016llx] [size=%llu bytes] " + "[cpu alloc address=0x%016llx] " + "[cpu free address=0x%016llx]", + ref->dev_addr, ref->size, + phys_addr(entry), + phys_addr(ref)); + } + + if (ref->sg_call_ents && ref->type == dma_debug_sg && + ref->sg_call_ents != entry->sg_call_ents) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA sg list with different entry count " + "[map count=%d] [unmap count=%d]\n", + entry->sg_call_ents, ref->sg_call_ents); + } + + /* + * This may be no bug in reality - but most implementations of the + * DMA API don't handle this properly, so check for it here + */ + if (ref->direction != entry->direction) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [unmapped with %s]\n", + ref->dev_addr, ref->size, + dir2name[entry->direction], + dir2name[ref->direction]); + } + + /* + * Drivers should use dma_mapping_error() to check the returned + * addresses of dma_map_single() and dma_map_page(). + * If not, print this warning message. See Documentation/DMA-API.txt. + */ + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + err_printk(ref->dev, entry, + "DMA-API: device driver failed to check map error" + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s]", + ref->dev_addr, ref->size, + type2name[entry->type]); + } + + hash_bucket_del(entry); + dma_entry_free(entry); + + put_hash_bucket(bucket, &flags); +} + +static void check_for_stack(struct device *dev, + struct page *page, size_t offset) +{ + void *addr; + struct vm_struct *stack_vm_area = task_stack_vm_area(current); + + if (!stack_vm_area) { + /* Stack is direct-mapped. */ + if (PageHighMem(page)) + return; + addr = page_address(page) + offset; + if (object_is_on_stack(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); + } else { + /* Stack is vmalloced. */ + int i; + + for (i = 0; i < stack_vm_area->nr_pages; i++) { + if (page != stack_vm_area->pages[i]) + continue; + + addr = (u8 *)current->stack + i * PAGE_SIZE + offset; + err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); + break; + } + } +} + +static inline bool overlap(void *addr, unsigned long len, void *start, void *end) +{ + unsigned long a1 = (unsigned long)addr; + unsigned long b1 = a1 + len; + unsigned long a2 = (unsigned long)start; + unsigned long b2 = (unsigned long)end; + + return !(b1 <= a2 || a1 >= b2); +} + +static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) +{ + if (overlap(addr, len, _stext, _etext) || + overlap(addr, len, __start_rodata, __end_rodata)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); +} + +static void check_sync(struct device *dev, + struct dma_debug_entry *ref, + bool to_cpu) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(ref, &flags); + + entry = bucket_find_contain(&bucket, ref, &flags); + + if (!entry) { + err_printk(dev, NULL, "DMA-API: device driver tries " + "to sync DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + (unsigned long long)ref->dev_addr, ref->size); + goto out; + } + + if (ref->size > entry->size) { + err_printk(dev, entry, "DMA-API: device driver syncs" + " DMA memory outside allocated range " + "[device address=0x%016llx] " + "[allocation size=%llu bytes] " + "[sync offset+size=%llu]\n", + entry->dev_addr, entry->size, + ref->size); + } + + if (entry->direction == DMA_BIDIRECTIONAL) + goto out; + + if (ref->direction != entry->direction) { + err_printk(dev, entry, "DMA-API: device driver syncs " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)ref->dev_addr, entry->size, + dir2name[entry->direction], + dir2name[ref->direction]); + } + + if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && + !(ref->direction == DMA_TO_DEVICE)) + err_printk(dev, entry, "DMA-API: device driver syncs " + "device read-only DMA memory for cpu " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)ref->dev_addr, entry->size, + dir2name[entry->direction], + dir2name[ref->direction]); + + if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && + !(ref->direction == DMA_FROM_DEVICE)) + err_printk(dev, entry, "DMA-API: device driver syncs " + "device write-only DMA memory to device " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)ref->dev_addr, entry->size, + dir2name[entry->direction], + dir2name[ref->direction]); + + if (ref->sg_call_ents && ref->type == dma_debug_sg && + ref->sg_call_ents != entry->sg_call_ents) { + err_printk(ref->dev, entry, "DMA-API: device driver syncs " + "DMA sg list with different entry count " + "[map count=%d] [sync count=%d]\n", + entry->sg_call_ents, ref->sg_call_ents); + } + +out: + put_hash_bucket(bucket, &flags); +} + +static void check_sg_segment(struct device *dev, struct scatterlist *sg) +{ +#ifdef CONFIG_DMA_API_DEBUG_SG + unsigned int max_seg = dma_get_max_seg_size(dev); + u64 start, end, boundary = dma_get_seg_boundary(dev); + + /* + * Either the driver forgot to set dma_parms appropriately, or + * whoever generated the list forgot to check them. + */ + if (sg->length > max_seg) + err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", + sg->length, max_seg); + /* + * In some cases this could potentially be the DMA API + * implementation's fault, but it would usually imply that + * the scatterlist was built inappropriately to begin with. + */ + start = sg_dma_address(sg); + end = start + sg_dma_len(sg) - 1; + if ((start ^ end) & ~boundary) + err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", + start, end, boundary); +#endif +} + +void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, + size_t size, int direction, dma_addr_t dma_addr, + bool map_single) +{ + struct dma_debug_entry *entry; + + if (unlikely(dma_debug_disabled())) + return; + + if (dma_mapping_error(dev, dma_addr)) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->dev = dev; + entry->type = dma_debug_page; + entry->pfn = page_to_pfn(page); + entry->offset = offset, + entry->dev_addr = dma_addr; + entry->size = size; + entry->direction = direction; + entry->map_err_type = MAP_ERR_NOT_CHECKED; + + if (map_single) + entry->type = dma_debug_single; + + check_for_stack(dev, page, offset); + + if (!PageHighMem(page)) { + void *addr = page_address(page) + offset; + + check_for_illegal_area(dev, addr, size); + } + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_map_page); + +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_debug_entry ref; + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (unlikely(dma_debug_disabled())) + return; + + ref.dev = dev; + ref.dev_addr = dma_addr; + bucket = get_hash_bucket(&ref, &flags); + + list_for_each_entry(entry, &bucket->list, list) { + if (!exact_match(&ref, entry)) + continue; + + /* + * The same physical address can be mapped multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therefore we implement a + * best-fit algorithm here which updates the first entry + * from the hash which fits the reference value and is + * not currently listed as being checked. + */ + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + entry->map_err_type = MAP_ERR_CHECKED; + break; + } + } + + put_hash_bucket(bucket, &flags); +} +EXPORT_SYMBOL(debug_dma_mapping_error); + +void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, bool map_single) +{ + struct dma_debug_entry ref = { + .type = dma_debug_page, + .dev = dev, + .dev_addr = addr, + .size = size, + .direction = direction, + }; + + if (unlikely(dma_debug_disabled())) + return; + + if (map_single) + ref.type = dma_debug_single; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_unmap_page); + +void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction) +{ + struct dma_debug_entry *entry; + struct scatterlist *s; + int i; + + if (unlikely(dma_debug_disabled())) + return; + + for_each_sg(sg, s, mapped_ents, i) { + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_sg; + entry->dev = dev; + entry->pfn = page_to_pfn(sg_page(s)); + entry->offset = s->offset, + entry->size = sg_dma_len(s); + entry->dev_addr = sg_dma_address(s); + entry->direction = direction; + entry->sg_call_ents = nents; + entry->sg_mapped_ents = mapped_ents; + + check_for_stack(dev, sg_page(s), s->offset); + + if (!PageHighMem(sg_page(s))) { + check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); + } + + check_sg_segment(dev, s); + + add_dma_entry(entry); + } +} +EXPORT_SYMBOL(debug_dma_map_sg); + +static int get_nr_mapped_entries(struct device *dev, + struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + int mapped_ents; + + bucket = get_hash_bucket(ref, &flags); + entry = bucket_find_exact(bucket, ref); + mapped_ents = 0; + + if (entry) + mapped_ents = entry->sg_mapped_ents; + put_hash_bucket(bucket, &flags); + + return mapped_ents; +} + +void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct scatterlist *s; + int mapped_ents = 0, i; + + if (unlikely(dma_debug_disabled())) + return; + + for_each_sg(sglist, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), + .direction = dir, + .sg_call_ents = nelems, + }; + + if (mapped_ents && i >= mapped_ents) + break; + + if (!i) + mapped_ents = get_nr_mapped_entries(dev, &ref); + + check_unmap(&ref); + } +} +EXPORT_SYMBOL(debug_dma_unmap_sg); + +void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt) +{ + struct dma_debug_entry *entry; + + if (unlikely(dma_debug_disabled())) + return; + + if (unlikely(virt == NULL)) + return; + + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_coherent; + entry->dev = dev; + entry->offset = offset_in_page(virt); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = DMA_BIDIRECTIONAL; + + if (is_vmalloc_addr(virt)) + entry->pfn = vmalloc_to_pfn(virt); + else + entry->pfn = page_to_pfn(virt_to_page(virt)); + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_alloc_coherent); + +void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ + struct dma_debug_entry ref = { + .type = dma_debug_coherent, + .dev = dev, + .offset = offset_in_page(virt), + .dev_addr = addr, + .size = size, + .direction = DMA_BIDIRECTIONAL, + }; + + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) + return; + + if (is_vmalloc_addr(virt)) + ref.pfn = vmalloc_to_pfn(virt); + else + ref.pfn = page_to_pfn(virt_to_page(virt)); + + if (unlikely(dma_debug_disabled())) + return; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_free_coherent); + +void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, + int direction, dma_addr_t dma_addr) +{ + struct dma_debug_entry *entry; + + if (unlikely(dma_debug_disabled())) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_resource; + entry->dev = dev; + entry->pfn = PHYS_PFN(addr); + entry->offset = offset_in_page(addr); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = direction; + entry->map_err_type = MAP_ERR_NOT_CHECKED; + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_map_resource); + +void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, + size_t size, int direction) +{ + struct dma_debug_entry ref = { + .type = dma_debug_resource, + .dev = dev, + .dev_addr = dma_addr, + .size = size, + .direction = direction, + }; + + if (unlikely(dma_debug_disabled())) + return; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_unmap_resource); + +void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(dma_debug_disabled())) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); + +void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(dma_debug_disabled())) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_device); + +void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, size_t size, + int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(dma_debug_disabled())) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = offset + size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); + +void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(dma_debug_disabled())) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = offset + size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); + +void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int mapped_ents = 0, i; + + if (unlikely(dma_debug_disabled())) + return; + + for_each_sg(sg, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), + .direction = direction, + .sg_call_ents = nelems, + }; + + if (!i) + mapped_ents = get_nr_mapped_entries(dev, &ref); + + if (i >= mapped_ents) + break; + + check_sync(dev, &ref, true); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); + +void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int mapped_ents = 0, i; + + if (unlikely(dma_debug_disabled())) + return; + + for_each_sg(sg, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), + .direction = direction, + .sg_call_ents = nelems, + }; + if (!i) + mapped_ents = get_nr_mapped_entries(dev, &ref); + + if (i >= mapped_ents) + break; + + check_sync(dev, &ref, false); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_device); + +static int __init dma_debug_driver_setup(char *str) +{ + int i; + + for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { + current_driver_name[i] = *str; + if (*str == 0) + break; + } + + if (current_driver_name[0]) + pr_info("DMA-API: enable driver filter for driver [%s]\n", + current_driver_name); + + + return 1; +} +__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c new file mode 100644 index 000000000000..8be8106270c2 --- /dev/null +++ b/kernel/dma/direct.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA operations that map physical memory directly without using an IOMMU or + * flushing caches. + */ +#include +#include +#include +#include +#include +#include +#include + +#define DIRECT_MAPPING_ERROR 0 + +/* + * Most architectures use ZONE_DMA for the first 16 Megabytes, but + * some use it for entirely different regions: + */ +#ifndef ARCH_ZONE_DMA_BITS +#define ARCH_ZONE_DMA_BITS 24 +#endif + +/* + * For AMD SEV all DMA must be to unencrypted addresses. + */ +static inline bool force_dma_unencrypted(void) +{ + return sev_active(); +} + +static bool +check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, + const char *caller) +{ + if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (!dev->dma_mask) { + dev_err(dev, + "%s: call on device without dma_mask\n", + caller); + return false; + } + + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + dev_err(dev, + "%s: overflow %pad+%zu of device mask %llx\n", + caller, &dma_addr, size, *dev->dma_mask); + } + return false; + } + return true; +} + +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +{ + dma_addr_t addr = force_dma_unencrypted() ? + __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); + return addr + size - 1 <= dev->coherent_dma_mask; +} + +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int page_order = get_order(size); + struct page *page = NULL; + void *ret; + + /* we always manually zero the memory once we are done: */ + gfp &= ~__GFP_ZERO; + + /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ + if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + gfp |= GFP_DMA; + if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; + +again: + /* CMA can be used only in the context which permits sleeping */ + if (gfpflags_allow_blocking(gfp)) { + page = dma_alloc_from_contiguous(dev, count, page_order, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + if (!page) + page = alloc_pages_node(dev_to_node(dev), gfp, page_order); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __free_pages(page, page_order); + page = NULL; + + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + dev->coherent_dma_mask < DMA_BIT_MASK(64) && + !(gfp & (GFP_DMA32 | GFP_DMA))) { + gfp |= GFP_DMA32; + goto again; + } + + if (IS_ENABLED(CONFIG_ZONE_DMA) && + dev->coherent_dma_mask < DMA_BIT_MASK(32) && + !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + + if (!page) + return NULL; + ret = page_address(page); + if (force_dma_unencrypted()) { + set_memory_decrypted((unsigned long)ret, 1 << page_order); + *dma_handle = __phys_to_dma(dev, page_to_phys(page)); + } else { + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + } + memset(ret, 0, size); + return ret; +} + +/* + * NOTE: this function must never look at the dma_addr argument, because we want + * to be able to use it as a helper for iommu implementations as well. + */ +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int page_order = get_order(size); + + if (force_dma_unencrypted()) + set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) + free_pages((unsigned long)cpu_addr, page_order); +} + +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; + + if (!check_addr(dev, dma_addr, size, __func__)) + return DIRECT_MAPPING_ERROR; + return dma_addr; +} + +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); + if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) + return 0; + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +int dma_direct_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_ZONE_DMA + if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + return 0; +#else + /* + * Because 32-bit DMA masks are so common we expect every architecture + * to be able to satisfy them - either by not supporting more physical + * memory, or by providing a ZONE_DMA32. If neither is the case, the + * architecture needs to use an IOMMU instead of the direct mapping. + */ + if (mask < DMA_BIT_MASK(32)) + return 0; +#endif + /* + * Various PCI/PCIe bridges have broken support for > 32bit DMA even + * if the device itself might support it. + */ + if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) + return 0; + return 1; +} + +int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DIRECT_MAPPING_ERROR; +} + +const struct dma_map_ops dma_direct_ops = { + .alloc = dma_direct_alloc, + .free = dma_direct_free, + .map_page = dma_direct_map_page, + .map_sg = dma_direct_map_sg, + .dma_supported = dma_direct_supported, + .mapping_error = dma_direct_mapping_error, +}; +EXPORT_SYMBOL(dma_direct_ops); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c new file mode 100644 index 000000000000..d2a92ddaac4d --- /dev/null +++ b/kernel/dma/mapping.c @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch-independent dma-mapping routines + * + * Copyright (c) 2006 SUSE Linux Products GmbH + * Copyright (c) 2006 Tejun Heo + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Managed DMA API + */ +struct dma_devres { + size_t size; + void *vaddr; + dma_addr_t dma_handle; + unsigned long attrs; +}; + +static void dmam_release(struct device *dev, void *res) +{ + struct dma_devres *this = res; + + dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle, + this->attrs); +} + +static int dmam_match(struct device *dev, void *res, void *match_data) +{ + struct dma_devres *this = res, *match = match_data; + + if (this->vaddr == match->vaddr) { + WARN_ON(this->size != match->size || + this->dma_handle != match->dma_handle); + return 1; + } + return 0; +} + +/** + * dmam_alloc_coherent - Managed dma_alloc_coherent() + * @dev: Device to allocate coherent memory for + * @size: Size of allocation + * @dma_handle: Out argument for allocated DMA handle + * @gfp: Allocation flags + * + * Managed dma_alloc_coherent(). Memory allocated using this function + * will be automatically released on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. + */ +void *dmam_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct dma_devres *dr; + void *vaddr; + + dr = devres_alloc(dmam_release, sizeof(*dr), gfp); + if (!dr) + return NULL; + + vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp); + if (!vaddr) { + devres_free(dr); + return NULL; + } + + dr->vaddr = vaddr; + dr->dma_handle = *dma_handle; + dr->size = size; + + devres_add(dev, dr); + + return vaddr; +} +EXPORT_SYMBOL(dmam_alloc_coherent); + +/** + * dmam_free_coherent - Managed dma_free_coherent() + * @dev: Device to free coherent memory for + * @size: Size of allocation + * @vaddr: Virtual address of the memory to free + * @dma_handle: DMA handle of the memory to free + * + * Managed dma_free_coherent(). + */ +void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + struct dma_devres match_data = { size, vaddr, dma_handle }; + + dma_free_coherent(dev, size, vaddr, dma_handle); + WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data)); +} +EXPORT_SYMBOL(dmam_free_coherent); + +/** + * dmam_alloc_attrs - Managed dma_alloc_attrs() + * @dev: Device to allocate non_coherent memory for + * @size: Size of allocation + * @dma_handle: Out argument for allocated DMA handle + * @gfp: Allocation flags + * @attrs: Flags in the DMA_ATTR_* namespace. + * + * Managed dma_alloc_attrs(). Memory allocated using this function will be + * automatically released on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. + */ +void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + struct dma_devres *dr; + void *vaddr; + + dr = devres_alloc(dmam_release, sizeof(*dr), gfp); + if (!dr) + return NULL; + + vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs); + if (!vaddr) { + devres_free(dr); + return NULL; + } + + dr->vaddr = vaddr; + dr->dma_handle = *dma_handle; + dr->size = size; + dr->attrs = attrs; + + devres_add(dev, dr); + + return vaddr; +} +EXPORT_SYMBOL(dmam_alloc_attrs); + +#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT + +static void dmam_coherent_decl_release(struct device *dev, void *res) +{ + dma_release_declared_memory(dev); +} + +/** + * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory() + * @dev: Device to declare coherent memory for + * @phys_addr: Physical address of coherent memory to be declared + * @device_addr: Device address of coherent memory to be declared + * @size: Size of coherent memory to be declared + * @flags: Flags + * + * Managed dma_declare_coherent_memory(). + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void *res; + int rc; + + res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL); + if (!res) + return -ENOMEM; + + rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size, + flags); + if (!rc) + devres_add(dev, res); + else + devres_free(res); + + return rc; +} +EXPORT_SYMBOL(dmam_declare_coherent_memory); + +/** + * dmam_release_declared_memory - Managed dma_release_declared_memory(). + * @dev: Device to release declared coherent memory for + * + * Managed dmam_release_declared_memory(). + */ +void dmam_release_declared_memory(struct device *dev) +{ + WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL)); +} +EXPORT_SYMBOL(dmam_release_declared_memory); + +#endif + +/* + * Create scatter-list for the already allocated DMA buffer. + */ +int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size) +{ + struct page *page = virt_to_page(cpu_addr); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (unlikely(ret)) + return ret; + + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return 0; +} +EXPORT_SYMBOL(dma_common_get_sgtable); + +/* + * Create userspace mapping for the DMA-coherent memory. + */ +int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + int ret = -ENXIO; +#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < count && user_count <= (count - off)) + ret = remap_pfn_range(vma, vma->vm_start, + page_to_pfn(virt_to_page(cpu_addr)) + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); +#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ + + return ret; +} +EXPORT_SYMBOL(dma_common_mmap); + +#ifdef CONFIG_MMU +static struct vm_struct *__dma_common_pages_remap(struct page **pages, + size_t size, unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = get_vm_area_caller(size, vm_flags, caller); + if (!area) + return NULL; + + if (map_vm_area(area, prot, pages)) { + vunmap(area->addr); + return NULL; + } + + return area; +} + +/* + * remaps an array of PAGE_SIZE pages into another vm_area + * Cannot be used in non-sleeping contexts + */ +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); + if (!area) + return NULL; + + area->pages = pages; + + return area->addr; +} + +/* + * remaps an allocated contiguous region into another vm_area. + * Cannot be used in non-sleeping contexts + */ + +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller) +{ + int i; + struct page **pages; + struct vm_struct *area; + + pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); + if (!pages) + return NULL; + + for (i = 0; i < (size >> PAGE_SHIFT); i++) + pages[i] = nth_page(page, i); + + area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); + + kfree(pages); + + if (!area) + return NULL; + return area->addr; +} + +/* + * unmaps a range previously mapped by dma_common_*_remap + */ +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!area || (area->flags & vm_flags) != vm_flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); + vunmap(cpu_addr); +} +#endif + +/* + * enables DMA API use for a device + */ +int dma_configure(struct device *dev) +{ + if (dev->bus->dma_configure) + return dev->bus->dma_configure(dev); + return 0; +} + +void dma_deconfigure(struct device *dev) +{ + of_dma_deconfigure(dev); + acpi_dma_deconfigure(dev); +} diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c new file mode 100644 index 000000000000..79e9a757387f --- /dev/null +++ b/kernel/dma/noncoherent.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without providing cache + * coherence. + */ +#include +#include +#include +#include +#include + +static void dma_noncoherent_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_noncoherent_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); +} + +static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t addr; + + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(dev, page_to_phys(page) + offset, + size, dir); + return addr; +} + +static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); + if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); + return nents; +} + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +static void dma_noncoherent_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); +} + +static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); +} + +static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); +} +#endif + +const struct dma_map_ops dma_noncoherent_ops = { + .alloc = arch_dma_alloc, + .free = arch_dma_free, + .mmap = arch_dma_mmap, + .sync_single_for_device = dma_noncoherent_sync_single_for_device, + .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, + .map_page = dma_noncoherent_map_page, + .map_sg = dma_noncoherent_map_sg, +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU + .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, + .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, + .unmap_page = dma_noncoherent_unmap_page, + .unmap_sg = dma_noncoherent_unmap_sg, +#endif + .dma_supported = dma_direct_supported, + .mapping_error = dma_direct_mapping_error, + .cache_sync = arch_dma_cache_sync, +}; +EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c new file mode 100644 index 000000000000..04b68d9dffac --- /dev/null +++ b/kernel/dma/swiotlb.c @@ -0,0 +1,1087 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is a fallback for platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * Copyright (C) 2000 Asit Mallick + * Copyright (C) 2000 Goutham Rao + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. + * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid + * unnecessary i-cache flushing. + * 04/07/.. ak Better overflow handling. Assorted fixes. + * 05/09/10 linville Add support for syncing ranges, support syncing for + * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. + * 08/12/11 beckyb Add highmem support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#define OFFSET(val,align) ((unsigned long) \ + ( (val) & ( (align) - 1))) + +#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) + +/* + * Minimum IO TLB size to bother booting with. Systems with mainly + * 64bit capable cards will only lightly use the swiotlb. If we can't + * allocate a contiguous 1MB, we're probably in trouble anyway. + */ +#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) + +enum swiotlb_force swiotlb_force; + +/* + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +static phys_addr_t io_tlb_start, io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) between io_tlb_start and + * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. + */ +static unsigned long io_tlb_nslabs; + +/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. + */ +static unsigned long io_tlb_overflow = 32*1024; + +static phys_addr_t io_tlb_overflow_buffer; + +/* + * This is a free list describing the number of free entries available from + * each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * Max segment that we can provide which (if pages are contingous) will + * not be bounced (unless SWIOTLB_FORCE is set). + */ +unsigned int max_segment; + +/* + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) +static phys_addr_t *io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +static DEFINE_SPINLOCK(io_tlb_lock); + +static int late_alloc; + +static int __init +setup_io_tlb_npages(char *str) +{ + if (isdigit(*str)) { + io_tlb_nslabs = simple_strtoul(str, &str, 0); + /* avoid tail segment of size < IO_TLB_SEGSIZE */ + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + if (*str == ',') + ++str; + if (!strcmp(str, "force")) { + swiotlb_force = SWIOTLB_FORCE; + } else if (!strcmp(str, "noforce")) { + swiotlb_force = SWIOTLB_NO_FORCE; + io_tlb_nslabs = 1; + } + + return 0; +} +early_param("swiotlb", setup_io_tlb_npages); +/* make io_tlb_overflow tunable too? */ + +unsigned long swiotlb_nr_tbl(void) +{ + return io_tlb_nslabs; +} +EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); + +unsigned int swiotlb_max_segment(void) +{ + return max_segment; +} +EXPORT_SYMBOL_GPL(swiotlb_max_segment); + +void swiotlb_set_max_segment(unsigned int val) +{ + if (swiotlb_force == SWIOTLB_FORCE) + max_segment = 1; + else + max_segment = rounddown(val, PAGE_SIZE); +} + +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) +unsigned long swiotlb_size_or_default(void) +{ + unsigned long size; + + size = io_tlb_nslabs << IO_TLB_SHIFT; + + return size ? size : (IO_TLB_DEFAULT_SIZE); +} + +static bool no_iotlb_memory; + +void swiotlb_print_info(void) +{ + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; + unsigned char *vstart, *vend; + + if (no_iotlb_memory) { + pr_warn("software IO TLB: No low mem\n"); + return; + } + + vstart = phys_to_virt(io_tlb_start); + vend = phys_to_virt(io_tlb_end); + + printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + (unsigned long long)io_tlb_start, + (unsigned long long)io_tlb_end, + bytes >> 20, vstart, vend - 1); +} + +/* + * Early SWIOTLB allocation may be too early to allow an architecture to + * perform the desired operations. This function allows the architecture to + * call SWIOTLB when the operations are possible. It needs to be called + * before the SWIOTLB memory is used. + */ +void __init swiotlb_update_mem_attributes(void) +{ + void *vaddr; + unsigned long bytes; + + if (no_iotlb_memory || late_alloc) + return; + + vaddr = phys_to_virt(io_tlb_start); + bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); + memset(vaddr, 0, bytes); + + vaddr = phys_to_virt(io_tlb_overflow_buffer); + bytes = PAGE_ALIGN(io_tlb_overflow); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); + memset(vaddr, 0, bytes); +} + +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +{ + void *v_overflow_buffer; + unsigned long i, bytes; + + bytes = nslabs << IO_TLB_SHIFT; + + io_tlb_nslabs = nslabs; + io_tlb_start = __pa(tlb); + io_tlb_end = io_tlb_start + bytes; + + /* + * Get the overflow emergency buffer + */ + v_overflow_buffer = memblock_virt_alloc_low_nopanic( + PAGE_ALIGN(io_tlb_overflow), + PAGE_SIZE); + if (!v_overflow_buffer) + return -ENOMEM; + + io_tlb_overflow_buffer = __pa(v_overflow_buffer); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), + PAGE_SIZE); + io_tlb_orig_addr = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), + PAGE_SIZE); + for (i = 0; i < io_tlb_nslabs; i++) { + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + io_tlb_index = 0; + + if (verbose) + swiotlb_print_info(); + + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); + return 0; +} + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init(int verbose) +{ + size_t default_size = IO_TLB_DEFAULT_SIZE; + unsigned char *vstart; + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* Get IO TLB memory from the low pages */ + vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + return; + + if (io_tlb_start) + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + pr_warn("Cannot allocate SWIOTLB buffer"); + no_iotlb_memory = true; +} + +/* + * Systems with larger DMA zones (those that don't support ISA) can + * initialize the swiotlb later using the slab allocator if needed. + * This should be just like above, but with some error catching. + */ +int +swiotlb_late_init_with_default_size(size_t default_size) +{ + unsigned long bytes, req_nslabs = io_tlb_nslabs; + unsigned char *vstart = NULL; + unsigned int order; + int rc = 0; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + /* + * Get IO TLB memory from the low pages + */ + order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); + io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { + vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); + if (vstart) + break; + order--; + } + + if (!vstart) { + io_tlb_nslabs = req_nslabs; + return -ENOMEM; + } + if (order != get_order(bytes)) { + printk(KERN_WARNING "Warning: only able to allocate %ld MB " + "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + io_tlb_nslabs = SLABS_PER_PAGE << order; + } + rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); + if (rc) + free_pages((unsigned long)vstart, order); + + return rc; +} + +int +swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) +{ + unsigned long i, bytes; + unsigned char *v_overflow_buffer; + + bytes = nslabs << IO_TLB_SHIFT; + + io_tlb_nslabs = nslabs; + io_tlb_start = virt_to_phys(tlb); + io_tlb_end = io_tlb_start + bytes; + + set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); + memset(tlb, 0, bytes); + + /* + * Get the overflow emergency buffer + */ + v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, + get_order(io_tlb_overflow)); + if (!v_overflow_buffer) + goto cleanup2; + + set_memory_decrypted((unsigned long)v_overflow_buffer, + io_tlb_overflow >> PAGE_SHIFT); + memset(v_overflow_buffer, 0, io_tlb_overflow); + io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * sizeof(int))); + if (!io_tlb_list) + goto cleanup3; + + io_tlb_orig_addr = (phys_addr_t *) + __get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * + sizeof(phys_addr_t))); + if (!io_tlb_orig_addr) + goto cleanup4; + + for (i = 0; i < io_tlb_nslabs; i++) { + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + io_tlb_index = 0; + + swiotlb_print_info(); + + late_alloc = 1; + + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); + + return 0; + +cleanup4: + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + io_tlb_list = NULL; +cleanup3: + free_pages((unsigned long)v_overflow_buffer, + get_order(io_tlb_overflow)); + io_tlb_overflow_buffer = 0; +cleanup2: + io_tlb_end = 0; + io_tlb_start = 0; + io_tlb_nslabs = 0; + max_segment = 0; + return -ENOMEM; +} + +void __init swiotlb_exit(void) +{ + if (!io_tlb_orig_addr) + return; + + if (late_alloc) { + free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)phys_to_virt(io_tlb_start), + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + memblock_free_late(io_tlb_overflow_buffer, + PAGE_ALIGN(io_tlb_overflow)); + memblock_free_late(__pa(io_tlb_orig_addr), + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + memblock_free_late(__pa(io_tlb_list), + PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + memblock_free_late(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + } + io_tlb_nslabs = 0; + max_segment = 0; +} + +int is_swiotlb_buffer(phys_addr_t paddr) +{ + return paddr >= io_tlb_start && paddr < io_tlb_end; +} + +/* + * Bounce: copy the swiotlb buffer back to the original dma location + */ +static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(orig_addr); + unsigned char *vaddr = phys_to_virt(tlb_addr); + + if (PageHighMem(pfn_to_page(pfn))) { + /* The buffer does not have a mapping. Map it in and copy */ + unsigned int offset = orig_addr & ~PAGE_MASK; + char *buffer; + unsigned int sz = 0; + unsigned long flags; + + while (size) { + sz = min_t(size_t, PAGE_SIZE - offset, size); + + local_irq_save(flags); + buffer = kmap_atomic(pfn_to_page(pfn)); + if (dir == DMA_TO_DEVICE) + memcpy(vaddr, buffer + offset, sz); + else + memcpy(buffer + offset, vaddr, sz); + kunmap_atomic(buffer); + local_irq_restore(flags); + + size -= sz; + pfn++; + vaddr += sz; + offset = 0; + } + } else if (dir == DMA_TO_DEVICE) { + memcpy(vaddr, phys_to_virt(orig_addr), size); + } else { + memcpy(phys_to_virt(orig_addr), vaddr, size); + } +} + +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, + dma_addr_t tbl_dma_addr, + phys_addr_t orig_addr, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + unsigned long flags; + phys_addr_t tlb_addr; + unsigned int nslots, stride, index, wrap; + int i; + unsigned long mask; + unsigned long offset_slots; + unsigned long max_slots; + + if (no_iotlb_memory) + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + + if (mem_encrypt_active()) + pr_warn_once("%s is active and system is using DMA bounce buffers\n", + sme_active() ? "SME" : "SEV"); + + mask = dma_get_seg_boundary(hwdev); + + tbl_dma_addr &= mask; + + offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + + /* + * Carefully handle integer overflow which can occur when mask == ~0UL. + */ + max_slots = mask + 1 + ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + + /* + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size >= PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; + + BUG_ON(!nslots); + + /* + * Find suitable number of IO TLB entries size that will fit this + * request and allocate a buffer from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + index = ALIGN(io_tlb_index, stride); + if (index >= io_tlb_nslabs) + index = 0; + wrap = index; + + do { + while (iommu_is_span_boundary(index, nslots, offset_slots, + max_slots)) { + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + if (index == wrap) + goto not_found; + } + + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot + * and mark the entries as '0' indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int) (index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next + * round. + */ + io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + +not_found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); + return SWIOTLB_MAP_ERROR; +found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + for (i = 0; i < nslots; i++) + io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); + + return tlb_addr; +} + +/* + * Allocates bounce buffer and returns its physical address. + */ +static phys_addr_t +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + dma_addr_t start_dma_addr; + + if (swiotlb_force == SWIOTLB_NO_FORCE) { + dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", + &phys); + return SWIOTLB_MAP_ERROR; + } + + start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, + dir, attrs); +} + +/* + * tlb_addr is the physical address of the bounce buffer to unmap. + */ +void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if (orig_addr != INVALID_PHYS_ADDR && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contiguous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? + io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the + * slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + /* + * Step 2: merge the returned slots with the preceding slots, + * if available (non zero) + */ + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) +{ + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; + + if (orig_addr == INVALID_PHYS_ADDR) + return; + orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); + + switch (target) { + case SYNC_FOR_CPU: + if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, + size, DMA_FROM_DEVICE); + else + BUG_ON(dir != DMA_TO_DEVICE); + break; + case SYNC_FOR_DEVICE: + if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, + size, DMA_TO_DEVICE); + else + BUG_ON(dir != DMA_FROM_DEVICE); + break; + default: + BUG(); + } +} + +static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, + size_t size) +{ + u64 mask = DMA_BIT_MASK(32); + + if (dev && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + return addr + size - 1 <= mask; +} + +static void * +swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, + unsigned long attrs) +{ + phys_addr_t phys_addr; + + if (swiotlb_force == SWIOTLB_NO_FORCE) + goto out_warn; + + phys_addr = swiotlb_tbl_map_single(dev, + __phys_to_dma(dev, io_tlb_start), + 0, size, DMA_FROM_DEVICE, attrs); + if (phys_addr == SWIOTLB_MAP_ERROR) + goto out_warn; + + *dma_handle = __phys_to_dma(dev, phys_addr); + if (!dma_coherent_ok(dev, *dma_handle, size)) + goto out_unmap; + + memset(phys_to_virt(phys_addr), 0, size); + return phys_to_virt(phys_addr); + +out_unmap: + dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)dev->coherent_dma_mask, + (unsigned long long)*dma_handle); + + /* + * DMA_TO_DEVICE to avoid memcpy in unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); +out_warn: + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { + dev_warn(dev, + "swiotlb: coherent allocation failed, size=%zu\n", + size); + dump_stack(); + } + return NULL; +} + +static bool swiotlb_free_buffer(struct device *dev, size_t size, + dma_addr_t dma_addr) +{ + phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); + + WARN_ON_ONCE(irqs_disabled()); + + if (!is_swiotlb_buffer(phys_addr)) + return false; + + /* + * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + return true; +} + +static void +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) +{ + if (swiotlb_force == SWIOTLB_NO_FORCE) + return; + + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly. + * unless they check for dma_mapping_error (most don't) + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ + dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", + size); + + if (size <= io_tlb_overflow || !do_panic) + return; + + if (dir == DMA_BIDIRECTIONAL) + panic("DMA: Random memory could be DMA accessed\n"); + if (dir == DMA_FROM_DEVICE) + panic("DMA: Random memory could be DMA written\n"); + if (dir == DMA_TO_DEVICE) + panic("DMA: Random memory could be DMA read\n"); +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * physical address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. + */ +dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t map, phys = page_to_phys(page) + offset; + dma_addr_t dev_addr = phys_to_dma(dev, phys); + + BUG_ON(dir == DMA_NONE); + /* + * If the address happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) + return dev_addr; + + trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); + + /* Oh well, have to allocate and map a bounce buffer. */ + map = map_single(dev, phys, size, dir, attrs); + if (map == SWIOTLB_MAP_ERROR) { + swiotlb_full(dev, size, dir, 1); + return __phys_to_dma(dev, io_tlb_overflow_buffer); + } + + dev_addr = __phys_to_dma(dev, map); + + /* Ensure that the address returned is DMA'ble */ + if (dma_capable(dev, dev_addr, size)) + return dev_addr; + + attrs |= DMA_ATTR_SKIP_CPU_SYNC; + swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); + + return __phys_to_dma(dev, io_tlb_overflow_buffer); +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous swiotlb_map_page call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + + BUG_ON(dir == DMA_NONE); + + if (is_swiotlb_buffer(paddr)) { + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + /* + * phys_to_virt doesn't work with hihgmem page but we could + * call dma_mark_clean() with hihgmem page here. However, we + * are fine since dma_mark_clean() is null on POWERPC. We can + * make dma_mark_clean() take a physical address if necessary. + */ + dma_mark_clean(phys_to_virt(paddr), size); +} + +void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + unmap_single(hwdev, dev_addr, size, dir, attrs); +} + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a swiotlb_map_page() but wish to interrogate the buffer + * using the cpu, yet do not wish to teardown the dma mapping, you must + * call this function before doing so. At the next point you give the dma + * address back to the card, you must first perform a + * swiotlb_dma_sync_for_device, and then the device again owns the buffer + */ +static void +swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) +{ + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + + BUG_ON(dir == DMA_NONE); + + if (is_swiotlb_buffer(paddr)) { + swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(phys_to_virt(paddr), size); +} + +void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); +} + +void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); +} + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above swiotlb_map_page + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for swiotlb_map_page are the + * same here. + */ +int +swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { + phys_addr_t paddr = sg_phys(sg); + dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); + + if (swiotlb_force == SWIOTLB_FORCE || + !dma_capable(hwdev, dev_addr, sg->length)) { + phys_addr_t map = map_single(hwdev, sg_phys(sg), + sg->length, dir, attrs); + if (map == SWIOTLB_MAP_ERROR) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ + swiotlb_full(hwdev, sg->length, dir, 0); + attrs |= DMA_ATTR_SKIP_CPU_SYNC; + swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, + attrs); + sg_dma_len(sgl) = 0; + return 0; + } + sg->dma_address = __phys_to_dma(hwdev, map); + } else + sg->dma_address = dev_addr; + sg_dma_len(sg) = sg->length; + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_page() above. + */ +void +swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) + unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, + attrs); +} + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +static void +swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + swiotlb_sync_single(hwdev, sg->dma_address, + sg_dma_len(sg), dir, target); +} + +void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); +} + +void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); +} + +int +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) +{ + return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); +} + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; +} + +void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + void *vaddr; + + /* temporary workaround: */ + if (gfp & __GFP_NOWARN) + attrs |= DMA_ATTR_NO_WARN; + + /* + * Don't print a warning when the first allocation attempt fails. + * swiotlb_alloc_coherent() will print a warning when the DMA memory + * allocation ultimately failed. + */ + gfp |= __GFP_NOWARN; + + vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + if (!vaddr) + vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); + return vaddr; +} + +void swiotlb_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) +{ + if (!swiotlb_free_buffer(dev, size, dma_addr)) + dma_direct_free(dev, size, vaddr, dma_addr, attrs); +} + +const struct dma_map_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc = swiotlb_alloc, + .free = swiotlb_free, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .dma_supported = dma_direct_supported, +}; diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c new file mode 100644 index 000000000000..631ddec4b60a --- /dev/null +++ b/kernel/dma/virt.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA operations that map to virtual addresses without flushing memory. + */ +#include +#include +#include +#include + +static void *dma_virt_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs) +{ + void *ret; + + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (ret) + *dma_handle = (uintptr_t)ret; + return ret; +} + +static void dma_virt_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + unsigned long attrs) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + return (uintptr_t)(page_address(page) + offset); +} + +static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + sg_dma_address(sg) = (uintptr_t)sg_virt(sg); + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +const struct dma_map_ops dma_virt_ops = { + .alloc = dma_virt_alloc, + .free = dma_virt_free, + .map_page = dma_virt_map_page, + .map_sg = dma_virt_map_sg, +}; +EXPORT_SYMBOL(dma_virt_ops); diff --git a/lib/Kconfig b/lib/Kconfig index 809fdd155739..803fcbced729 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -420,60 +420,15 @@ config HAS_IOPORT_MAP depends on HAS_IOMEM && !NO_IOPORT_MAP default y -config HAS_DMA - bool - depends on !NO_DMA - default y +source "kernel/dma/Kconfig" config SGL_ALLOC bool default n -config NEED_SG_DMA_LENGTH - bool - -config NEED_DMA_MAP_STATE - bool - -config ARCH_DMA_ADDR_T_64BIT - def_bool 64BIT || PHYS_ADDR_T_64BIT - config IOMMU_HELPER bool -config ARCH_HAS_SYNC_DMA_FOR_DEVICE - bool - -config ARCH_HAS_SYNC_DMA_FOR_CPU - bool - select NEED_DMA_MAP_STATE - -config DMA_DIRECT_OPS - bool - depends on HAS_DMA - -config DMA_NONCOHERENT_OPS - bool - depends on HAS_DMA - select DMA_DIRECT_OPS - -config DMA_NONCOHERENT_MMAP - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_NONCOHERENT_CACHE_SYNC - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_VIRT_OPS - bool - depends on HAS_DMA - -config SWIOTLB - bool - select DMA_DIRECT_OPS - select NEED_DMA_MAP_STATE - config CHECK_SIGNATURE bool diff --git a/lib/Makefile b/lib/Makefile index 5e0e160c9242..8153fdab287f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,9 +29,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -obj-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o -obj-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -148,7 +145,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o -obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o @@ -169,8 +165,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o obj-$(CONFIG_LRU_CACHE) += lru_cache.o -obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o - obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o diff --git a/lib/dma-debug.c b/lib/dma-debug.c deleted file mode 100644 index c007d25bee09..000000000000 --- a/lib/dma-debug.c +++ /dev/null @@ -1,1773 +0,0 @@ -/* - * Copyright (C) 2008 Advanced Micro Devices, Inc. - * - * Author: Joerg Roedel - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define HASH_SIZE 1024ULL -#define HASH_FN_SHIFT 13 -#define HASH_FN_MASK (HASH_SIZE - 1) - -/* allow architectures to override this if absolutely required */ -#ifndef PREALLOC_DMA_DEBUG_ENTRIES -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -#endif - -enum { - dma_debug_single, - dma_debug_page, - dma_debug_sg, - dma_debug_coherent, - dma_debug_resource, -}; - -enum map_err_types { - MAP_ERR_CHECK_NOT_APPLICABLE, - MAP_ERR_NOT_CHECKED, - MAP_ERR_CHECKED, -}; - -#define DMA_DEBUG_STACKTRACE_ENTRIES 5 - -/** - * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping - * @list: node on pre-allocated free_entries list - * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent - * @type: single, page, sg, coherent - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn - * @size: length of the mapping - * @direction: enum dma_data_direction - * @sg_call_ents: 'nents' from dma_map_sg - * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected - */ -struct dma_debug_entry { - struct list_head list; - struct device *dev; - int type; - unsigned long pfn; - size_t offset; - u64 dev_addr; - u64 size; - int direction; - int sg_call_ents; - int sg_mapped_ents; - enum map_err_types map_err_type; -#ifdef CONFIG_STACKTRACE - struct stack_trace stacktrace; - unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; -#endif -}; - -typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *); - -struct hash_bucket { - struct list_head list; - spinlock_t lock; -} ____cacheline_aligned_in_smp; - -/* Hash list to save the allocated dma addresses */ -static struct hash_bucket dma_entry_hash[HASH_SIZE]; -/* List of pre-allocated dma_debug_entry's */ -static LIST_HEAD(free_entries); -/* Lock for the list above */ -static DEFINE_SPINLOCK(free_entries_lock); - -/* Global disable flag - will be set in case of an error */ -static bool global_disable __read_mostly; - -/* Early initialization disable flag, set at the end of dma_debug_init */ -static bool dma_debug_initialized __read_mostly; - -static inline bool dma_debug_disabled(void) -{ - return global_disable || !dma_debug_initialized; -} - -/* Global error count */ -static u32 error_count; - -/* Global error show enable*/ -static u32 show_all_errors __read_mostly; -/* Number of errors to show */ -static u32 show_num_errors = 1; - -static u32 num_free_entries; -static u32 min_free_entries; -static u32 nr_total_entries; - -/* number of preallocated entries requested by kernel cmdline */ -static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - -/* debugfs dentry's for the stuff above */ -static struct dentry *dma_debug_dent __read_mostly; -static struct dentry *global_disable_dent __read_mostly; -static struct dentry *error_count_dent __read_mostly; -static struct dentry *show_all_errors_dent __read_mostly; -static struct dentry *show_num_errors_dent __read_mostly; -static struct dentry *num_free_entries_dent __read_mostly; -static struct dentry *min_free_entries_dent __read_mostly; -static struct dentry *filter_dent __read_mostly; - -/* per-driver filter related state */ - -#define NAME_MAX_LEN 64 - -static char current_driver_name[NAME_MAX_LEN] __read_mostly; -static struct device_driver *current_driver __read_mostly; - -static DEFINE_RWLOCK(driver_name_lock); - -static const char *const maperr2str[] = { - [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", - [MAP_ERR_NOT_CHECKED] = "dma map error not checked", - [MAP_ERR_CHECKED] = "dma map error checked", -}; - -static const char *type2name[5] = { "single", "page", - "scather-gather", "coherent", - "resource" }; - -static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", - "DMA_FROM_DEVICE", "DMA_NONE" }; - -/* - * The access to some variables in this macro is racy. We can't use atomic_t - * here because all these variables are exported to debugfs. Some of them even - * writeable. This is also the reason why a lock won't help much. But anyway, - * the races are no big deal. Here is why: - * - * error_count: the addition is racy, but the worst thing that can happen is - * that we don't count some errors - * show_num_errors: the subtraction is racy. Also no big deal because in - * worst case this will result in one warning more in the - * system log than the user configured. This variable is - * writeable via debugfs. - */ -static inline void dump_entry_trace(struct dma_debug_entry *entry) -{ -#ifdef CONFIG_STACKTRACE - if (entry) { - pr_warning("Mapped at:\n"); - print_stack_trace(&entry->stacktrace, 0); - } -#endif -} - -static bool driver_filter(struct device *dev) -{ - struct device_driver *drv; - unsigned long flags; - bool ret; - - /* driver filter off */ - if (likely(!current_driver_name[0])) - return true; - - /* driver filter on and initialized */ - if (current_driver && dev && dev->driver == current_driver) - return true; - - /* driver filter on, but we can't filter on a NULL device... */ - if (!dev) - return false; - - if (current_driver || !current_driver_name[0]) - return false; - - /* driver filter on but not yet initialized */ - drv = dev->driver; - if (!drv) - return false; - - /* lock to protect against change of current_driver_name */ - read_lock_irqsave(&driver_name_lock, flags); - - ret = false; - if (drv->name && - strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { - current_driver = drv; - ret = true; - } - - read_unlock_irqrestore(&driver_name_lock, flags); - - return ret; -} - -#define err_printk(dev, entry, format, arg...) do { \ - error_count += 1; \ - if (driver_filter(dev) && \ - (show_all_errors || show_num_errors > 0)) { \ - WARN(1, "%s %s: " format, \ - dev ? dev_driver_string(dev) : "NULL", \ - dev ? dev_name(dev) : "NULL", ## arg); \ - dump_entry_trace(entry); \ - } \ - if (!show_all_errors && show_num_errors > 0) \ - show_num_errors -= 1; \ - } while (0); - -/* - * Hash related functions - * - * Every DMA-API request is saved into a struct dma_debug_entry. To - * have quick access to these structs they are stored into a hash. - */ -static int hash_fn(struct dma_debug_entry *entry) -{ - /* - * Hash function is based on the dma address. - * We use bits 20-27 here as the index into the hash - */ - return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; -} - -/* - * Request exclusive access to a hash bucket for a given dma_debug_entry. - */ -static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, - unsigned long *flags) - __acquires(&dma_entry_hash[idx].lock) -{ - int idx = hash_fn(entry); - unsigned long __flags; - - spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); - *flags = __flags; - return &dma_entry_hash[idx]; -} - -/* - * Give up exclusive access to the hash bucket - */ -static void put_hash_bucket(struct hash_bucket *bucket, - unsigned long *flags) - __releases(&bucket->lock) -{ - unsigned long __flags = *flags; - - spin_unlock_irqrestore(&bucket->lock, __flags); -} - -static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b) -{ - return ((a->dev_addr == b->dev_addr) && - (a->dev == b->dev)) ? true : false; -} - -static bool containing_match(struct dma_debug_entry *a, - struct dma_debug_entry *b) -{ - if (a->dev != b->dev) - return false; - - if ((b->dev_addr <= a->dev_addr) && - ((b->dev_addr + b->size) >= (a->dev_addr + a->size))) - return true; - - return false; -} - -/* - * Search a given entry in the hash bucket list - */ -static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket, - struct dma_debug_entry *ref, - match_fn match) -{ - struct dma_debug_entry *entry, *ret = NULL; - int matches = 0, match_lvl, last_lvl = -1; - - list_for_each_entry(entry, &bucket->list, list) { - if (!match(ref, entry)) - continue; - - /* - * Some drivers map the same physical address multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which returns the entry from - * the hash which fits best to the reference value - * instead of the first-fit. - */ - matches += 1; - match_lvl = 0; - entry->size == ref->size ? ++match_lvl : 0; - entry->type == ref->type ? ++match_lvl : 0; - entry->direction == ref->direction ? ++match_lvl : 0; - entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0; - - if (match_lvl == 4) { - /* perfect-fit - return the result */ - return entry; - } else if (match_lvl > last_lvl) { - /* - * We found an entry that fits better then the - * previous one or it is the 1st match. - */ - last_lvl = match_lvl; - ret = entry; - } - } - - /* - * If we have multiple matches but no perfect-fit, just return - * NULL. - */ - ret = (matches == 1) ? ret : NULL; - - return ret; -} - -static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket, - struct dma_debug_entry *ref) -{ - return __hash_bucket_find(bucket, ref, exact_match); -} - -static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, - struct dma_debug_entry *ref, - unsigned long *flags) -{ - - unsigned int max_range = dma_get_max_seg_size(ref->dev); - struct dma_debug_entry *entry, index = *ref; - unsigned int range = 0; - - while (range <= max_range) { - entry = __hash_bucket_find(*bucket, ref, containing_match); - - if (entry) - return entry; - - /* - * Nothing found, go back a hash bucket - */ - put_hash_bucket(*bucket, flags); - range += (1 << HASH_FN_SHIFT); - index.dev_addr -= (1 << HASH_FN_SHIFT); - *bucket = get_hash_bucket(&index, flags); - } - - return NULL; -} - -/* - * Add an entry to a hash bucket - */ -static void hash_bucket_add(struct hash_bucket *bucket, - struct dma_debug_entry *entry) -{ - list_add_tail(&entry->list, &bucket->list); -} - -/* - * Remove entry from a hash bucket list - */ -static void hash_bucket_del(struct dma_debug_entry *entry) -{ - list_del(&entry->list); -} - -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - -/* - * Dump mapping entries for debugging purposes - */ -void debug_dma_dump_mappings(struct device *dev) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!dev || dev == entry->dev) { - dev_info(entry->dev, - "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - } - - spin_unlock_irqrestore(&bucket->lock, flags); - } -} - -/* - * For each mapping (initial cacheline in the case of - * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a - * scatterlist, or the cacheline specified in dma_map_single) insert - * into this tree using the cacheline as the key. At - * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If - * the entry already exists at insertion time add a tag as a reference - * count for the overlapping mappings. For now, the overlap tracking - * just ensures that 'unmaps' balance 'maps' before marking the - * cacheline idle, but we should also be flagging overlaps as an API - * violation. - * - * Memory usage is mostly constrained by the maximum number of available - * dma-debug entries in that we need a free dma_debug_entry before - * inserting into the tree. In the case of dma_map_page and - * dma_alloc_coherent there is only one dma_debug_entry and one - * dma_active_cacheline entry to track per event. dma_map_sg(), on the - * other hand, consumes a single dma_debug_entry, but inserts 'nents' - * entries into the tree. - * - * At any time debug_dma_assert_idle() can be called to trigger a - * warning if any cachelines in the given page are in the active set. - */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); -static DEFINE_SPINLOCK(radix_lock); -#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) -#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) -#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) - -static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) -{ - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); -} - -static int active_cacheline_read_overlap(phys_addr_t cln) -{ - int overlap = 0, i; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) - overlap |= 1 << i; - return overlap; -} - -static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) -{ - int i; - - if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) - return overlap; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (overlap & 1 << i) - radix_tree_tag_set(&dma_active_cacheline, cln, i); - else - radix_tree_tag_clear(&dma_active_cacheline, cln, i); - - return overlap; -} - -static void active_cacheline_inc_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - overlap = active_cacheline_set_overlap(cln, ++overlap); - - /* If we overflowed the overlap counter then we're potentially - * leaking dma-mappings. Otherwise, if maps and unmaps are - * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the cacheline may be marked idle - * prematurely. - */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, - "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", - ACTIVE_CACHELINE_MAX_OVERLAP, &cln); -} - -static int active_cacheline_dec_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - return active_cacheline_set_overlap(cln, --overlap); -} - -static int active_cacheline_insert(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - int rc; - - /* If the device is not writing memory then we don't have any - * concerns about the cpu consuming stale data. This mitigates - * legitimate usages of overlapping mappings. - */ - if (entry->direction == DMA_TO_DEVICE) - return 0; - - spin_lock_irqsave(&radix_lock, flags); - rc = radix_tree_insert(&dma_active_cacheline, cln, entry); - if (rc == -EEXIST) - active_cacheline_inc_overlap(cln); - spin_unlock_irqrestore(&radix_lock, flags); - - return rc; -} - -static void active_cacheline_remove(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - - /* ...mirror the insert case */ - if (entry->direction == DMA_TO_DEVICE) - return; - - spin_lock_irqsave(&radix_lock, flags); - /* since we are counting overlaps the final put of the - * cacheline will occur when the overlap count is 0. - * active_cacheline_dec_overlap() returns -1 in that case - */ - if (active_cacheline_dec_overlap(cln) < 0) - radix_tree_delete(&dma_active_cacheline, cln); - spin_unlock_irqrestore(&radix_lock, flags); -} - -/** - * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_cacheline tree - * - * Place a call to this routine in cases where the cpu touching the page - * before the dma completes (page is dma_unmapped) will lead to data - * corruption. - */ -void debug_dma_assert_idle(struct page *page) -{ - static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; - struct dma_debug_entry *entry = NULL; - void **results = (void **) &ents; - unsigned int nents, i; - unsigned long flags; - phys_addr_t cln; - - if (dma_debug_disabled()) - return; - - if (!page) - return; - - cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; - spin_lock_irqsave(&radix_lock, flags); - nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, - CACHELINES_PER_PAGE); - for (i = 0; i < nents; i++) { - phys_addr_t ent_cln = to_cacheline_number(ents[i]); - - if (ent_cln == cln) { - entry = ents[i]; - break; - } else if (ent_cln >= cln + CACHELINES_PER_PAGE) - break; - } - spin_unlock_irqrestore(&radix_lock, flags); - - if (!entry) - return; - - cln = to_cacheline_number(entry); - err_printk(entry->dev, entry, - "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", - &cln); -} - -/* - * Wrapper function for adding an entry to the hash. - * This function takes care of locking itself. - */ -static void add_dma_entry(struct dma_debug_entry *entry) -{ - struct hash_bucket *bucket; - unsigned long flags; - int rc; - - bucket = get_hash_bucket(entry, &flags); - hash_bucket_add(bucket, entry); - put_hash_bucket(bucket, &flags); - - rc = active_cacheline_insert(entry); - if (rc == -ENOMEM) { - pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); - global_disable = true; - } - - /* TODO: report -EEXIST errors here as overlapping mappings are - * not supported by the DMA API - */ -} - -static struct dma_debug_entry *__dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); - - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; - - return entry; -} - -/* struct dma_entry allocator - * - * The next two functions implement the allocator for - * struct dma_debug_entries. - */ -static struct dma_debug_entry *dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&free_entries_lock, flags); - - if (list_empty(&free_entries)) { - global_disable = true; - spin_unlock_irqrestore(&free_entries_lock, flags); - pr_err("DMA-API: debugging out of memory - disabling\n"); - return NULL; - } - - entry = __dma_entry_alloc(); - - spin_unlock_irqrestore(&free_entries_lock, flags); - -#ifdef CONFIG_STACKTRACE - entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; - entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 2; - save_stack_trace(&entry->stacktrace); -#endif - - return entry; -} - -static void dma_entry_free(struct dma_debug_entry *entry) -{ - unsigned long flags; - - active_cacheline_remove(entry); - - /* - * add to beginning of the list - this way the entries are - * more likely cache hot when they are reallocated. - */ - spin_lock_irqsave(&free_entries_lock, flags); - list_add(&entry->list, &free_entries); - num_free_entries += 1; - spin_unlock_irqrestore(&free_entries_lock, flags); -} - -int dma_debug_resize_entries(u32 num_entries) -{ - int i, delta, ret = 0; - unsigned long flags; - struct dma_debug_entry *entry; - LIST_HEAD(tmp); - - spin_lock_irqsave(&free_entries_lock, flags); - - if (nr_total_entries < num_entries) { - delta = num_entries - nr_total_entries; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - for (i = 0; i < delta; i++) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - - list_add_tail(&entry->list, &tmp); - } - - spin_lock_irqsave(&free_entries_lock, flags); - - list_splice(&tmp, &free_entries); - nr_total_entries += i; - num_free_entries += i; - } else { - delta = nr_total_entries - num_entries; - - for (i = 0; i < delta && !list_empty(&free_entries); i++) { - entry = __dma_entry_alloc(); - kfree(entry); - } - - nr_total_entries -= i; - } - - if (nr_total_entries != num_entries) - ret = 1; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - return ret; -} - -/* - * DMA-API debugging init code - * - * The init code does two things: - * 1. Initialize core data structures - * 2. Preallocate a given number of dma_debug_entry structs - */ - -static int prealloc_memory(u32 num_entries) -{ - struct dma_debug_entry *entry, *next_entry; - int i; - - for (i = 0; i < num_entries; ++i) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto out_err; - - list_add_tail(&entry->list, &free_entries); - } - - num_free_entries = num_entries; - min_free_entries = num_entries; - - pr_info("DMA-API: preallocated %d debug entries\n", num_entries); - - return 0; - -out_err: - - list_for_each_entry_safe(entry, next_entry, &free_entries, list) { - list_del(&entry->list); - kfree(entry); - } - - return -ENOMEM; -} - -static ssize_t filter_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN + 1]; - unsigned long flags; - int len; - - if (!current_driver_name[0]) - return 0; - - /* - * We can't copy to userspace directly because current_driver_name can - * only be read under the driver_name_lock with irqs disabled. So - * create a temporary copy first. - */ - read_lock_irqsave(&driver_name_lock, flags); - len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); - read_unlock_irqrestore(&driver_name_lock, flags); - - return simple_read_from_buffer(user_buf, count, ppos, buf, len); -} - -static ssize_t filter_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN]; - unsigned long flags; - size_t len; - int i; - - /* - * We can't copy from userspace directly. Access to - * current_driver_name is protected with a write_lock with irqs - * disabled. Since copy_from_user can fault and may sleep we - * need to copy to temporary buffer first - */ - len = min(count, (size_t)(NAME_MAX_LEN - 1)); - if (copy_from_user(buf, userbuf, len)) - return -EFAULT; - - buf[len] = 0; - - write_lock_irqsave(&driver_name_lock, flags); - - /* - * Now handle the string we got from userspace very carefully. - * The rules are: - * - only use the first token we got - * - token delimiter is everything looking like a space - * character (' ', '\n', '\t' ...) - * - */ - if (!isalnum(buf[0])) { - /* - * If the first character userspace gave us is not - * alphanumerical then assume the filter should be - * switched off. - */ - if (current_driver_name[0]) - pr_info("DMA-API: switching off dma-debug driver filter\n"); - current_driver_name[0] = 0; - current_driver = NULL; - goto out_unlock; - } - - /* - * Now parse out the first token and use it as the name for the - * driver to filter for. - */ - for (i = 0; i < NAME_MAX_LEN - 1; ++i) { - current_driver_name[i] = buf[i]; - if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) - break; - } - current_driver_name[i] = 0; - current_driver = NULL; - - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - -out_unlock: - write_unlock_irqrestore(&driver_name_lock, flags); - - return count; -} - -static const struct file_operations filter_fops = { - .read = filter_read, - .write = filter_write, - .llseek = default_llseek, -}; - -static int dma_debug_fs_init(void) -{ - dma_debug_dent = debugfs_create_dir("dma-api", NULL); - if (!dma_debug_dent) { - pr_err("DMA-API: can not create debugfs directory\n"); - return -ENOMEM; - } - - global_disable_dent = debugfs_create_bool("disabled", 0444, - dma_debug_dent, - &global_disable); - if (!global_disable_dent) - goto out_err; - - error_count_dent = debugfs_create_u32("error_count", 0444, - dma_debug_dent, &error_count); - if (!error_count_dent) - goto out_err; - - show_all_errors_dent = debugfs_create_u32("all_errors", 0644, - dma_debug_dent, - &show_all_errors); - if (!show_all_errors_dent) - goto out_err; - - show_num_errors_dent = debugfs_create_u32("num_errors", 0644, - dma_debug_dent, - &show_num_errors); - if (!show_num_errors_dent) - goto out_err; - - num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, - dma_debug_dent, - &num_free_entries); - if (!num_free_entries_dent) - goto out_err; - - min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, - dma_debug_dent, - &min_free_entries); - if (!min_free_entries_dent) - goto out_err; - - filter_dent = debugfs_create_file("driver_filter", 0644, - dma_debug_dent, NULL, &filter_fops); - if (!filter_dent) - goto out_err; - - return 0; - -out_err: - debugfs_remove_recursive(dma_debug_dent); - - return -ENOMEM; -} - -static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) -{ - struct dma_debug_entry *entry; - unsigned long flags; - int count = 0, i; - - for (i = 0; i < HASH_SIZE; ++i) { - spin_lock_irqsave(&dma_entry_hash[i].lock, flags); - list_for_each_entry(entry, &dma_entry_hash[i].list, list) { - if (entry->dev == dev) { - count += 1; - *out_entry = entry; - } - } - spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); - } - - return count; -} - -static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) -{ - struct device *dev = data; - struct dma_debug_entry *uninitialized_var(entry); - int count; - - if (dma_debug_disabled()) - return 0; - - switch (action) { - case BUS_NOTIFY_UNBOUND_DRIVER: - count = device_dma_allocations(dev, &entry); - if (count == 0) - break; - err_printk(dev, entry, "DMA-API: device driver has pending " - "DMA allocations while released from device " - "[count=%d]\n" - "One of leaked entries details: " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [mapped as %s]\n", - count, entry->dev_addr, entry->size, - dir2name[entry->direction], type2name[entry->type]); - break; - default: - break; - } - - return 0; -} - -void dma_debug_add_bus(struct bus_type *bus) -{ - struct notifier_block *nb; - - if (dma_debug_disabled()) - return; - - nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); - if (nb == NULL) { - pr_err("dma_debug_add_bus: out of memory\n"); - return; - } - - nb->notifier_call = dma_debug_device_change; - - bus_register_notifier(bus, nb); -} - -static int dma_debug_init(void) -{ - int i; - - /* Do not use dma_debug_initialized here, since we really want to be - * called to set dma_debug_initialized - */ - if (global_disable) - return 0; - - for (i = 0; i < HASH_SIZE; ++i) { - INIT_LIST_HEAD(&dma_entry_hash[i].list); - spin_lock_init(&dma_entry_hash[i].lock); - } - - if (dma_debug_fs_init() != 0) { - pr_err("DMA-API: error creating debugfs entries - disabling\n"); - global_disable = true; - - return 0; - } - - if (prealloc_memory(nr_prealloc_entries) != 0) { - pr_err("DMA-API: debugging out of memory error - disabled\n"); - global_disable = true; - - return 0; - } - - nr_total_entries = num_free_entries; - - dma_debug_initialized = true; - - pr_info("DMA-API: debugging enabled by kernel config\n"); - return 0; -} -core_initcall(dma_debug_init); - -static __init int dma_debug_cmdline(char *str) -{ - if (!str) - return -EINVAL; - - if (strncmp(str, "off", 3) == 0) { - pr_info("DMA-API: debugging disabled on kernel command line\n"); - global_disable = true; - } - - return 0; -} - -static __init int dma_debug_entries_cmdline(char *str) -{ - if (!str) - return -EINVAL; - if (!get_option(&str, &nr_prealloc_entries)) - nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; -} - -__setup("dma_debug=", dma_debug_cmdline); -__setup("dma_debug_entries=", dma_debug_entries_cmdline); - -static void check_unmap(struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - - if (!entry) { - /* must drop lock before calling dma_mapping_error */ - put_hash_bucket(bucket, &flags); - - if (dma_mapping_error(ref->dev, ref->dev_addr)) { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free an " - "invalid DMA memory address\n"); - } else { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free DMA " - "memory it has not allocated [device " - "address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - } - return; - } - - if (ref->size != entry->size) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different size " - "[device address=0x%016llx] [map size=%llu bytes] " - "[unmap size=%llu bytes]\n", - ref->dev_addr, entry->size, ref->size); - } - - if (ref->type != entry->type) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with wrong function " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s] [unmapped as %s]\n", - ref->dev_addr, ref->size, - type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different CPU address " - "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", - ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); - } - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA sg list with different entry count " - "[map count=%d] [unmap count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - - /* - * This may be no bug in reality - but most implementations of the - * DMA API don't handle this properly, so check for it here - */ - if (ref->direction != entry->direction) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [unmapped with %s]\n", - ref->dev_addr, ref->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - /* - * Drivers should use dma_mapping_error() to check the returned - * addresses of dma_map_single() and dma_map_page(). - * If not, print this warning message. See Documentation/DMA-API.txt. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - err_printk(ref->dev, entry, - "DMA-API: device driver failed to check map error" - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s]", - ref->dev_addr, ref->size, - type2name[entry->type]); - } - - hash_bucket_del(entry); - dma_entry_free(entry); - - put_hash_bucket(bucket, &flags); -} - -static void check_for_stack(struct device *dev, - struct page *page, size_t offset) -{ - void *addr; - struct vm_struct *stack_vm_area = task_stack_vm_area(current); - - if (!stack_vm_area) { - /* Stack is direct-mapped. */ - if (PageHighMem(page)) - return; - addr = page_address(page) + offset; - if (object_is_on_stack(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); - } else { - /* Stack is vmalloced. */ - int i; - - for (i = 0; i < stack_vm_area->nr_pages; i++) { - if (page != stack_vm_area->pages[i]) - continue; - - addr = (u8 *)current->stack + i * PAGE_SIZE + offset; - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); - break; - } - } -} - -static inline bool overlap(void *addr, unsigned long len, void *start, void *end) -{ - unsigned long a1 = (unsigned long)addr; - unsigned long b1 = a1 + len; - unsigned long a2 = (unsigned long)start; - unsigned long b2 = (unsigned long)end; - - return !(b1 <= a2 || a1 >= b2); -} - -static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) -{ - if (overlap(addr, len, _stext, _etext) || - overlap(addr, len, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); -} - -static void check_sync(struct device *dev, - struct dma_debug_entry *ref, - bool to_cpu) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - - entry = bucket_find_contain(&bucket, ref, &flags); - - if (!entry) { - err_printk(dev, NULL, "DMA-API: device driver tries " - "to sync DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - (unsigned long long)ref->dev_addr, ref->size); - goto out; - } - - if (ref->size > entry->size) { - err_printk(dev, entry, "DMA-API: device driver syncs" - " DMA memory outside allocated range " - "[device address=0x%016llx] " - "[allocation size=%llu bytes] " - "[sync offset+size=%llu]\n", - entry->dev_addr, entry->size, - ref->size); - } - - if (entry->direction == DMA_BIDIRECTIONAL) - goto out; - - if (ref->direction != entry->direction) { - err_printk(dev, entry, "DMA-API: device driver syncs " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && - !(ref->direction == DMA_TO_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device read-only DMA memory for cpu " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && - !(ref->direction == DMA_FROM_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device write-only DMA memory to device " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver syncs " - "DMA sg list with different entry count " - "[map count=%d] [sync count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - -out: - put_hash_bucket(bucket, &flags); -} - -static void check_sg_segment(struct device *dev, struct scatterlist *sg) -{ -#ifdef CONFIG_DMA_API_DEBUG_SG - unsigned int max_seg = dma_get_max_seg_size(dev); - u64 start, end, boundary = dma_get_seg_boundary(dev); - - /* - * Either the driver forgot to set dma_parms appropriately, or - * whoever generated the list forgot to check them. - */ - if (sg->length > max_seg) - err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", - sg->length, max_seg); - /* - * In some cases this could potentially be the DMA API - * implementation's fault, but it would usually imply that - * the scatterlist was built inappropriately to begin with. - */ - start = sg_dma_address(sg); - end = start + sg_dma_len(sg) - 1; - if ((start ^ end) & ~boundary) - err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", - start, end, boundary); -#endif -} - -void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr, - bool map_single) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (dma_mapping_error(dev, dma_addr)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->dev = dev; - entry->type = dma_debug_page; - entry->pfn = page_to_pfn(page); - entry->offset = offset, - entry->dev_addr = dma_addr; - entry->size = size; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - if (map_single) - entry->type = dma_debug_single; - - check_for_stack(dev, page, offset); - - if (!PageHighMem(page)) { - void *addr = page_address(page) + offset; - - check_for_illegal_area(dev, addr, size); - } - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_page); - -void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - struct dma_debug_entry ref; - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - if (unlikely(dma_debug_disabled())) - return; - - ref.dev = dev; - ref.dev_addr = dma_addr; - bucket = get_hash_bucket(&ref, &flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!exact_match(&ref, entry)) - continue; - - /* - * The same physical address can be mapped multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which updates the first entry - * from the hash which fits the reference value and is - * not currently listed as being checked. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - entry->map_err_type = MAP_ERR_CHECKED; - break; - } - } - - put_hash_bucket(bucket, &flags); -} -EXPORT_SYMBOL(debug_dma_mapping_error); - -void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction, bool map_single) -{ - struct dma_debug_entry ref = { - .type = dma_debug_page, - .dev = dev, - .dev_addr = addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - if (map_single) - ref.type = dma_debug_single; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_page); - -void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) -{ - struct dma_debug_entry *entry; - struct scatterlist *s; - int i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, mapped_ents, i) { - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_sg; - entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, - entry->size = sg_dma_len(s); - entry->dev_addr = sg_dma_address(s); - entry->direction = direction; - entry->sg_call_ents = nents; - entry->sg_mapped_ents = mapped_ents; - - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - - check_sg_segment(dev, s); - - add_dma_entry(entry); - } -} -EXPORT_SYMBOL(debug_dma_map_sg); - -static int get_nr_mapped_entries(struct device *dev, - struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - int mapped_ents; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - mapped_ents = 0; - - if (entry) - mapped_ents = entry->sg_mapped_ents; - put_hash_bucket(bucket, &flags); - - return mapped_ents; -} - -void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sglist, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = dir, - .sg_call_ents = nelems, - }; - - if (mapped_ents && i >= mapped_ents) - break; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - check_unmap(&ref); - } -} -EXPORT_SYMBOL(debug_dma_unmap_sg); - -void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (unlikely(virt == NULL)) - return; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_coherent; - entry->dev = dev; - entry->offset = offset_in_page(virt); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = DMA_BIDIRECTIONAL; - - if (is_vmalloc_addr(virt)) - entry->pfn = vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_alloc_coherent); - -void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) -{ - struct dma_debug_entry ref = { - .type = dma_debug_coherent, - .dev = dev, - .offset = offset_in_page(virt), - .dev_addr = addr, - .size = size, - .direction = DMA_BIDIRECTIONAL, - }; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_free_coherent); - -void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_resource; - entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_resource); - -void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - struct dma_debug_entry ref = { - .type = dma_debug_resource, - .dev = dev, - .dev_addr = dma_addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_resource); - -void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); - -void debug_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_device); - -void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); - -void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); - -void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, true); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); - -void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, false); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); - -static int __init dma_debug_driver_setup(char *str) -{ - int i; - - for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { - current_driver_name[i] = *str; - if (*str == 0) - break; - } - - if (current_driver_name[0]) - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - - - return 1; -} -__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/lib/dma-direct.c b/lib/dma-direct.c deleted file mode 100644 index 8be8106270c2..000000000000 --- a/lib/dma-direct.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map physical memory directly without using an IOMMU or - * flushing caches. - */ -#include -#include -#include -#include -#include -#include -#include - -#define DIRECT_MAPPING_ERROR 0 - -/* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for entirely different regions: - */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif - -/* - * For AMD SEV all DMA must be to unencrypted addresses. - */ -static inline bool force_dma_unencrypted(void) -{ - return sev_active(); -} - -static bool -check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, - const char *caller) -{ - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { - if (!dev->dma_mask) { - dev_err(dev, - "%s: call on device without dma_mask\n", - caller); - return false; - } - - if (*dev->dma_mask >= DMA_BIT_MASK(32)) { - dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx\n", - caller, &dma_addr, size, *dev->dma_mask); - } - return false; - } - return true; -} - -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) -{ - dma_addr_t addr = force_dma_unencrypted() ? - __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); - return addr + size - 1 <= dev->coherent_dma_mask; -} - -void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - int page_order = get_order(size); - struct page *page = NULL; - void *ret; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; - - /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - gfp |= GFP_DMA; - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; - -again: - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) { - page = dma_alloc_from_contiguous(dev, count, page_order, gfp); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - if (!page) - page = alloc_pages_node(dev_to_node(dev), gfp, page_order); - - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - __free_pages(page, page_order); - page = NULL; - - if (IS_ENABLED(CONFIG_ZONE_DMA32) && - dev->coherent_dma_mask < DMA_BIT_MASK(64) && - !(gfp & (GFP_DMA32 | GFP_DMA))) { - gfp |= GFP_DMA32; - goto again; - } - - if (IS_ENABLED(CONFIG_ZONE_DMA) && - dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - } - - if (!page) - return NULL; - ret = page_address(page); - if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - } else { - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - } - memset(ret, 0, size); - return ret; -} - -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. - */ -void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned int page_order = get_order(size); - - if (force_dma_unencrypted()) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); -} - -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; - - if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; - return dma_addr; -} - -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) - return 0; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -int dma_direct_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_ZONE_DMA - if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - return 0; -#else - /* - * Because 32-bit DMA masks are so common we expect every architecture - * to be able to satisfy them - either by not supporting more physical - * memory, or by providing a ZONE_DMA32. If neither is the case, the - * architecture needs to use an IOMMU instead of the direct mapping. - */ - if (mask < DMA_BIT_MASK(32)) - return 0; -#endif - /* - * Various PCI/PCIe bridges have broken support for > 32bit DMA even - * if the device itself might support it. - */ - if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) - return 0; - return 1; -} - -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, -}; -EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c deleted file mode 100644 index 79e9a757387f..000000000000 --- a/lib/dma-noncoherent.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 Christoph Hellwig. - * - * DMA operations that map physical memory directly without providing cache - * coherence. - */ -#include -#include -#include -#include -#include - -static void dma_noncoherent_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t addr; - - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, page_to_phys(page) + offset, - size, dir); - return addr; -} - -static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); - if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); - return nents; -} - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -static void dma_noncoherent_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); -} - -static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); -} -#endif - -const struct dma_map_ops dma_noncoherent_ops = { - .alloc = arch_dma_alloc, - .free = arch_dma_free, - .mmap = arch_dma_mmap, - .sync_single_for_device = dma_noncoherent_sync_single_for_device, - .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, - .map_page = dma_noncoherent_map_page, - .map_sg = dma_noncoherent_map_sg, -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU - .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, - .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, - .unmap_page = dma_noncoherent_unmap_page, - .unmap_sg = dma_noncoherent_unmap_sg, -#endif - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/lib/dma-virt.c b/lib/dma-virt.c deleted file mode 100644 index 8e61a02ef9ca..000000000000 --- a/lib/dma-virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-virt.c - * - * DMA operations that map to virtual addresses without flushing memory. - */ -#include -#include -#include -#include - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, -}; -EXPORT_SYMBOL(dma_virt_ops); diff --git a/lib/swiotlb.c b/lib/swiotlb.c deleted file mode 100644 index 04b68d9dffac..000000000000 --- a/lib/swiotlb.c +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is a fallback for platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick - * Copyright (C) 2000 Goutham Rao - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. - * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid - * unnecessary i-cache flushing. - * 04/07/.. ak Better overflow handling. Assorted fixes. - * 05/09/10 linville Add support for syncing ranges, support syncing for - * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. - * 08/12/11 beckyb Add highmem support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) - -/* - * Minimum IO TLB size to bother booting with. Systems with mainly - * 64bit capable cards will only lightly use the swiotlb. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) - -enum swiotlb_force swiotlb_force; - -/* - * Used to do a quick range check in swiotlb_tbl_unmap_single and - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static phys_addr_t io_tlb_start, io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -static phys_addr_t io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). - */ -unsigned int max_segment; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. - */ -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -static phys_addr_t *io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static int late_alloc; - -static int __init -setup_io_tlb_npages(char *str) -{ - if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); - /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - if (*str == ',') - ++str; - if (!strcmp(str, "force")) { - swiotlb_force = SWIOTLB_FORCE; - } else if (!strcmp(str, "noforce")) { - swiotlb_force = SWIOTLB_NO_FORCE; - io_tlb_nslabs = 1; - } - - return 0; -} -early_param("swiotlb", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -unsigned long swiotlb_nr_tbl(void) -{ - return io_tlb_nslabs; -} -EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); - -unsigned int swiotlb_max_segment(void) -{ - return max_segment; -} -EXPORT_SYMBOL_GPL(swiotlb_max_segment); - -void swiotlb_set_max_segment(unsigned int val) -{ - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - -/* default to 64MB */ -#define IO_TLB_DEFAULT_SIZE (64UL<<20) -unsigned long swiotlb_size_or_default(void) -{ - unsigned long size; - - size = io_tlb_nslabs << IO_TLB_SHIFT; - - return size ? size : (IO_TLB_DEFAULT_SIZE); -} - -static bool no_iotlb_memory; - -void swiotlb_print_info(void) -{ - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; - - if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); - return; - } - - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", - (unsigned long long)io_tlb_start, - (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); -} - -/* - * Early SWIOTLB allocation may be too early to allow an architecture to - * perform the desired operations. This function allows the architecture to - * call SWIOTLB when the operations are possible. It needs to be called - * before the SWIOTLB memory is used. - */ -void __init swiotlb_update_mem_attributes(void) -{ - void *vaddr; - unsigned long bytes; - - if (no_iotlb_memory || late_alloc) - return; - - vaddr = phys_to_virt(io_tlb_start); - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); - - vaddr = phys_to_virt(io_tlb_overflow_buffer); - bytes = PAGE_ALIGN(io_tlb_overflow); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); -} - -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) -{ - void *v_overflow_buffer; - unsigned long i, bytes; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = __pa(tlb); - io_tlb_end = io_tlb_start + bytes; - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = memblock_virt_alloc_low_nopanic( - PAGE_ALIGN(io_tlb_overflow), - PAGE_SIZE); - if (!v_overflow_buffer) - return -ENOMEM; - - io_tlb_overflow_buffer = __pa(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), - PAGE_SIZE); - io_tlb_orig_addr = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), - PAGE_SIZE); - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - if (verbose) - swiotlb_print_info(); - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - return 0; -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init(int verbose) -{ - size_t default_size = IO_TLB_DEFAULT_SIZE; - unsigned char *vstart; - unsigned long bytes; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) - return; - - if (io_tlb_start) - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); - no_iotlb_memory = true; -} - -/* - * Systems with larger DMA zones (those that don't support ISA) can - * initialize the swiotlb later using the slab allocator if needed. - * This should be just like above, but with some error catching. - */ -int -swiotlb_late_init_with_default_size(size_t default_size) -{ - unsigned long bytes, req_nslabs = io_tlb_nslabs; - unsigned char *vstart = NULL; - unsigned int order; - int rc = 0; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); - io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (vstart) - break; - order--; - } - - if (!vstart) { - io_tlb_nslabs = req_nslabs; - return -ENOMEM; - } - if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; - } - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - - return rc; -} - -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) -{ - unsigned long i, bytes; - unsigned char *v_overflow_buffer; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = virt_to_phys(tlb); - io_tlb_end = io_tlb_start + bytes; - - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!v_overflow_buffer) - goto cleanup2; - - set_memory_decrypted((unsigned long)v_overflow_buffer, - io_tlb_overflow >> PAGE_SHIFT); - memset(v_overflow_buffer, 0, io_tlb_overflow); - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup3; - - io_tlb_orig_addr = (phys_addr_t *) - __get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * - sizeof(phys_addr_t))); - if (!io_tlb_orig_addr) - goto cleanup4; - - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - swiotlb_print_info(); - - late_alloc = 1; - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - - return 0; - -cleanup4: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; -cleanup3: - free_pages((unsigned long)v_overflow_buffer, - get_order(io_tlb_overflow)); - io_tlb_overflow_buffer = 0; -cleanup2: - io_tlb_end = 0; - io_tlb_start = 0; - io_tlb_nslabs = 0; - max_segment = 0; - return -ENOMEM; -} - -void __init swiotlb_exit(void) -{ - if (!io_tlb_orig_addr) - return; - - if (late_alloc) { - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), - get_order(io_tlb_overflow)); - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - free_pages((unsigned long)phys_to_virt(io_tlb_start), - get_order(io_tlb_nslabs << IO_TLB_SHIFT)); - } else { - memblock_free_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - memblock_free_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - memblock_free_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - memblock_free_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - } - io_tlb_nslabs = 0; - max_segment = 0; -} - -int is_swiotlb_buffer(phys_addr_t paddr) -{ - return paddr >= io_tlb_start && paddr < io_tlb_end; -} - -/* - * Bounce: copy the swiotlb buffer back to the original dma location - */ -static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = phys_to_virt(tlb_addr); - - if (PageHighMem(pfn_to_page(pfn))) { - /* The buffer does not have a mapping. Map it in and copy */ - unsigned int offset = orig_addr & ~PAGE_MASK; - char *buffer; - unsigned int sz = 0; - unsigned long flags; - - while (size) { - sz = min_t(size_t, PAGE_SIZE - offset, size); - - local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn)); - if (dir == DMA_TO_DEVICE) - memcpy(vaddr, buffer + offset, sz); - else - memcpy(buffer + offset, vaddr, sz); - kunmap_atomic(buffer); - local_irq_restore(flags); - - size -= sz; - pfn++; - vaddr += sz; - offset = 0; - } - } else if (dir == DMA_TO_DEVICE) { - memcpy(vaddr, phys_to_virt(orig_addr), size); - } else { - memcpy(phys_to_virt(orig_addr), vaddr, size); - } -} - -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) - pr_warn_once("%s is active and system is using DMA bounce buffers\n", - sme_active() ? "SME" : "SEV"); - - mask = dma_get_seg_boundary(hwdev); - - tbl_dma_addr &= mask; - - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - - /* - * Carefully handle integer overflow which can occur when mask == ~0UL. - */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); - - /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - - do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } - - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot - * and mark the entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - -not_found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); - return SWIOTLB_MAP_ERROR; -found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - - return tlb_addr; -} - -/* - * Allocates bounce buffer and returns its physical address. - */ -static phys_addr_t -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start_dma_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) { - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", - &phys); - return SWIOTLB_MAP_ERROR; - } - - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, - dir, attrs); -} - -/* - * tlb_addr is the physical address of the bounce buffer to unmap. - */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if (orig_addr != INVALID_PHYS_ADDR && - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contiguous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - if (orig_addr == INVALID_PHYS_ADDR) - return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); - - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_FROM_DEVICE); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } -} - -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, - size_t size) -{ - u64 mask = DMA_BIT_MASK(32); - - if (dev && dev->coherent_dma_mask) - mask = dev->coherent_dma_mask; - return addr + size - 1 <= mask; -} - -static void * -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned long attrs) -{ - phys_addr_t phys_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) - goto out_warn; - - phys_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, attrs); - if (phys_addr == SWIOTLB_MAP_ERROR) - goto out_warn; - - *dma_handle = __phys_to_dma(dev, phys_addr); - if (!dma_coherent_ok(dev, *dma_handle, size)) - goto out_unmap; - - memset(phys_to_virt(phys_addr), 0, size); - return phys_to_virt(phys_addr); - -out_unmap: - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dev->coherent_dma_mask, - (unsigned long long)*dma_handle); - - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); -out_warn: - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { - dev_warn(dev, - "swiotlb: coherent allocation failed, size=%zu\n", - size); - dump_stack(); - } - return NULL; -} - -static bool swiotlb_free_buffer(struct device *dev, size_t size, - dma_addr_t dma_addr) -{ - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - - WARN_ON_ONCE(irqs_disabled()); - - if (!is_swiotlb_buffer(phys_addr)) - return false; - - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - return true; -} - -static void -swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, - int do_panic) -{ - if (swiotlb_force == SWIOTLB_NO_FORCE) - return; - - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", - size); - - if (size <= io_tlb_overflow || !do_panic) - return; - - if (dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory could be DMA accessed\n"); - if (dir == DMA_FROM_DEVICE) - panic("DMA: Random memory could be DMA written\n"); - if (dir == DMA_TO_DEVICE) - panic("DMA: Random memory could be DMA read\n"); -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. - */ -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t map, phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = phys_to_dma(dev, phys); - - BUG_ON(dir == DMA_NONE); - /* - * If the address happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) - return dev_addr; - - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - - /* Oh well, have to allocate and map a bounce buffer. */ - map = map_single(dev, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - swiotlb_full(dev, size, dir, 1); - return __phys_to_dma(dev, io_tlb_overflow_buffer); - } - - dev_addr = __phys_to_dma(dev, map); - - /* Ensure that the address returned is DMA'ble */ - if (dma_capable(dev, dev_addr, size)) - return dev_addr; - - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - - return __phys_to_dma(dev, io_tlb_overflow_buffer); -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_page call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); -} - -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unmap_single(hwdev, dev_addr, size, dir, attrs); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_page() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the dma mapping, you must - * call this function before doing so. At the next point you give the dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -static void -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - dma_mark_clean(phys_to_virt(paddr), size); -} - -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_page - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_page are the - * same here. - */ -int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) { - phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - - if (swiotlb_force == SWIOTLB_FORCE || - !dma_capable(hwdev, dev_addr, sg->length)) { - phys_addr_t map = map_single(hwdev, sg_phys(sg), - sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, - attrs); - sg_dma_len(sgl) = 0; - return 0; - } - sg->dma_address = __phys_to_dma(hwdev, map); - } else - sg->dma_address = dev_addr; - sg_dma_len(sg) = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_page() above. - */ -void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - enum dma_sync_target target) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - swiotlb_sync_single(hwdev, sg->dma_address, - sg_dma_len(sg), dir, target); -} - -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); -} - -int -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -{ - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); -} - -/* - * Return whether the given device DMA address mask can be supported - * properly. For example, if your device can only drive the low 24-bits - * during bus mastering, then you would pass 0x00ffffff as the mask to - * this function. - */ -int -swiotlb_dma_supported(struct device *hwdev, u64 mask) -{ - return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; -} - -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - void *vaddr; - - /* temporary workaround: */ - if (gfp & __GFP_NOWARN) - attrs |= DMA_ATTR_NO_WARN; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA memory - * allocation ultimately failed. - */ - gfp |= __GFP_NOWARN; - - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); - return vaddr; -} - -void swiotlb_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - if (!swiotlb_free_buffer(dev, size, dma_addr)) - dma_direct_free(dev, size, vaddr, dma_addr, attrs); -} - -const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = swiotlb_alloc, - .free = swiotlb_free, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = dma_direct_supported, -}; -- cgit v1.2.3 From 7d1982b4e335c1b184406b7566f6041bfe313c35 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 Jun 2018 02:30:47 +0200 Subject: bpf: fix panic in prog load calls cleanup While testing I found that when hitting error path in bpf_prog_load() where we jump to free_used_maps and prog contained BPF to BPF calls that were JITed earlier, then we never clean up the bpf_prog_kallsyms_add() done under jit_subprogs(). Add proper API to make BPF kallsyms deletion more clear and fix that. Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +++ kernel/bpf/core.c | 14 ++++++++++++++ kernel/bpf/syscall.c | 8 ++------ 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 45fc0f5000d8..297c56fa9cee 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -961,6 +961,9 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) } #endif /* CONFIG_BPF_JIT */ +void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); +void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); + #define BPF_ANC BIT(15) static inline bool bpf_needs_clear_a(const struct sock_filter *first) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9f1493705f40..1061968adcc1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, return prog_adj; } +void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) +{ + int i; + + for (i = 0; i < fp->aux->func_cnt; i++) + bpf_prog_kallsyms_del(fp->aux->func[i]); +} + +void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) +{ + bpf_prog_kallsyms_del_subprogs(fp); + bpf_prog_kallsyms_del(fp); +} + #ifdef CONFIG_BPF_JIT /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0fa20624707f..0f62692fe635 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { - int i; - /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); - - for (i = 0; i < prog->aux->func_cnt; i++) - bpf_prog_kallsyms_del(prog->aux->func[i]); - bpf_prog_kallsyms_del(prog); + bpf_prog_kallsyms_del_all(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } @@ -1384,6 +1379,7 @@ static int bpf_prog_load(union bpf_attr *attr) return err; free_used_maps: + bpf_prog_kallsyms_del_subprogs(prog); free_used_maps(prog->aux); free_prog: bpf_prog_uncharge_memlock(prog); -- cgit v1.2.3 From 9facc336876f7ecf9edba4c67b90426fde4ec898 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 Jun 2018 02:30:48 +0200 Subject: bpf: reject any prog that failed read-only lock We currently lock any JITed image as read-only via bpf_jit_binary_lock_ro() as well as the BPF image as read-only through bpf_prog_lock_ro(). In the case any of these would fail we throw a WARN_ON_ONCE() in order to yell loudly to the log. Perhaps, to some extend, this may be comparable to an allocation where __GFP_NOWARN is explicitly not set. Added via 65869a47f348 ("bpf: improve read-only handling"), this behavior is slightly different compared to any of the other in-kernel set_memory_ro() users who do not check the return code of set_memory_ro() and friends /at all/ (e.g. in the case of module_enable_ro() / module_disable_ro()). Given in BPF this is mandatory hardening step, we want to know whether there are any issues that would leave both BPF data writable. So it happens that syzkaller enabled fault injection and it triggered memory allocation failure deep inside x86's change_page_attr_set_clr() which was triggered from set_memory_ro(). Now, there are two options: i) leaving everything as is, and ii) reworking the image locking code in order to have a final checkpoint out of the central bpf_prog_select_runtime() which probes whether any of the calls during prog setup weren't successful, and then bailing out with an error. Option ii) is a better approach since this additional paranoia avoids altogether leaving any potential W+X pages from BPF side in the system. Therefore, lets be strict about it, and reject programs in such unlikely occasion. While testing I noticed also that one bpf_prog_lock_ro() call was missing on the outer dummy prog in case of calls, e.g. in the destructor we call bpf_prog_free_deferred() on the main prog where we try to bpf_prog_unlock_free() the program, and since we go via bpf_prog_select_runtime() do that as well. Reported-by: syzbot+3b889862e65a98317058@syzkaller.appspotmail.com Reported-by: syzbot+9e762b52dd17e616a7a5@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 60 ++++++++++++++++++++++++++++++++------------------ kernel/bpf/core.c | 55 +++++++++++++++++++++++++++++++++++++++------ kernel/bpf/syscall.c | 4 +--- 3 files changed, 87 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 297c56fa9cee..108f9812e196 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -469,7 +469,8 @@ struct sock_fprog_kern { }; struct bpf_binary_header { - unsigned int pages; + u16 pages; + u16 locked:1; u8 image[]; }; @@ -671,15 +672,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) -#ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { +#ifdef CONFIG_ARCH_HAS_SET_MEMORY fp->locked = 1; - WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); + if (set_memory_ro((unsigned long)fp, fp->pages)) + fp->locked = 0; +#endif } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { +#ifdef CONFIG_ARCH_HAS_SET_MEMORY if (fp->locked) { WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); /* In case set_memory_rw() fails, we want to be the first @@ -687,34 +691,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) */ fp->locked = 0; } +#endif } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { - WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); -} - -static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) -{ - WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); -} -#else -static inline void bpf_prog_lock_ro(struct bpf_prog *fp) -{ -} - -static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) -{ -} - -static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) -{ +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + hdr->locked = 1; + if (set_memory_ro((unsigned long)hdr, hdr->pages)) + hdr->locked = 0; +#endif } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + if (hdr->locked) { + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); + /* In case set_memory_rw() fails, we want to be the first + * to crash here instead of some random place later on. + */ + hdr->locked = 0; + } +#endif } -#endif /* CONFIG_ARCH_HAS_SET_MEMORY */ static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) @@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp) return (void *)addr; } +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp) +{ + if (!fp->locked) + return -ENOLCK; + if (fp->jited) { + const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); + + if (!hdr->locked) + return -ENOLCK; + } + + return 0; +} +#endif + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1061968adcc1..a9e6c04d0f4a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -598,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_fill_ill_insns(hdr, size); hdr->pages = size / PAGE_SIZE; + hdr->locked = 0; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -1448,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) return 0; } +static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp) +{ +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + int i, err; + + for (i = 0; i < fp->aux->func_cnt; i++) { + err = bpf_prog_check_pages_ro_single(fp->aux->func[i]); + if (err) + return err; + } + + return bpf_prog_check_pages_ro_single(fp); +#endif + return 0; +} + +static void bpf_prog_select_func(struct bpf_prog *fp) +{ +#ifndef CONFIG_BPF_JIT_ALWAYS_ON + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + + fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; +#else + fp->bpf_func = __bpf_prog_ret0_warn; +#endif +} + /** * bpf_prog_select_runtime - select exec runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -1458,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + /* In case of BPF to BPF calls, verifier did all the prep + * work with regards to JITing, etc. + */ + if (fp->bpf_func) + goto finalize; - fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; -#else - fp->bpf_func = __bpf_prog_ret0_warn; -#endif + bpf_prog_select_func(fp); /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during @@ -1485,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) if (*err) return fp; } + +finalize: bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at @@ -1493,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * all eBPF JITs might immediately support all features. */ *err = bpf_check_tail_call(fp); - + if (*err) + return fp; + + /* Checkpoint: at this point onwards any cBPF -> eBPF or + * native eBPF program is read-only. If we failed to change + * the page attributes (e.g. allocation failure from + * splitting large pages), then reject the whole program + * in order to guarantee not ending up with any W+X pages + * from BPF side in kernel. + */ + *err = bpf_prog_check_pages_ro_locked(fp); return fp; } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0f62692fe635..35dc466641f2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1353,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* eBPF program is ready to be JITed */ - if (!prog->bpf_func) - prog = bpf_prog_select_runtime(prog, &err); + prog = bpf_prog_select_runtime(prog, &err); if (err < 0) goto free_used_maps; -- cgit v1.2.3 From 6d5fc1957989266006db6ef3dfb9159b42cf0189 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 14 Jun 2018 11:07:42 +0900 Subject: xdp: Fix handling of devmap in generic XDP Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed the return value type of __devmap_lookup_elem() from struct net_device * to struct bpf_dtab_netdev * but forgot to modify generic XDP code accordingly. Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct net_device is expected, then skb->dev was set to invalid value. v2: - Fix compiler warning without CONFIG_BPF_SYSCALL. Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") Signed-off-by: Toshiaki Makita Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 12 ++++++++++++ include/linux/filter.h | 16 ++++++++++++++++ kernel/bpf/devmap.c | 14 ++++++++++++++ net/core/filter.c | 21 ++++----------------- 4 files changed, 46 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 995c3b1e59bf..7df32a3200f7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,12 +488,15 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ struct xdp_buff; +struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return 0; } +struct sk_buff; + +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, + struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + return 0; +} + static inline struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 108f9812e196..b615df57b7d5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -802,6 +803,21 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, + struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a7cc7b3494a9..642c97f6d1b8 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return bq_enqueue(dst, xdpf, dev_rx); } +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + int err; + + err = __xdp_generic_ok_fwd_dev(skb, dst->dev); + if (unlikely(err)) + return err; + skb->dev = dst->dev; + generic_xdp_tx(skb, xdp_prog); + + return 0; +} + static void *dev_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); diff --git a/net/core/filter.c b/net/core/filter.c index 3d9ba7e5965a..e7f12e9f598c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3214,20 +3214,6 @@ err: } EXPORT_SYMBOL_GPL(xdp_do_redirect); -static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) -{ - unsigned int len; - - if (unlikely(!(fwd->flags & IFF_UP))) - return -ENETDOWN; - - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) - return -EMSGSIZE; - - return 0; -} - static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, @@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, } if (map->map_type == BPF_MAP_TYPE_DEVMAP) { - if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + struct bpf_dtab_netdev *dst = fwd; + + err = dev_map_generic_redirect(dst, skb, xdp_prog); + if (unlikely(err)) goto err; - skb->dev = fwd; - generic_xdp_tx(skb, xdp_prog); } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { struct xdp_sock *xs = fwd; -- cgit v1.2.3 From 9bbe60a67be5a1c6f79b3c9be5003481a50529ff Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 16 Jun 2018 11:55:44 +0100 Subject: atm: Preserve value of skb->truesize when accounting to vcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ATM accounts for in-flight TX packets in sk_wmem_alloc of the VCC on which they are to be sent. But it doesn't take ownership of those packets from the sock (if any) which originally owned them. They should remain owned by their actual sender until they've left the box. There's a hack in pskb_expand_head() to avoid adjusting skb->truesize for certain skbs, precisely to avoid messing up sk_wmem_alloc accounting. Ideally that hack would cover the ATM use case too, but it doesn't — skbs which aren't owned by any sock, for example PPP control frames, still get their truesize adjusted when the low-level ATM driver adds headroom. This has always been an issue, it seems. The truesize of a packet increases, and sk_wmem_alloc on the VCC goes negative. But this wasn't for normal traffic, only for control frames. So I think we just got away with it, and we probably needed to send 2GiB of LCP echo frames before the misaccounting would ever have caused a problem and caused atm_may_send() to start refusing packets. Commit 14afee4b609 ("net: convert sock.sk_wmem_alloc from atomic_t to refcount_t") did exactly what it was intended to do, and turned this mostly-theoretical problem into a real one, causing PPPoATM to fail immediately as sk_wmem_alloc underflows and atm_may_send() *immediately* starts refusing to allow new packets. The least intrusive solution to this problem is to stash the value of skb->truesize that was accounted to the VCC, in a new member of the ATM_SKB(skb) structure. Then in atm_pop_raw() subtract precisely that value instead of the then-current value of skb->truesize. Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") Signed-off-by: David Woodhouse Tested-by: Kevin Darbyshire-Bryant Signed-off-by: David S. Miller --- include/linux/atmdev.h | 15 +++++++++++++++ net/atm/br2684.c | 3 +-- net/atm/clip.c | 3 +-- net/atm/common.c | 3 +-- net/atm/lec.c | 3 +-- net/atm/mpc.c | 3 +-- net/atm/pppoatm.c | 3 +-- net/atm/raw.c | 4 ++-- 8 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 0c27515d2cf6..8124815eb121 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -214,6 +214,7 @@ struct atmphy_ops { struct atm_skb_data { struct atm_vcc *vcc; /* ATM VCC */ unsigned long atm_options; /* ATM layer options */ + unsigned int acct_truesize; /* truesize accounted to vcc */ }; #define VCC_HTABLE_SIZE 32 @@ -241,6 +242,20 @@ void vcc_insert_socket(struct sock *sk); void atm_dev_release_vccs(struct atm_dev *dev); +static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + /* + * Because ATM skbs may not belong to a sock (and we don't + * necessarily want to), skb->truesize may be adjusted, + * escaping the hack in pskb_expand_head() which avoids + * doing so for some cases. So stash the value of truesize + * at the time we accounted it, and atm_pop_raw() can use + * that value later, in case it changes. + */ + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + ATM_SKB(skb)->acct_truesize = skb->truesize; + ATM_SKB(skb)->atm_options = vcc->atm_options; +} static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 36b3adacc0dd..10462de734ea 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -252,8 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); - refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = atmvcc->atm_options; + atm_account_tx(atmvcc, skb); dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/net/atm/clip.c b/net/atm/clip.c index 66caa48a27c2..d795b9c5aea4 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -381,8 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, memcpy(here, llc_oui, sizeof(llc_oui)); ((__be16 *) here)[3] = skb->protocol; } - refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = vcc->atm_options; + atm_account_tx(vcc, skb); entry->vccs->last_use = jiffies; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ diff --git a/net/atm/common.c b/net/atm/common.c index 1f2af59935db..ff5748b2190f 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -630,10 +630,9 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) goto out; } pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - refcount_add(skb->truesize, &sk->sk_wmem_alloc); + atm_account_tx(vcc, skb); skb->dev = NULL; /* for paths shared with net_device interfaces */ - ATM_SKB(skb)->atm_options = vcc->atm_options; if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { kfree_skb(skb); error = -EFAULT; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5a95fcf6f9b6..d7f5cf5b7594 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -182,9 +182,8 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) struct net_device *dev = skb->dev; ATM_SKB(skb)->vcc = vcc; - ATM_SKB(skb)->atm_options = vcc->atm_options; + atm_account_tx(vcc, skb); - refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (vcc->send(vcc, skb) < 0) { dev->stats.tx_dropped++; return; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 75620c2f2617..24b53c4c39c6 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -555,8 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) sizeof(struct llc_snap_hdr)); } - refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = entry->shortcut->atm_options; + atm_account_tx(entry->shortcut, skb); entry->shortcut->send(entry->shortcut, skb); entry->packets_fwded++; mpc->in_ops->put(entry); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 21d9d341a619..af8c4b38b746 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -350,8 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) return 1; } - refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; + atm_account_tx(vcc, skb); pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) diff --git a/net/atm/raw.c b/net/atm/raw.c index ee10e8d46185..b3ba44aab0ee 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -35,8 +35,8 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) struct sock *sk = sk_atm(vcc); pr_debug("(%d) %d -= %d\n", - vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); - WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); + vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); + WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } -- cgit v1.2.3 From b23908d3c48a37c46c6a26df2cdeab1610b360ba Mon Sep 17 00:00:00 2001 From: Simon Glass Date: Sun, 17 Jun 2018 14:09:42 +0200 Subject: firmware: dmi: Add access to the SKU ID string This is used in some systems from user space for determining the identity of the device. Expose this as a file so that that user-space tools don't need to read from /sys/firmware/dmi/tables/DMI Signed-off-by: Simon Glass Signed-off-by: Jean Delvare --- drivers/firmware/dmi-id.c | 2 ++ drivers/firmware/dmi_scan.c | 1 + include/linux/mod_devicetable.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c index 951b6c79f166..624a11cb07e2 100644 --- a/drivers/firmware/dmi-id.c +++ b/drivers/firmware/dmi-id.c @@ -47,6 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name, 0444, DMI_PRODUCT_NAME); DEFINE_DMI_ATTR_WITH_SHOW(product_version, 0444, DMI_PRODUCT_VERSION); DEFINE_DMI_ATTR_WITH_SHOW(product_serial, 0400, DMI_PRODUCT_SERIAL); DEFINE_DMI_ATTR_WITH_SHOW(product_uuid, 0400, DMI_PRODUCT_UUID); +DEFINE_DMI_ATTR_WITH_SHOW(product_sku, 0444, DMI_PRODUCT_SKU); DEFINE_DMI_ATTR_WITH_SHOW(product_family, 0444, DMI_PRODUCT_FAMILY); DEFINE_DMI_ATTR_WITH_SHOW(board_vendor, 0444, DMI_BOARD_VENDOR); DEFINE_DMI_ATTR_WITH_SHOW(board_name, 0444, DMI_BOARD_NAME); @@ -193,6 +194,7 @@ static void __init dmi_id_init_attr_table(void) ADD_DMI_ATTR(product_serial, DMI_PRODUCT_SERIAL); ADD_DMI_ATTR(product_uuid, DMI_PRODUCT_UUID); ADD_DMI_ATTR(product_family, DMI_PRODUCT_FAMILY); + ADD_DMI_ATTR(product_sku, DMI_PRODUCT_SKU); ADD_DMI_ATTR(board_vendor, DMI_BOARD_VENDOR); ADD_DMI_ATTR(board_name, DMI_BOARD_NAME); ADD_DMI_ATTR(board_version, DMI_BOARD_VERSION); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 54e66adef252..f2483548cde9 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -447,6 +447,7 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); dmi_save_uuid(dm, DMI_PRODUCT_UUID, 8); + dmi_save_ident(dm, DMI_PRODUCT_SKU, 25); dmi_save_ident(dm, DMI_PRODUCT_FAMILY, 26); break; case 2: /* Base Board Information */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 7d361be2e24f..61fbfbc03e5b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -490,6 +490,7 @@ enum dmi_field { DMI_PRODUCT_VERSION, DMI_PRODUCT_SERIAL, DMI_PRODUCT_UUID, + DMI_PRODUCT_SKU, DMI_PRODUCT_FAMILY, DMI_BOARD_VENDOR, DMI_BOARD_NAME, -- cgit v1.2.3 From 42f86b44a4d356edba626171dfe0be061fc695af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Jun 2018 19:07:24 -0400 Subject: pNFS/flexfiles: Don't tie up all the rpciod threads in resends We do not want to have rpciod threads perform recursive calls into the RPC layer since that can deadlock. In particular, having to wait for a layoutget can be nasty... We want rather to defer scheduling those retries until we're in the rpc_release() callback, since that is called from the nfsiod workqueue. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++++--- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3ae038d9c292..336b4d560e2c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1243,17 +1243,18 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); + clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: if (ff_layout_choose_best_ds_for_read(hdr->lseg, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; - ff_layout_read_record_layoutstats_done(task, hdr); - pnfs_read_resend_pnfs(hdr); + set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: - ff_layout_reset_read(hdr); + set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); return task->tk_status; case -EAGAIN: goto out_eagain; @@ -1403,6 +1404,10 @@ static void ff_layout_read_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_read_record_layoutstats_done(&hdr->task, hdr); + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + pnfs_read_resend_pnfs(hdr); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + ff_layout_reset_read(hdr); pnfs_generic_rw_release(data); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9dee3c23895d..712eed156d09 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1438,6 +1438,8 @@ enum { NFS_IOHDR_EOF, NFS_IOHDR_REDO, NFS_IOHDR_STAT, + NFS_IOHDR_RESEND_PNFS, + NFS_IOHDR_RESEND_MDS, }; struct nfs_io_completion; -- cgit v1.2.3 From 9262478220eac908ae6e168c3df2c453c87e2da3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2018 17:24:09 -0700 Subject: bpf: enforce correct alignment for instructions After commit 9facc336876f ("bpf: reject any prog that failed read-only lock") offsetof(struct bpf_binary_header, image) became 3 instead of 4, breaking powerpc BPF badly, since instructions need to be word aligned. Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock") Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index b615df57b7d5..20f2659dd829 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -472,7 +472,9 @@ struct sock_fprog_kern { struct bpf_binary_header { u16 pages; u16 locked:1; - u8 image[]; + + /* Some arches need word alignment for their instructions */ + u8 image[] __aligned(4); }; struct bpf_prog { -- cgit v1.2.3 From 9a789fcfe8605417f7a1a970355f5efa4fe88c64 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 19 Jun 2018 09:32:30 -0400 Subject: rseq/cleanup: Do not abort rseq c.s. in child on fork() Considering that we explicitly forbid system calls in rseq critical sections, it is not valid to issue a fork or clone system call within a rseq critical section, so rseq_fork() is not required to restart an active rseq c.s. in the child process. Signed-off-by: Mathieu Desnoyers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ben Maurer Cc: Boqun Feng Cc: Catalin Marinas Cc: Chris Lameter Cc: Dave Watson Cc: Joel Fernandes Cc: Josh Triplett Cc: Linus Torvalds Cc: Michael Kerrisk Cc: Paul E . McKenney Cc: Paul Turner Cc: Peter Zijlstra Cc: Russell King Cc: Shuah Khan Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/lkml/20180619133230.4087-4-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 87bf02d93a27..c1882643d455 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1831,9 +1831,7 @@ static inline void rseq_migrate(struct task_struct *t) /* * If parent process has a registered restartable sequences area, the - * child inherits. Only applies when forking a process, not a thread. In - * case a parent fork() in the middle of a restartable sequence, set the - * resume notifier to force the child to retry. + * child inherits. Only applies when forking a process, not a thread. */ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { @@ -1847,7 +1845,6 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; - rseq_preempt(t); } } -- cgit v1.2.3 From f642fb5864a6e3645edce6f85ffe7b44d5e9b990 Mon Sep 17 00:00:00 2001 From: "mike.travis@hpe.com" Date: Thu, 24 May 2018 15:17:12 -0500 Subject: x86/platform/UV: Add adjustable set memory block size function Add a new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This is out of necessity so arch dependent code can accommodate specific BIOS requirements which can align these new PMEM modules at less than the default boundaries. A "set order" type of function was used to insure that the memory block size will be a power of two value without requiring a validity check. 64GB was chosen as the upper limit for memory block size values to accommodate upcoming 4PB systems which have 6 more bits of physical address space (46 becoming 52). Signed-off-by: Mike Travis Reviewed-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: jgross@suse.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/20180524201711.609546602@stormcage.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 20 ++++++++++++++++---- include/linux/memory.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a400606dea0..20d8bf5fbceb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr) /* Amount of ram needed to start using large blocks */ #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) +/* Adjustable memory block size */ +static unsigned long set_memory_block_size; +int __init set_memory_block_size_order(unsigned int order) +{ + unsigned long size = 1UL << order; + + if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE) + return -EINVAL; + + set_memory_block_size = size; + return 0; +} + static unsigned long probe_memory_block_size(void) { unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; unsigned long bz; - /* If this is UV system, always set 2G block size */ - if (is_uv_system()) { - bz = MAX_BLOCK_SIZE; + /* If memory block size has been set, then use it */ + bz = set_memory_block_size; + if (bz) goto done; - } /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { diff --git a/include/linux/memory.h b/include/linux/memory.h index 31ca3e28b0eb..a6ddefc60517 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -38,6 +38,7 @@ struct memory_block { int arch_get_memory_phys_device(unsigned long start_pfn); unsigned long memory_block_size_bytes(void); +int set_memory_block_size_order(unsigned int order); /* These states are exposed to userspace as text strings in sysfs */ #define MEM_ONLINE (1<<0) /* exposed to userspace */ -- cgit v1.2.3 From 8730662d7b2582f65dd6c59ab1e0b7fa461c79b0 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 24 Apr 2018 14:22:38 -0700 Subject: kernel.h: Fix a typo in comment Signed-off-by: Wei Wang Cc: Andrew Morton Cc: Borislav Petkov Cc: Crt Mori Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Steven Rostedt Cc: Thomas Gleixner Cc: gregkh@linuxfoundation.org Cc: wei.vince.wang@gmail.com Link: https://lkml.kernel.org/lkml/20180424212241.16013-1-wvw@google.com Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d23123238534..941dc0a5a877 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -666,7 +666,7 @@ do { \ * your code. (Extra memory is used for special buffers that are * allocated when trace_printk() is used.) * - * A little optization trick is done here. If there's only one + * A little optimization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. * The trace_puts() will suffice. But how can we take advantage of * using trace_puts() when trace_printk() has only one argument? -- cgit v1.2.3 From 72a8edc2d9134c2895eac2fec5eecf8230a05c96 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:48 +0100 Subject: genirq/debugfs: Add missing IRQCHIP_SUPPORTS_LEVEL_MSI debug Debug is missing the IRQCHIP_SUPPORTS_LEVEL_MSI debug entry, making debugfs slightly less useful. Take this opportunity to also add a missing comment in the definition of IRQCHIP_SUPPORTS_LEVEL_MSI. Fixes: 6988e0e0d283 ("genirq/msi: Limit level-triggered MSI to platform devices") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-2-marc.zyngier@arm.com --- include/linux/irq.h | 1 + kernel/irq/debugfs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 4bd2f34947f4..201de12a9957 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -503,6 +503,7 @@ struct irq_chip { * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode + * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 4dadeb3d6666..6f636136cccc 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -55,6 +55,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE), BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), }; static void -- cgit v1.2.3 From bed9df97b39e73a4607189f2c4b9fb89cc3f7f59 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 22 Jun 2018 19:35:33 +0800 Subject: irqdesc: Delete irq_desc_get_msi_desc() Function irq_desc_get_msi_desc() is not referenced in the kernel (and does not seem to have been referenced since e39758e0ea76, 3 years ago), so delete it. Signed-off-by: John Garry Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: Cc: Cc: Link: https://lkml.kernel.org/r/1529667333-92959-1-git-send-email-john.garry@huawei.com --- include/linux/irqdesc.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 25b33b664537..dd1e40ddac7d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -145,11 +145,6 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc) return desc->irq_common_data.handler_data; } -static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) -{ - return desc->irq_common_data.msi_desc; -} - /* * Architectures call this to let the generic IRQ layer * handle an interrupt. -- cgit v1.2.3 From 784e0300fe9fe4aa81bd7df9d59e138f56bb605b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Jun 2018 11:45:07 +0100 Subject: rseq: Avoid infinite recursion when delivering SIGSEGV When delivering a signal to a task that is using rseq, we call into __rseq_handle_notify_resume() so that the registers pushed in the sigframe are updated to reflect the state of the restartable sequence (for example, ensuring that the signal returns to the abort handler if necessary). However, if the rseq management fails due to an unrecoverable fault when accessing userspace or certain combinations of RSEQ_CS_* flags, then we will attempt to deliver a SIGSEGV. This has the potential for infinite recursion if the rseq code continuously fails on signal delivery. Avoid this problem by using force_sigsegv() instead of force_sig(), which is explicitly designed to reset the SEGV handler to SIG_DFL in the case of a recursive fault. In doing so, remove rseq_signal_deliver() from the internal rseq API and have an optional struct ksignal * parameter to rseq_handle_notify_resume() instead. Signed-off-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Mathieu Desnoyers Cc: peterz@infradead.org Cc: paulmck@linux.vnet.ibm.com Cc: boqun.feng@gmail.com Link: https://lkml.kernel.org/r/1529664307-983-1-git-send-email-will.deacon@arm.com --- arch/arm/kernel/signal.c | 4 ++-- arch/powerpc/kernel/signal.c | 4 ++-- arch/x86/entry/common.c | 2 +- arch/x86/kernel/signal.c | 2 +- include/linux/sched.h | 18 +++++++++++------- kernel/rseq.c | 7 ++++--- 6 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index f09e9d66d605..dec130e7078c 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -544,7 +544,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) * Increment event counter and perform fixup for the pre-signal * frame. */ - rseq_signal_deliver(regs); + rseq_signal_deliver(ksig, regs); /* * Set up the stack frame @@ -666,7 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } else { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } } local_irq_disable(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 17fe4339ba59..b3e8db376ecd 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -134,7 +134,7 @@ static void do_signal(struct task_struct *tsk) /* Re-enable the breakpoints for the signal stack */ thread_change_pc(tsk, tsk->thread.regs); - rseq_signal_deliver(tsk->thread.regs); + rseq_signal_deliver(&ksig, tsk->thread.regs); if (is32) { if (ksig.ka.sa.sa_flags & SA_SIGINFO) @@ -170,7 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } user_enter(); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 92190879b228..3b2490b81918 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -164,7 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } if (cached_flags & _TIF_USER_RETURN_NOTIFY) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 445ca11ff863..92a3b312a53c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -692,7 +692,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) * Increment event counter and perform fixup for the pre-signal * frame. */ - rseq_signal_deliver(regs); + rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ if (is_ia32_frame(ksig)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index c1882643d455..9256118bd40c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1799,20 +1799,22 @@ static inline void rseq_set_notify_resume(struct task_struct *t) set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } -void __rseq_handle_notify_resume(struct pt_regs *regs); +void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); -static inline void rseq_handle_notify_resume(struct pt_regs *regs) +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) { if (current->rseq) - __rseq_handle_notify_resume(regs); + __rseq_handle_notify_resume(ksig, regs); } -static inline void rseq_signal_deliver(struct pt_regs *regs) +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) { preempt_disable(); __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); preempt_enable(); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(ksig, regs); } /* rseq_preempt() requires preemption to be disabled. */ @@ -1861,10 +1863,12 @@ static inline void rseq_execve(struct task_struct *t) static inline void rseq_set_notify_resume(struct task_struct *t) { } -static inline void rseq_handle_notify_resume(struct pt_regs *regs) +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) { } -static inline void rseq_signal_deliver(struct pt_regs *regs) +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) { } static inline void rseq_preempt(struct task_struct *t) diff --git a/kernel/rseq.c b/kernel/rseq.c index ae306f90c514..22b6acf1ad63 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -251,10 +251,10 @@ static int rseq_ip_fixup(struct pt_regs *regs) * respect to other threads scheduled on the same CPU, and with respect * to signal handlers. */ -void __rseq_handle_notify_resume(struct pt_regs *regs) +void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { struct task_struct *t = current; - int ret; + int ret, sig; if (unlikely(t->flags & PF_EXITING)) return; @@ -268,7 +268,8 @@ void __rseq_handle_notify_resume(struct pt_regs *regs) return; error: - force_sig(SIGSEGV, t); + sig = ksig ? ksig->sig : 0; + force_sigsegv(sig, t); } #ifdef CONFIG_DEBUG_RSEQ -- cgit v1.2.3 From 3ee7e8697d5860b173132606d80a9cd35e7113ee Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 18 Jun 2018 15:46:58 +0200 Subject: bdi: Fix another oops in wb_workfn() syzbot is reporting NULL pointer dereference at wb_workfn() [1] due to wb->bdi->dev being NULL. And Dmitry confirmed that wb->state was WB_shutting_down after wb->bdi->dev became NULL. This indicates that unregister_bdi() failed to call wb_shutdown() on one of wb objects. The problem is in cgwb_bdi_unregister() which does cgwb_kill() and thus drops bdi's reference to wb structures before going through the list of wbs again and calling wb_shutdown() on each of them. This way the loop iterating through all wbs can easily miss a wb if that wb has already passed through cgwb_remove_from_bdi_list() called from wb_shutdown() from cgwb_release_workfn() and as a result fully shutdown bdi although wb_workfn() for this wb structure is still running. In fact there are also other ways cgwb_bdi_unregister() can race with cgwb_release_workfn() leading e.g. to use-after-free issues: CPU1 CPU2 cgwb_bdi_unregister() cgwb_kill(*slot); cgwb_release() queue_work(cgwb_release_wq, &wb->release_work); cgwb_release_workfn() wb = list_first_entry(&bdi->wb_list, ...) spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); ... kfree_rcu(wb, rcu); wb_shutdown(wb); -> oops use-after-free We solve these issues by synchronizing writeback structure shutdown from cgwb_bdi_unregister() with cgwb_release_workfn() using a new mutex. That way we also no longer need synchronization using WB_shutting_down as the mutex provides it for CONFIG_CGROUP_WRITEBACK case and without CONFIG_CGROUP_WRITEBACK wb_shutdown() can be called only once from bdi_unregister(). Reported-by: syzbot Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 +- mm/backing-dev.c | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 0bd432a4d7bd..24251762c20c 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -22,7 +22,6 @@ struct dentry; */ enum wb_state { WB_registered, /* bdi_register() was done */ - WB_shutting_down, /* wb_shutdown() in progress */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ WB_start_all, /* nr_pages == 0 (all) work pending */ @@ -189,6 +188,7 @@ struct backing_dev_info { #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ + struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ #else struct bdi_writeback_congested *wb_congested; #endif diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 347cc834c04a..2e5d3df0853d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -359,15 +359,8 @@ static void wb_shutdown(struct bdi_writeback *wb) spin_lock_bh(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { spin_unlock_bh(&wb->work_lock); - /* - * Wait for wb shutdown to finish if someone else is just - * running wb_shutdown(). Otherwise we could proceed to wb / - * bdi destruction before wb_shutdown() is finished. - */ - wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE); return; } - set_bit(WB_shutting_down, &wb->state); spin_unlock_bh(&wb->work_lock); cgwb_remove_from_bdi_list(wb); @@ -379,12 +372,6 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); - /* - * Make sure bit gets cleared after shutdown is finished. Matches with - * the barrier provided by test_and_clear_bit() above. - */ - smp_wmb(); - clear_and_wake_up_bit(WB_shutting_down, &wb->state); } static void wb_exit(struct bdi_writeback *wb) @@ -508,10 +495,12 @@ static void cgwb_release_workfn(struct work_struct *work) struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); + mutex_lock(&wb->bdi->cgwb_release_mutex); wb_shutdown(wb); css_put(wb->memcg_css); css_put(wb->blkcg_css); + mutex_unlock(&wb->bdi->cgwb_release_mutex); fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); @@ -697,6 +686,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; + mutex_init(&bdi->cgwb_release_mutex); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { @@ -717,7 +707,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); + spin_unlock_irq(&cgwb_lock); + mutex_lock(&bdi->cgwb_release_mutex); + spin_lock_irq(&cgwb_lock); while (!list_empty(&bdi->wb_list)) { wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, bdi_node); @@ -726,6 +719,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) spin_lock_irq(&cgwb_lock); } spin_unlock_irq(&cgwb_lock); + mutex_unlock(&bdi->cgwb_release_mutex); } /** -- cgit v1.2.3 From 92397a6c38d139d50fabbe9e2dc09b61d53b2377 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 5 Jun 2018 14:15:10 +0800 Subject: iio: buffer: fix the function signature to match implementation linux/iio/buffer-dma.h was not updated to when length was changed to unsigned int. Fixes: c043ec1ca5ba ("iio:buffer: make length types match kfifo types") Signed-off-by: Phil Reid Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 767467d886de..67c75372b691 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -141,7 +141,7 @@ int iio_dma_buffer_read(struct iio_buffer *buffer, size_t n, char __user *user_buffer); size_t iio_dma_buffer_data_available(struct iio_buffer *buffer); int iio_dma_buffer_set_bytes_per_datum(struct iio_buffer *buffer, size_t bpd); -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length); +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length); int iio_dma_buffer_request_update(struct iio_buffer *buffer); int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue, -- cgit v1.2.3 From 5e03aa61a7f3cb99852bc3ad6925f5fa3c0dcc93 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 14 Jun 2018 10:53:34 +0530 Subject: PM / Domains: Fix return value of of_genpd_opp_to_performance_state() of_genpd_opp_to_performance_state() should return 0 for errors, but the dummy routine isn't doing that. Fix it. Signed-off-by: Viresh Kumar Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9206a4fef9ac..139f79c8477a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -276,7 +276,7 @@ static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, struct device_node *opp_node) { - return -ENODEV; + return 0; } static inline int genpd_dev_pm_attach(struct device *dev) -- cgit v1.2.3 From ad6384ba3ac98ad524194e37de379c1fe503870b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 20 Jun 2018 11:45:37 +0530 Subject: PM / Domains: Rename opp_node to np The DT node passed here isn't necessarily an OPP node, as this routine can also be used for cases where the "required-opps" property is present directly in the device's node. Rename it. This also removes a stale comment. Acked-by: Ulf Hansson Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 7 +++---- include/linux/pm_domain.h | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4925af5c4cf0..c298de8a8308 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2487,10 +2487,9 @@ EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); * power domain corresponding to a DT node's "required-opps" property. * * @dev: Device for which the performance-state needs to be found. - * @opp_node: DT node where the "required-opps" property is present. This can be + * @np: DT node where the "required-opps" property is present. This can be * the device node itself (if it doesn't have an OPP table) or a node * within the OPP table of a device (if device has an OPP table). - * @state: Pointer to return performance state. * * Returns performance state corresponding to the "required-opps" property of * a DT node. This calls platform specific genpd->opp_to_performance_state() @@ -2499,7 +2498,7 @@ EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); * Returns performance state on success and 0 on failure. */ unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node) + struct device_node *np) { struct generic_pm_domain *genpd; struct dev_pm_opp *opp; @@ -2514,7 +2513,7 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev, genpd_lock(genpd); - opp = of_dev_pm_opp_find_required_opp(&genpd->dev, opp_node); + opp = of_dev_pm_opp_find_required_opp(&genpd->dev, np); if (IS_ERR(opp)) { dev_err(dev, "Failed to find required OPP: %ld\n", PTR_ERR(opp)); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 139f79c8477a..cb8d84090cfb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -234,7 +234,7 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node); + struct device_node *np); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -274,7 +274,7 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node) + struct device_node *np) { return 0; } -- cgit v1.2.3 From d2d2e3c46be5d6dd8001d0eebdf7cafb9bc7006b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:17 +0300 Subject: acpi: Add helper for deactivating memory region Sometimes memory resource may be overlapping with SystemMemory Operation Region by design, for example if the memory region is used as a mailbox for communication with a firmware in the system. One occasion of such mailboxes is USB Type-C Connector System Software Interface (UCSI). With regions like that, it is important that the driver is able to map the memory with the requirements it has. For example, the driver should be allowed to map the memory as non-cached memory. However, if the operation region has been accessed before the driver has mapped the memory, the memory has been marked as write-back by the time the driver is loaded. That means the driver will fail to map the memory if it expects non-cached memory. To work around the problem, introducing helper that the drivers can use to temporarily deactivate (unmap) SystemMemory Operation Regions that overlap with their IO memory. Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Reviewed-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 3 +++ 2 files changed, 75 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7ca41bf023c9..8df9abfa947b 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -45,6 +45,8 @@ #include #include +#include "acpica/accommon.h" +#include "acpica/acnamesp.h" #include "internal.h" #define _COMPONENT ACPI_OS_SERVICES @@ -1490,6 +1492,76 @@ int acpi_check_region(resource_size_t start, resource_size_t n, } EXPORT_SYMBOL(acpi_check_region); +static acpi_status acpi_deactivate_mem_region(acpi_handle handle, u32 level, + void *_res, void **return_value) +{ + struct acpi_mem_space_context **mem_ctx; + union acpi_operand_object *handler_obj; + union acpi_operand_object *region_obj2; + union acpi_operand_object *region_obj; + struct resource *res = _res; + acpi_status status; + + region_obj = acpi_ns_get_attached_object(handle); + if (!region_obj) + return AE_OK; + + handler_obj = region_obj->region.handler; + if (!handler_obj) + return AE_OK; + + if (region_obj->region.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) + return AE_OK; + + if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) + return AE_OK; + + region_obj2 = acpi_ns_get_secondary_object(region_obj); + if (!region_obj2) + return AE_OK; + + mem_ctx = (void *)®ion_obj2->extra.region_context; + + if (!(mem_ctx[0]->address >= res->start && + mem_ctx[0]->address < res->end)) + return AE_OK; + + status = handler_obj->address_space.setup(region_obj, + ACPI_REGION_DEACTIVATE, + NULL, (void **)mem_ctx); + if (ACPI_SUCCESS(status)) + region_obj->region.flags &= ~(AOPOBJ_SETUP_COMPLETE); + + return status; +} + +/** + * acpi_release_memory - Release any mappings done to a memory region + * @handle: Handle to namespace node + * @res: Memory resource + * @level: A level that terminates the search + * + * Walks through @handle and unmaps all SystemMemory Operation Regions that + * overlap with @res and that have already been activated (mapped). + * + * This is a helper that allows drivers to place special requirements on memory + * region that may overlap with operation regions, primarily allowing them to + * safely map the region as non-cached memory. + * + * The unmapped Operation Regions will be automatically remapped next time they + * are called, so the drivers do not need to do anything else. + */ +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level) +{ + if (!(res->flags & IORESOURCE_MEM)) + return AE_TYPE; + + return acpi_walk_namespace(ACPI_TYPE_REGION, handle, level, + acpi_deactivate_mem_region, NULL, res, NULL); +} +EXPORT_SYMBOL_GPL(acpi_release_memory); + /* * Let drivers know whether the resource checks are effective */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4b35a66383f9..e54f40974eb0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -443,6 +443,9 @@ int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level); + int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From 8793bb7f4a9dd1396575d2e9337d331662cb7555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 13:14:56 -0700 Subject: kbuild: add macro for controlling warnings to linux/compiler.h I have occasionally run into a situation where it would make sense to control a compiler warning from a source file rather than doing so from a Makefile using the $(cc-disable-warning, ...) or $(cc-option, ...) helpers. The approach here is similar to what glibc uses, using __diag() and related macros to encapsulate a _Pragma("GCC diagnostic ...") statement that gets turned into the respective "#pragma GCC diagnostic ..." by the preprocessor when the macro gets expanded. Like glibc, I also have an argument to pass the affected compiler version, but decided to actually evaluate that one. For now, this supports GCC_4_6, GCC_4_7, GCC_4_8, GCC_4_9, GCC_5, GCC_6, GCC_7, GCC_8 and GCC_9. Adding support for CLANG_5 and other interesting versions is straightforward here. GNU compilers starting with gcc-4.2 could support it in principle, but "#pragma GCC diagnostic push" was only added in gcc-4.6, so it seems simpler to not deal with those at all. The same versions show a large number of warnings already, so it seems easier to just leave it at that and not do a more fine-grained control for them. The use cases I found so far include: - turning off the gcc-8 -Wattribute-alias warning inside of the SYSCALL_DEFINEx() macro without having to do it globally. - Reducing the build time for a simple re-make after a change, once we move the warnings from ./Makefile and ./scripts/Makefile.extrawarn into linux/compiler.h - More control over the warnings based on other configurations, using preprocessor syntax instead of Makefile syntax. This should make it easier for the average developer to understand and change things. - Adding an easy way to turn the W=1 option on unconditionally for a subdirectory or a specific file. This has been requested by several developers in the past that want to have their subsystems W=1 clean. - Integrating clang better into the build systems. Clang supports more warnings than GCC, and we probably want to classify them as default, W=1, W=2 etc, but there are cases in which the warnings should be classified differently due to excessive false positives from one or the other compiler. - Adding a way to turn the default warnings into errors (e.g. using a new "make E=0" tag) while not also turning the W=1 warnings into errors. This patch for now just adds the minimal infrastructure in order to do the first of the list above. As the #pragma GCC diagnostic takes precedence over command line options, the next step would be to convert a lot of the individual Makefiles that set nonstandard options to use __diag() instead. [paul.burton@mips.com: - Rebase atop current master. - Add __diag_GCC, or more generally __diag_, abstraction to avoid code outside of linux/compiler-gcc.h needing to duplicate knowledge about different GCC versions. - Add a comment argument to __diag_{ignore,warn,error} which isn't used in the expansion of the macros but serves to push people to document the reason for using them - per feedback from Kees Cook. - Translate severity to GCC-specific pragmas in linux/compiler-gcc.h rather than using GCC-specific in linux/compiler_types.h. - Drop all but GCC 8 macros, since we only need to define macros for versions that we need to introduce pragmas for, and as of this series that's just GCC 8. - Capitalize comments in linux/compiler-gcc.h to match the style of the rest of the file. - Line up macro definitions with tabs in linux/compiler-gcc.h.] Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Tested-by: Christophe Leroy Tested-by: Stafford Horne Signed-off-by: Masahiro Yamada --- include/linux/compiler-gcc.h | 25 +++++++++++++++++++++++++ include/linux/compiler_types.h | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f1a7492a5cc8..fd282c7d3e5e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -347,3 +347,28 @@ #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif + +/* + * Turn individual warnings and errors on and off locally, depending + * on version. + */ +#define __diag_GCC(version, severity, s) \ + __diag_GCC_ ## version(__diag_GCC_ ## severity s) + +/* Severity used in pragma directives */ +#define __diag_GCC_ignore ignored +#define __diag_GCC_warn warning +#define __diag_GCC_error error + +/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */ +#if GCC_VERSION >= 40600 +#define __diag_str1(s) #s +#define __diag_str(s) __diag_str1(s) +#define __diag(s) _Pragma(__diag_str(GCC diagnostic s)) +#endif + +#if GCC_VERSION >= 80000 +#define __diag_GCC_8(s) __diag(s) +#else +#define __diag_GCC_8(s) +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6b79a9bba9a7..a8ba6b04152c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -271,4 +271,22 @@ struct ftrace_likely_data { # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #endif +#ifndef __diag +#define __diag(string) +#endif + +#ifndef __diag_GCC +#define __diag_GCC(version, severity, string) +#endif + +#define __diag_push() __diag(push) +#define __diag_pop() __diag(pop) + +#define __diag_ignore(compiler, version, option, comment) \ + __diag_ ## compiler(version, ignore, option) +#define __diag_warn(compiler, version, option, comment) \ + __diag_ ## compiler(version, warn, option) +#define __diag_error(compiler, version, option, comment) \ + __diag_ ## compiler(version, error, option) + #endif /* __LINUX_COMPILER_TYPES_H */ -- cgit v1.2.3 From bee20031772af3debe8cbaa234528f24c7892e8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 13:14:57 -0700 Subject: disable -Wattribute-alias warning for SYSCALL_DEFINEx() gcc-8 warns for every single definition of a system call entry point, e.g.: include/linux/compat.h:56:18: error: 'compat_sys_rt_sigprocmask' alias between functions of incompatible types 'long int(int, compat_sigset_t *, compat_sigset_t *, compat_size_t)' {aka 'long int(int, struct *, struct *, unsigned int)'} and 'long int(long int, long int, long int, long int)' [-Werror=attribute-alias] asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ ^~~~~~~~~~ include/linux/compat.h:45:2: note: in expansion of macro 'COMPAT_SYSCALL_DEFINEx' COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~ kernel/signal.c:2601:1: note: in expansion of macro 'COMPAT_SYSCALL_DEFINE4' COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, ^~~~~~~~~~~~~~~~~~~~~~ include/linux/compat.h:60:18: note: aliased declaration here asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ ^~~~~~~~~~ The new warning seems reasonable in principle, but it doesn't help us here, since we rely on the type mismatch to sanitize the system call arguments. After I reported this as GCC PR82435, a new -Wno-attribute-alias option was added that could be used to turn the warning off globally on the command line, but I'd prefer to do it a little more fine-grained. Interestingly, turning a warning off and on again inside of a single macro doesn't always work, in this case I had to add an extra statement inbetween and decided to copy the __SC_TEST one from the native syscall to the compat syscall macro. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 for more details about this. [paul.burton@mips.com: - Rebase atop current master. - Split GCC & version arguments to __diag_ignore() in order to match changes to the preceding patch. - Add the comment argument to match the preceding patch.] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82435 Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Tested-by: Christophe Leroy Tested-by: Stafford Horne Signed-off-by: Masahiro Yamada --- include/linux/compat.h | 8 +++++++- include/linux/syscalls.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index b1a5562b3215..c68acc47da57 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -72,6 +72,9 @@ */ #ifndef COMPAT_SYSCALL_DEFINEx #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ @@ -80,8 +83,11 @@ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ } \ + __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* COMPAT_SYSCALL_DEFINEx */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 73810808cdf2..a368a68cb667 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -231,6 +231,9 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) */ #ifndef __SYSCALL_DEFINEx #define __SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ @@ -243,6 +246,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ + __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ -- cgit v1.2.3 From 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 26 Jun 2018 09:14:58 -0600 Subject: block: Fix transfer when chunk sectors exceeds max A device may have boundary restrictions where the number of sectors between boundaries exceeds its max transfer size. In this case, we need to cap the max size to the smaller of the two limits. Reported-by: Jitendra Bhivare Tested-by: Jitendra Bhivare Cc: Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9154570edf29..79226ca8f80f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1119,8 +1119,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, if (!q->limits.chunk_sectors) return q->limits.max_sectors; - return q->limits.chunk_sectors - - (offset & (q->limits.chunk_sectors - 1)); + return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors - + (offset & (q->limits.chunk_sectors - 1)))); } static inline unsigned int blk_rq_get_max_sectors(struct request *rq, -- cgit v1.2.3 From a11e1d432b51f63ba698d044441284a661f01144 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jun 2018 09:43:44 -0700 Subject: Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 7 +- Documentation/filesystems/vfs.txt | 13 ---- crypto/af_alg.c | 13 +++- crypto/algif_aead.c | 4 +- crypto/algif_skcipher.c | 4 +- drivers/char/random.c | 29 ++++---- drivers/isdn/mISDN/socket.c | 2 +- drivers/net/ppp/pppoe.c | 2 +- fs/aio.c | 148 +------------------------------------- fs/eventfd.c | 19 ++--- fs/eventpoll.c | 15 ++-- fs/pipe.c | 22 +++--- fs/select.c | 23 ------ fs/timerfd.c | 22 +++--- include/crypto/if_alg.h | 3 +- include/linux/fs.h | 2 - include/linux/net.h | 1 - include/linux/poll.h | 12 ++-- include/linux/skbuff.h | 3 +- include/net/bluetooth/bluetooth.h | 2 +- include/net/iucv/af_iucv.h | 2 + include/net/sctp/sctp.h | 3 +- include/net/tcp.h | 3 +- include/net/tls.h | 6 +- include/net/udp.h | 2 +- include/uapi/linux/aio_abi.h | 8 ++- net/appletalk/ddp.c | 2 +- net/atm/common.c | 11 ++- net/atm/common.h | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/af_bluetooth.c | 7 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/caif/caif_socket.c | 12 ++-- net/can/bcm.c | 2 +- net/can/raw.c | 2 +- net/core/datagram.c | 13 ++-- net/dccp/dccp.h | 3 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/dccp/proto.c | 13 +++- net/decnet/af_decnet.c | 6 +- net/ieee802154/socket.c | 4 +- net/ipv4/af_inet.c | 8 +-- net/ipv4/tcp.c | 23 ++++-- net/ipv4/udp.c | 10 +-- net/ipv6/af_inet6.c | 4 +- net/ipv6/raw.c | 4 +- net/iucv/af_iucv.c | 7 +- net/kcm/kcmsock.c | 10 +-- net/key/af_key.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/l2tp/l2tp_ppp.c | 2 +- net/llc/af_llc.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 2 +- net/nfc/llcp_sock.c | 9 ++- net/nfc/rawsock.c | 4 +- net/packet/af_packet.c | 9 +-- net/phonet/socket.c | 9 ++- net/qrtr/qrtr.c | 2 +- net/rose/af_rose.c | 2 +- net/rxrpc/af_rxrpc.c | 10 ++- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 4 +- net/smc/af_smc.c | 12 +++- net/socket.c | 48 ++----------- net/tipc/socket.c | 14 ++-- net/tls/tls_main.c | 2 +- net/tls/tls_sw.c | 19 ++--- net/unix/af_unix.c | 30 +++++--- net/vmw_vsock/af_vsock.c | 19 +++-- net/x25/af_x25.c | 2 +- net/xdp/xsk.c | 7 +- 80 files changed, 301 insertions(+), 450 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2c391338c675..37bf0a9de75c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -441,8 +441,6 @@ prototypes: int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -473,7 +471,7 @@ prototypes: }; locking rules: - All except for ->poll_mask may block. + All may block. ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -505,9 +503,6 @@ in sys_read() and friends. the lease within the individual filesystem to record the result of the operation -->poll_mask can be called with or without the waitqueue lock for the waitqueue -returned from ->get_poll_head. - --------------------------- dquot_operations ------------------------------- prototypes: int (*write_dquot) (struct dquot *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 829a7b7857a4..f608180ad59d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -857,8 +857,6 @@ struct file_operations { ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -903,17 +901,6 @@ otherwise noted. activity on this file and (optionally) go to sleep until there is activity. Called by the select(2) and poll(2) system calls - get_poll_head: Returns the struct wait_queue_head that callers can - wait on. Callers need to check the returned events using ->poll_mask - once woken. Can return NULL to indicate polling is not supported, - or any error code using the ERR_PTR convention to indicate that a - grave error occured and ->poll_mask shall not be called. - - poll_mask: return the mask of EPOLL* values describing the file descriptor - state. Called either before going to sleep on the waitqueue returned by - get_poll_head, or after it has been woken. If ->get_poll_head and - ->poll_mask are implemented ->poll does not need to be implement. - unlocked_ioctl: called by the ioctl(2) system call. compat_ioctl: called by the ioctl(2) system call when 32 bit system calls diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 49fa8582138b..314c52c967e5 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1060,12 +1060,19 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) } EXPORT_SYMBOL_GPL(af_alg_async_cb); -__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) +/** + * af_alg_poll - poll system call handler + */ +__poll_t af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct af_alg_ctx *ctx = ask->private; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; if (!ctx->more || ctx->used) mask |= EPOLLIN | EPOLLRDNORM; @@ -1075,7 +1082,7 @@ __poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL_GPL(af_alg_poll_mask); +EXPORT_SYMBOL_GPL(af_alg_poll); /** * af_alg_alloc_areq - allocate struct af_alg_async_req diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 825524f27438..c40a8c7ee8ae 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = { .sendmsg = aead_sendmsg, .sendpage = af_alg_sendpage, .recvmsg = aead_recvmsg, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static int aead_check_key(struct socket *sock) @@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = { .sendmsg = aead_sendmsg_nokey, .sendpage = aead_sendpage_nokey, .recvmsg = aead_recvmsg_nokey, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static void *aead_bind(const char *name, u32 type, u32 mask) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 4c04eb9888ad..cfdaab2b7d76 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -206,7 +206,7 @@ static struct proto_ops algif_skcipher_ops = { .sendmsg = skcipher_sendmsg, .sendpage = af_alg_sendpage, .recvmsg = skcipher_recvmsg, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static int skcipher_check_key(struct socket *sock) @@ -302,7 +302,7 @@ static struct proto_ops algif_skcipher_ops_nokey = { .sendmsg = skcipher_sendmsg_nokey, .sendpage = skcipher_sendpage_nokey, .recvmsg = skcipher_recvmsg_nokey, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static void *skcipher_bind(const char *name, u32 type, u32 mask) diff --git a/drivers/char/random.c b/drivers/char/random.c index a8fb0020ba5c..cd888d4ee605 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -402,7 +402,8 @@ static struct poolinfo { /* * Static global variables */ -static DECLARE_WAIT_QUEUE_HEAD(random_wait); +static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); @@ -721,8 +722,8 @@ retry: /* should we wake readers? */ if (entropy_bits >= random_read_wakeup_bits && - wq_has_sleeper(&random_wait)) { - wake_up_interruptible_poll(&random_wait, POLLIN); + wq_has_sleeper(&random_read_wait)) { + wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } /* If the input pool is getting full, send some @@ -1396,7 +1397,7 @@ retry: trace_debit_entropy(r->name, 8 * ibytes); if (ibytes && (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) { - wake_up_interruptible_poll(&random_wait, POLLOUT); + wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1838,7 +1839,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes) if (nonblock) return -EAGAIN; - wait_event_interruptible(random_wait, + wait_event_interruptible(random_read_wait, ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits); if (signal_pending(current)) @@ -1875,17 +1876,14 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return ret; } -static struct wait_queue_head * -random_get_poll_head(struct file *file, __poll_t events) -{ - return &random_wait; -} - static __poll_t -random_poll_mask(struct file *file, __poll_t events) +random_poll(struct file *file, poll_table * wait) { - __poll_t mask = 0; + __poll_t mask; + poll_wait(file, &random_read_wait, wait); + poll_wait(file, &random_write_wait, wait); + mask = 0; if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits) mask |= EPOLLIN | EPOLLRDNORM; if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) @@ -1992,8 +1990,7 @@ static int random_fasync(int fd, struct file *filp, int on) const struct file_operations random_fops = { .read = random_read, .write = random_write, - .get_poll_head = random_get_poll_head, - .poll_mask = random_poll_mask, + .poll = random_poll, .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, @@ -2326,7 +2323,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, * We'll be woken up again once below random_write_wakeup_thresh, * or when the calling thread is about to terminate. */ - wait_event_interruptible(random_wait, kthread_should_stop() || + wait_event_interruptible(random_write_wait, kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 98f90aadd141..18c0a1281914 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = { .getname = data_sock_getname, .sendmsg = mISDN_sock_sendmsg, .recvmsg = mISDN_sock_recvmsg, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = data_sock_setsockopt, diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index de51e8f70f44..ce61231e96ea 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1107,7 +1107,7 @@ static const struct proto_ops pppoe_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, diff --git a/fs/aio.c b/fs/aio.c index e1d20124ec0e..210df9da1283 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -5,7 +5,6 @@ * Implements an efficient asynchronous io interface. * * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. - * Copyright 2018 Christoph Hellwig. * * See ../COPYING for licensing terms. */ @@ -165,22 +164,10 @@ struct fsync_iocb { bool datasync; }; -struct poll_iocb { - struct file *file; - __poll_t events; - struct wait_queue_head *head; - - union { - struct wait_queue_entry wait; - struct work_struct work; - }; -}; - struct aio_kiocb { union { struct kiocb rw; struct fsync_iocb fsync; - struct poll_iocb poll; }; struct kioctx *ki_ctx; @@ -1590,6 +1577,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)) return -EINVAL; + req->file = fget(iocb->aio_fildes); if (unlikely(!req->file)) return -EBADF; @@ -1604,137 +1592,6 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) return 0; } -/* need to use list_del_init so we can check if item was present */ -static inline bool __aio_poll_remove(struct poll_iocb *req) -{ - if (list_empty(&req->wait.entry)) - return false; - list_del_init(&req->wait.entry); - return true; -} - -static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - fput(iocb->poll.file); - aio_complete(iocb, mangle_poll(mask), 0); -} - -static void aio_poll_work(struct work_struct *work) -{ - struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work); - - if (!list_empty_careful(&iocb->ki_list)) - aio_remove_iocb(iocb); - __aio_poll_complete(iocb, iocb->poll.events); -} - -static int aio_poll_cancel(struct kiocb *iocb) -{ - struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); - struct poll_iocb *req = &aiocb->poll; - struct wait_queue_head *head = req->head; - bool found = false; - - spin_lock(&head->lock); - found = __aio_poll_remove(req); - spin_unlock(&head->lock); - - if (found) { - req->events = 0; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); - } - return 0; -} - -static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, - void *key) -{ - struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); - struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); - struct file *file = req->file; - __poll_t mask = key_to_poll(key); - - assert_spin_locked(&req->head->lock); - - /* for instances that support it check for an event match first: */ - if (mask && !(mask & req->events)) - return 0; - - mask = file->f_op->poll_mask(file, req->events) & req->events; - if (!mask) - return 0; - - __aio_poll_remove(req); - - /* - * Try completing without a context switch if we can acquire ctx_lock - * without spinning. Otherwise we need to defer to a workqueue to - * avoid a deadlock due to the lock order. - */ - if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { - list_del_init(&iocb->ki_list); - spin_unlock(&iocb->ki_ctx->ctx_lock); - - __aio_poll_complete(iocb, mask); - } else { - req->events = mask; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); - } - - return 1; -} - -static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) -{ - struct kioctx *ctx = aiocb->ki_ctx; - struct poll_iocb *req = &aiocb->poll; - __poll_t mask; - - /* reject any unknown events outside the normal event mask. */ - if ((u16)iocb->aio_buf != iocb->aio_buf) - return -EINVAL; - /* reject fields that are not defined for poll */ - if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) - return -EINVAL; - - req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; - req->file = fget(iocb->aio_fildes); - if (unlikely(!req->file)) - return -EBADF; - if (!file_has_poll_mask(req->file)) - goto out_fail; - - req->head = req->file->f_op->get_poll_head(req->file, req->events); - if (!req->head) - goto out_fail; - if (IS_ERR(req->head)) { - mask = EPOLLERR; - goto done; - } - - init_waitqueue_func_entry(&req->wait, aio_poll_wake); - aiocb->ki_cancel = aio_poll_cancel; - - spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - mask = req->file->f_op->poll_mask(req->file, req->events) & req->events; - if (!mask) { - __add_wait_queue(req->head, &req->wait); - list_add_tail(&aiocb->ki_list, &ctx->active_reqs); - } - spin_unlock(&req->head->lock); - spin_unlock_irq(&ctx->ctx_lock); -done: - if (mask) - __aio_poll_complete(aiocb, mask); - return 0; -out_fail: - fput(req->file); - return -EINVAL; /* same as no support for IOCB_CMD_POLL */ -} - static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { @@ -1808,9 +1665,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, case IOCB_CMD_FDSYNC: ret = aio_fsync(&req->fsync, &iocb, true); break; - case IOCB_CMD_POLL: - ret = aio_poll(req, &iocb); - break; default: pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); ret = -EINVAL; diff --git a/fs/eventfd.c b/fs/eventfd.c index ceb1031f1cac..08d3bd602f73 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -101,20 +101,14 @@ static int eventfd_release(struct inode *inode, struct file *file) return 0; } -static struct wait_queue_head * -eventfd_get_poll_head(struct file *file, __poll_t events) -{ - struct eventfd_ctx *ctx = file->private_data; - - return &ctx->wqh; -} - -static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; + poll_wait(file, &ctx->wqh, wait); + /* * All writes to ctx->count occur within ctx->wqh.lock. This read * can be done outside ctx->wqh.lock because we know that poll_wait @@ -156,11 +150,11 @@ static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) count = READ_ONCE(ctx->count); if (count > 0) - events |= (EPOLLIN & eventmask); + events |= EPOLLIN; if (count == ULLONG_MAX) events |= EPOLLERR; if (ULLONG_MAX - 1 > count) - events |= (EPOLLOUT & eventmask); + events |= EPOLLOUT; return events; } @@ -311,8 +305,7 @@ static const struct file_operations eventfd_fops = { .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, - .get_poll_head = eventfd_get_poll_head, - .poll_mask = eventfd_poll_mask, + .poll = eventfd_poll, .read = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ea4436f409fb..67db22fe99c5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -922,18 +922,14 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head return 0; } -static struct wait_queue_head *ep_eventpoll_get_poll_head(struct file *file, - __poll_t eventmask) -{ - struct eventpoll *ep = file->private_data; - return &ep->poll_wait; -} - -static __poll_t ep_eventpoll_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { struct eventpoll *ep = file->private_data; int depth = 0; + /* Insert inside our poll wait queue */ + poll_wait(file, &ep->poll_wait, wait); + /* * Proceed to find out if wanted events are really available inside * the ready list. @@ -972,8 +968,7 @@ static const struct file_operations eventpoll_fops = { .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, - .get_poll_head = ep_eventpoll_get_poll_head, - .poll_mask = ep_eventpoll_poll_mask, + .poll = ep_eventpoll_poll, .llseek = noop_llseek, }; diff --git a/fs/pipe.c b/fs/pipe.c index bb0840e234f3..39d6f431da83 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -509,22 +509,19 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } -static struct wait_queue_head * -pipe_get_poll_head(struct file *filp, __poll_t events) -{ - struct pipe_inode_info *pipe = filp->private_data; - - return &pipe->wait; -} - /* No kernel lock held - fine */ -static __poll_t pipe_poll_mask(struct file *filp, __poll_t events) +static __poll_t +pipe_poll(struct file *filp, poll_table *wait) { + __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - int nrbufs = pipe->nrbufs; - __poll_t mask = 0; + int nrbufs; + + poll_wait(filp, &pipe->wait, wait); /* Reading only -- no need for acquiring the semaphore. */ + nrbufs = pipe->nrbufs; + mask = 0; if (filp->f_mode & FMODE_READ) { mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0; if (!pipe->writers && filp->f_version != pipe->w_counter) @@ -1023,8 +1020,7 @@ const struct file_operations pipefifo_fops = { .llseek = no_llseek, .read_iter = pipe_read, .write_iter = pipe_write, - .get_poll_head = pipe_get_poll_head, - .poll_mask = pipe_poll_mask, + .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, diff --git a/fs/select.c b/fs/select.c index 317891ff8165..4a6b6e4b21cb 100644 --- a/fs/select.c +++ b/fs/select.c @@ -34,29 +34,6 @@ #include -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) -{ - if (file->f_op->poll) { - return file->f_op->poll(file, pt); - } else if (file_has_poll_mask(file)) { - unsigned int events = poll_requested_events(pt); - struct wait_queue_head *head; - - if (pt && pt->_qproc) { - head = file->f_op->get_poll_head(file, events); - if (!head) - return DEFAULT_POLLMASK; - if (IS_ERR(head)) - return EPOLLERR; - pt->_qproc(file, head, pt); - } - - return file->f_op->poll_mask(file, events); - } else { - return DEFAULT_POLLMASK; - } -} -EXPORT_SYMBOL_GPL(vfs_poll); /* * Estimate expected accuracy in ns from a timeval. diff --git a/fs/timerfd.c b/fs/timerfd.c index d84a2bee4f82..cdad49da3ff7 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -226,20 +226,21 @@ static int timerfd_release(struct inode *inode, struct file *file) kfree_rcu(ctx, rcu); return 0; } - -static struct wait_queue_head *timerfd_get_poll_head(struct file *file, - __poll_t eventmask) + +static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; + __poll_t events = 0; + unsigned long flags; - return &ctx->wqh; -} + poll_wait(file, &ctx->wqh, wait); -static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask) -{ - struct timerfd_ctx *ctx = file->private_data; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->ticks) + events |= EPOLLIN; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); - return ctx->ticks ? EPOLLIN : 0; + return events; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, @@ -363,8 +364,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, - .get_poll_head = timerfd_get_poll_head, - .poll_mask = timerfd_poll_mask, + .poll = timerfd_poll, .read = timerfd_read, .llseek = noop_llseek, .show_fdinfo = timerfd_show, diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index cc414db9da0a..482461d8931d 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -245,7 +245,8 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); void af_alg_free_resources(struct af_alg_async_req *areq); void af_alg_async_cb(struct crypto_async_request *_req, int err); -__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events); +__poll_t af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait); struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, unsigned int areqlen); int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..d78d146a98da 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1720,8 +1720,6 @@ struct file_operations { int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); diff --git a/include/linux/net.h b/include/linux/net.h index 08b6eb964dd6..6554d3ba4396 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -147,7 +147,6 @@ struct proto_ops { int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); - __poll_t (*poll_mask) (struct socket *sock, __poll_t events); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, diff --git a/include/linux/poll.h b/include/linux/poll.h index fdf86b4cbc71..7e0fdcf905d2 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -74,18 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->_key = ~(__poll_t)0; /* all events enabled */ } -static inline bool file_has_poll_mask(struct file *file) +static inline bool file_can_poll(struct file *file) { - return file->f_op->get_poll_head && file->f_op->poll_mask; + return file->f_op->poll; } -static inline bool file_can_poll(struct file *file) +static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) { - return file->f_op->poll || file_has_poll_mask(file); + if (unlikely(!file->f_op->poll)) + return DEFAULT_POLLMASK; + return file->f_op->poll(file, pt); } -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt); - struct poll_table_entry { struct file *filp; __poll_t key; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c86885954994..164cdedf6012 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3252,7 +3252,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events); +__poll_t datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 53ce8176c313..ec9d6bc65855 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -271,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events); +__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index b0eaeb02d46d..f4c21b5a1242 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -153,6 +153,8 @@ struct iucv_sock_list { atomic_t autobind_name; }; +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); void iucv_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 30b3e2fe240a..8c2caa370e0f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,7 +109,8 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -__poll_t sctp_poll_mask(struct socket *sock, __poll_t events); +__poll_t sctp_poll(struct file *file, struct socket *sock, + poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); diff --git a/include/net/tcp.h b/include/net/tcp.h index 0448e7c5d2b4..800582b5dd54 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -388,7 +388,8 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); -__poll_t tcp_poll_mask(struct socket *sock, __poll_t events); +__poll_t tcp_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, diff --git a/include/net/tls.h b/include/net/tls.h index 7f84ea3e217c..70c273777fe9 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -109,7 +109,8 @@ struct tls_sw_context_rx { struct strparser strp; void (*saved_data_ready)(struct sock *sk); - __poll_t (*sk_poll_mask)(struct socket *sock, __poll_t events); + unsigned int (*sk_poll)(struct file *file, struct socket *sock, + struct poll_table_struct *wait); struct sk_buff *recv_pkt; u8 control; bool decrypted; @@ -224,7 +225,8 @@ void tls_sw_free_resources_tx(struct sock *sk); void tls_sw_free_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events); +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/include/net/udp.h b/include/net/udp.h index b1ea8b0f5e6a..81afdacd4fff 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -285,7 +285,7 @@ int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); -__poll_t udp_poll_mask(struct socket *sock, __poll_t events); +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index d00221345c19..d4e768d55d14 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -39,8 +39,10 @@ enum { IOCB_CMD_PWRITE = 1, IOCB_CMD_FSYNC = 2, IOCB_CMD_FDSYNC = 3, - /* 4 was the experimental IOCB_CMD_PREADX */ - IOCB_CMD_POLL = 5, + /* These two are experimental. + * IOCB_CMD_PREADX = 4, + * IOCB_CMD_POLL = 5, + */ IOCB_CMD_NOOP = 6, IOCB_CMD_PREADV = 7, IOCB_CMD_PWRITEV = 8, @@ -109,7 +111,7 @@ struct iocb { #undef IFLITTLE struct __aio_sigset { - const sigset_t __user *sigmask; + sigset_t __user *sigmask; size_t sigsetsize; }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 55fdba05d7d9..9b6bc5abe946 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = atalk_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = atalk_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, diff --git a/net/atm/common.c b/net/atm/common.c index ff5748b2190f..a7a68e509628 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -647,11 +647,16 @@ out: return error; } -__poll_t vcc_poll_mask(struct socket *sock, __poll_t events) +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - struct atm_vcc *vcc = ATM_SD(sock); - __poll_t mask = 0; + struct atm_vcc *vcc; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + vcc = ATM_SD(sock); /* exceptional events */ if (sk->sk_err) diff --git a/net/atm/common.h b/net/atm/common.h index 526796ad230f..5850649068bb 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); -__poll_t vcc_poll_mask(struct socket *sock, __poll_t events); +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 9f75092fe778..2cb10af16afc 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pvc_getname, - .poll_mask = vcc_poll_mask, + .poll = vcc_poll, .ioctl = vcc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vcc_compat_ioctl, diff --git a/net/atm/svc.c b/net/atm/svc.c index 53f4ad7087b1..2f91b766ac42 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = { .socketpair = sock_no_socketpair, .accept = svc_accept, .getname = svc_getname, - .poll_mask = vcc_poll_mask, + .poll = vcc_poll, .ioctl = svc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = svc_compat_ioctl, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d1d2442ce573..c603d33d5410 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = { .socketpair = sock_no_socketpair, .accept = ax25_accept, .getname = ax25_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ax25_ioctl, .listen = ax25_listen, .shutdown = ax25_shutdown, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 510ab4f55df5..3264e1873219 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -437,13 +437,16 @@ static inline __poll_t bt_accept_poll(struct sock *parent) return 0; } -__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) +__poll_t bt_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; BT_DBG("sock %p, sk %p", sock, sk); + poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -475,7 +478,7 @@ __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(bt_sock_poll_mask); +EXPORT_SYMBOL(bt_sock_poll); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d6c099861538..1506e1632394 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = { .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = hci_sock_setsockopt, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 742a190034e6..686bdc6b35b0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = { .getname = l2cap_sock_getname, .sendmsg = l2cap_sock_sendmsg, .recvmsg = l2cap_sock_recvmsg, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1cf57622473a..d606e9212291 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = { .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, .ioctl = rfcomm_sock_ioctl, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .socketpair = sock_no_socketpair, .mmap = sock_no_mmap }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d60dbc61d170..413b8ee49fec 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = { .getname = sco_sock_getname, .sendmsg = sco_sock_sendmsg, .recvmsg = sco_sock_recvmsg, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index c7991867d622..a6fb1b3bcad9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -934,11 +934,15 @@ static int caif_release(struct socket *sock) } /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ -static __poll_t caif_poll_mask(struct socket *sock, __poll_t events) +static __poll_t caif_poll(struct file *file, + struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; + __poll_t mask; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - __poll_t mask = 0; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err) @@ -972,7 +976,7 @@ static const struct proto_ops caif_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = caif_poll_mask, + .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -993,7 +997,7 @@ static const struct proto_ops caif_stream_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = caif_poll_mask, + .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/bcm.c b/net/can/bcm.c index 9393f25df08d..0af8f0db892a 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1660,7 +1660,7 @@ static const struct proto_ops bcm_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/raw.c b/net/can/raw.c index fd7e2f49ea6a..1051eee82581 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = raw_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/core/datagram.c b/net/core/datagram.c index f19bf3dc2bd6..9938952c5c78 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -819,8 +819,9 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** * datagram_poll - generic datagram poll + * @file: file struct * @sock: socket - * @events to wait for + * @wait: poll table * * Datagram poll: Again totally generic. This also handles * sequenced packet sockets providing the socket receive queue @@ -830,10 +831,14 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); * and you use a different write policy from sock_writeable() * then please supply your own write_space callback. */ -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events) +__poll_t datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -866,4 +871,4 @@ __poll_t datagram_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(datagram_poll_mask); +EXPORT_SYMBOL(datagram_poll); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 0ea2ee56ac1b..f91e3816806b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -316,7 +316,8 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); -__poll_t dccp_poll_mask(struct socket *sock, __poll_t events); +__poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait); int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void dccp_req_err(struct sock *sk, u64 seq); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a9e478cd3787..b08feb219b44 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = { .accept = inet_accept, .getname = inet_getname, /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ - .poll_mask = dccp_poll_mask, + .poll = dccp_poll, .ioctl = inet_ioctl, /* FIXME: work on inet_listen to rename it to sock_common_listen */ .listen = inet_dccp_listen, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 17fc4e0166ba..6344f1b18a6a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet6_getname, - .poll_mask = dccp_poll_mask, + .poll = dccp_poll, .ioctl = inet6_ioctl, .listen = inet_dccp_listen, .shutdown = inet_shutdown, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ca21c1c76da0..0d56e36a6db7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,11 +312,20 @@ int dccp_disconnect(struct sock *sk, int flags) EXPORT_SYMBOL_GPL(dccp_disconnect); -__poll_t dccp_poll_mask(struct socket *sock, __poll_t events) +/* + * Wait for a DCCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ +__poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) { __poll_t mask; struct sock *sk = sock->sk; + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); @@ -358,7 +367,7 @@ __poll_t dccp_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL_GPL(dccp_poll_mask); +EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9a686d890bfa..7d6ff983ba2c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) } -static __poll_t dn_poll_mask(struct socket *sock, __poll_t events) +static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); if (!skb_queue_empty(&scp->other_receive_queue)) mask |= EPOLLRDBAND; @@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = { .socketpair = sock_no_socketpair, .accept = dn_accept, .getname = dn_getname, - .poll_mask = dn_poll_mask, + .poll = dn_poll, .ioctl = dn_ioctl, .listen = dn_listen, .shutdown = dn_shutdown, diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a0768d2759b8..a60658c85a9a 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 15e125558c76..b403499fdabe 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, - .poll_mask = tcp_poll_mask, + .poll = tcp_poll, .ioctl = inet_ioctl, .listen = inet_listen, .shutdown = inet_shutdown, @@ -1021,7 +1021,7 @@ const struct proto_ops inet_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, - .poll_mask = udp_poll_mask, + .poll = udp_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, @@ -1042,7 +1042,7 @@ EXPORT_SYMBOL(inet_dgram_ops); /* * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without - * udp_poll_mask + * udp_poll */ static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, @@ -1053,7 +1053,7 @@ static const struct proto_ops inet_sockraw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 141acd92e58a..e7b53d2a971f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -494,21 +494,32 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, } /* - * Socket is not locked. We are protected from async events by poll logic and - * correct handling of state changes made by other threads is impossible in - * any case. + * Wait for a TCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. */ -__poll_t tcp_poll_mask(struct socket *sock, __poll_t events) +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { + __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); - __poll_t mask = 0; int state; + sock_poll_wait(file, sk_sleep(sk), wait); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) return inet_csk_listen_poll(sk); + /* Socket is not locked. We are protected from async events + * by poll logic and correct handling of state changes + * made by other threads is impossible in any case. + */ + + mask = 0; + /* * EPOLLHUP is certainly not done right. But poll() doesn't * have a notion of HUP in just one direction, and for a @@ -589,7 +600,7 @@ __poll_t tcp_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(tcp_poll_mask); +EXPORT_SYMBOL(tcp_poll); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9bb27df4dac5..24e116ddae79 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2591,7 +2591,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, * udp_poll - wait for a UDP event. * @file - file struct * @sock - socket - * @events - events to wait for + * @wait - poll table * * This is same as datagram poll, except for the special case of * blocking sockets. If application is using a blocking fd @@ -2600,23 +2600,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, * but then block when reading it. Add special case code * to work around these arguably broken applications. */ -__poll_t udp_poll_mask(struct socket *sock, __poll_t events) +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Check for false positives due to checksum errors */ - if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) && + if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); return mask; } -EXPORT_SYMBOL(udp_poll_mask); +EXPORT_SYMBOL(udp_poll); int udp_abort(struct sock *sk, int err) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 74f2a261e8df..9ed0eae91758 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -570,7 +570,7 @@ const struct proto_ops inet6_stream_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = inet_accept, /* ok */ .getname = inet6_getname, - .poll_mask = tcp_poll_mask, /* ok */ + .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ @@ -603,7 +603,7 @@ const struct proto_ops inet6_dgram_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, - .poll_mask = udp_poll_mask, /* ok */ + .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ce6f0d15b5dd..afc307c89d1a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1334,7 +1334,7 @@ void raw6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -/* Same as inet6_dgram_ops, sans udp_poll_mask. */ +/* Same as inet6_dgram_ops, sans udp_poll. */ const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, @@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, - .poll_mask = datagram_poll_mask, /* ok */ + .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 68e86257a549..893a022f9620 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1488,11 +1488,14 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) return 0; } -static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events) +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); @@ -2385,7 +2388,7 @@ static const struct proto_ops iucv_sock_ops = { .getname = iucv_sock_getname, .sendmsg = iucv_sock_sendmsg, .recvmsg = iucv_sock_recvmsg, - .poll_mask = iucv_sock_poll_mask, + .poll = iucv_sock_poll, .ioctl = sock_no_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 84b7d5c6fec8..d3601d421571 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) struct list_head *head; int index = 0; - /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state, - * so we set sk_state, otherwise epoll_wait always returns right away - * with EPOLLHUP + /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so + * we set sk_state, otherwise epoll_wait always returns right away with + * EPOLLHUP */ kcm->sk.sk_state = TCP_ESTABLISHED; @@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = kcm_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = kcm_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/key/af_key.c b/net/key/af_key.c index 8bdc1cbe490a..5e1d2946ffbf 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = { /* Now the operations that really occur. */ .release = pfkey_release, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 181073bf6925..a9c05b2bc1b0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 336e4c00abbc..957369192ca1 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip6_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet6_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 55188382845c..e398797878a9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1818,7 +1818,7 @@ static const struct proto_ops pppol2tp_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppol2tp_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = pppol2tp_setsockopt, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 804de8490186..1beeea9549fa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = { .socketpair = sock_no_socketpair, .accept = llc_ui_accept, .getname = llc_ui_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = llc_ui_ioctl, .listen = llc_ui_listen, .shutdown = llc_ui_shutdown, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1189b84413d5..393573a99a5a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 93fbcafbf388..03f37c4e64fe 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = { .socketpair = sock_no_socketpair, .accept = nr_accept, .getname = nr_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = nr_ioctl, .listen = nr_listen, .shutdown = sock_no_shutdown, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ab5bb14b49af..ea0c0c6f1874 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -548,13 +548,16 @@ static inline __poll_t llcp_accept_poll(struct sock *parent) return 0; } -static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events) +static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; pr_debug("%p\n", sk); + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); @@ -896,7 +899,7 @@ static const struct proto_ops llcp_sock_ops = { .socketpair = sock_no_socketpair, .accept = llcp_sock_accept, .getname = llcp_sock_getname, - .poll_mask = llcp_sock_poll_mask, + .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, @@ -916,7 +919,7 @@ static const struct proto_ops llcp_rawsock_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = llcp_sock_getname, - .poll_mask = llcp_sock_poll_mask, + .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 60c322531c49..e2188deb08dc 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ff8e7e245c37..57634bc3da74 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4076,11 +4076,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, return 0; } -static __poll_t packet_poll_mask(struct socket *sock, __poll_t events) +static __poll_t packet_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { @@ -4422,7 +4423,7 @@ static const struct proto_ops packet_ops_spkt = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname_spkt, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = packet_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -4443,7 +4444,7 @@ static const struct proto_ops packet_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, - .poll_mask = packet_poll_mask, + .poll = packet_poll, .ioctl = packet_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c295c4e20f01..30187990257f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -340,12 +340,15 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, return sizeof(struct sockaddr_pn); } -static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events) +static __poll_t pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct pep_sock *pn = pep_sk(sk); __poll_t mask = 0; + poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == TCP_CLOSE) return EPOLLERR; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -445,7 +448,7 @@ const struct proto_ops phonet_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pn_socket_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = pn_socket_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -470,7 +473,7 @@ const struct proto_ops phonet_stream_ops = { .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, - .poll_mask = pn_socket_poll_mask, + .poll = pn_socket_poll, .ioctl = pn_socket_ioctl, .listen = pn_socket_listen, .shutdown = sock_no_shutdown, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 1b5025ea5b04..2aa07b547b16 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = { .recvmsg = qrtr_recvmsg, .getname = qrtr_getname, .ioctl = qrtr_ioctl, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ebe42e7eb456..d00a0ef39a56 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = { .socketpair = sock_no_socketpair, .accept = rose_accept, .getname = rose_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = rose_ioctl, .listen = rose_listen, .shutdown = sock_no_shutdown, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 3b1ac93efee2..2b463047dd7b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -734,11 +734,15 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname, /* * permit an RxRPC socket to be polled */ -static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t rxrpc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ @@ -945,7 +949,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = rxrpc_poll_mask, + .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, .shutdown = rxrpc_shutdown, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7339918a805d..0cd2e764f47f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, - .poll_mask = sctp_poll_mask, + .poll = sctp_poll, .ioctl = inet6_ioctl, .listen = sctp_inet_listen, .shutdown = inet_shutdown, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5dffbc493008..67f73d3a1356 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ - .poll_mask = sctp_poll_mask, + .poll = sctp_poll, .ioctl = inet_ioctl, .listen = sctp_inet_listen, .shutdown = inet_shutdown, /* Looks harmless. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d20f7addee19..ce620e878538 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7717,12 +7717,14 @@ out: * here, again, by modeling the current TCP/UDP code. We don't have * a good way to test with it yet. */ -__poll_t sctp_poll_mask(struct socket *sock, __poll_t events) +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct sctp_sock *sp = sctp_sk(sk); __poll_t mask; + poll_wait(file, sk_sleep(sk), wait); + sock_rps_record_flow(sk); /* A TCP-style listening socket becomes readable when the accept queue diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da7f02edcd37..973b4471b532 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1273,7 +1273,8 @@ static __poll_t smc_accept_poll(struct sock *parent) return mask; } -static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t smc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -1289,7 +1290,7 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ release_sock(sk); - mask = smc->clcsock->ops->poll_mask(smc->clcsock, events); + mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); lock_sock(sk); sk->sk_err = smc->clcsock->sk->sk_err; if (sk->sk_err) { @@ -1307,6 +1308,11 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) } } } else { + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= EPOLLERR; if ((sk->sk_shutdown == SHUTDOWN_MASK) || @@ -1619,7 +1625,7 @@ static const struct proto_ops smc_sock_ops = { .socketpair = sock_no_socketpair, .accept = smc_accept, .getname = smc_getname, - .poll_mask = smc_poll_mask, + .poll = smc_poll, .ioctl = smc_ioctl, .listen = smc_listen, .shutdown = smc_shutdown, diff --git a/net/socket.c b/net/socket.c index 8a109012608a..a564c6ed19d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -117,10 +117,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); -static struct wait_queue_head *sock_get_poll_head(struct file *file, - __poll_t events); -static __poll_t sock_poll_mask(struct file *file, __poll_t); -static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait); +static __poll_t sock_poll(struct file *file, + struct poll_table_struct *wait); static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, @@ -143,8 +141,6 @@ static const struct file_operations socket_file_ops = { .llseek = no_llseek, .read_iter = sock_read_iter, .write_iter = sock_write_iter, - .get_poll_head = sock_get_poll_head, - .poll_mask = sock_poll_mask, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -1130,48 +1126,14 @@ out_release: } EXPORT_SYMBOL(sock_create_lite); -static struct wait_queue_head *sock_get_poll_head(struct file *file, - __poll_t events) -{ - struct socket *sock = file->private_data; - - if (!sock->ops->poll_mask) - return NULL; - sock_poll_busy_loop(sock, events); - return sk_sleep(sock->sk); -} - -static __poll_t sock_poll_mask(struct file *file, __poll_t events) -{ - struct socket *sock = file->private_data; - - /* - * We need to be sure we are in sync with the socket flags modification. - * - * This memory barrier is paired in the wq_has_sleeper. - */ - smp_mb(); - - /* this socket can poll_ll so tell the system call */ - return sock->ops->poll_mask(sock, events) | - (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0); -} - /* No kernel lock held - perfect */ static __poll_t sock_poll(struct file *file, poll_table *wait) { struct socket *sock = file->private_data; - __poll_t events = poll_requested_events(wait), mask = 0; - - if (sock->ops->poll) { - sock_poll_busy_loop(sock, events); - mask = sock->ops->poll(file, sock, wait); - } else if (sock->ops->poll_mask) { - sock_poll_wait(file, sock_get_poll_head(file, events), wait); - mask = sock->ops->poll_mask(sock, events); - } + __poll_t events = poll_requested_events(wait); - return mask | sock_poll_busy_flag(sock); + sock_poll_busy_loop(sock, events); + return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 14a5d055717d..930852c54d7a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -692,9 +692,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, } /** - * tipc_poll - read pollmask + * tipc_poll - read and possibly block on pollmask * @file: file structure associated with the socket * @sock: socket for which to calculate the poll bits + * @wait: ??? * * Returns pollmask value * @@ -708,12 +709,15 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); __poll_t revents = 0; + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_shutdown & RCV_SHUTDOWN) revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) @@ -3033,7 +3037,7 @@ static const struct proto_ops msg_ops = { .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, @@ -3054,7 +3058,7 @@ static const struct proto_ops packet_ops = { .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, @@ -3075,7 +3079,7 @@ static const struct proto_ops stream_ops = { .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a127d61e8af9..301f22430469 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -712,7 +712,7 @@ static int __init tls_register(void) build_protos(tls_prots[TLSV4], &tcp_prot); tls_sw_proto_ops = inet_stream_ops; - tls_sw_proto_ops.poll_mask = tls_sw_poll_mask; + tls_sw_proto_ops.poll = tls_sw_poll; tls_sw_proto_ops.splice_read = tls_sw_splice_read; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f127fac88acf..d2380548f8f6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -919,22 +919,23 @@ splice_read_end: return copied ? : err; } -__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events) +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) { + unsigned int ret; struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - __poll_t mask; - /* Grab EPOLLOUT and EPOLLHUP from the underlying socket */ - mask = ctx->sk_poll_mask(sock, events); + /* Grab POLLOUT and POLLHUP from the underlying socket */ + ret = ctx->sk_poll(file, sock, wait); - /* Clear EPOLLIN bits, and set based on recv_pkt */ - mask &= ~(EPOLLIN | EPOLLRDNORM); + /* Clear POLLIN bits, and set based on recv_pkt */ + ret &= ~(POLLIN | POLLRDNORM); if (ctx->recv_pkt) - mask |= EPOLLIN | EPOLLRDNORM; + ret |= POLLIN | POLLRDNORM; - return mask; + return ret; } static int tls_read_size(struct strparser *strp, struct sk_buff *skb) @@ -1191,7 +1192,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); - sw_ctx_rx->sk_poll_mask = sk->sk_socket->ops->poll_mask; + sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll; strp_check_rcv(&sw_ctx_rx->strp); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 95b02a71fd47..e5473c03d667 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -638,8 +638,9 @@ static int unix_stream_connect(struct socket *, struct sockaddr *, static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int); -static __poll_t unix_poll_mask(struct socket *, __poll_t); -static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t); +static __poll_t unix_poll(struct file *, struct socket *, poll_table *); +static __poll_t unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); @@ -680,7 +681,7 @@ static const struct proto_ops unix_stream_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll_mask = unix_poll_mask, + .poll = unix_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -703,7 +704,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll_mask = unix_dgram_poll_mask, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -725,7 +726,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll_mask = unix_dgram_poll_mask, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -2629,10 +2630,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } -static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) +static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err) @@ -2661,11 +2665,15 @@ static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) return mask; } -static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) +static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk, *other; - int writable; - __poll_t mask = 0; + unsigned int writable; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -2691,7 +2699,7 @@ static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) } /* No write status requested, avoid expensive OUT tests. */ - if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) + if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; writable = unix_writable(sk); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bb5d5fa68c35..c1076c19b858 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -850,11 +850,18 @@ static int vsock_shutdown(struct socket *sock, int mode) return err; } -static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events) +static __poll_t vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) { - struct sock *sk = sock->sk; - struct vsock_sock *vsk = vsock_sk(sk); - __poll_t mask = 0; + struct sock *sk; + __poll_t mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; if (sk->sk_err) /* Signify that there has been an error on this socket. */ @@ -1084,7 +1091,7 @@ static const struct proto_ops vsock_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = vsock_getname, - .poll_mask = vsock_poll_mask, + .poll = vsock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = vsock_shutdown, @@ -1842,7 +1849,7 @@ static const struct proto_ops vsock_stream_ops = { .socketpair = sock_no_socketpair, .accept = vsock_accept, .getname = vsock_getname, - .poll_mask = vsock_poll_mask, + .poll = vsock_poll, .ioctl = sock_no_ioctl, .listen = vsock_listen, .shutdown = vsock_shutdown, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f93365ae0fdd..d49aa79b7997 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = { .socketpair = sock_no_socketpair, .accept = x25_accept, .getname = x25_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = x25_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_x25_ioctl, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3b3410ada097..59fb7d3c36a3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -303,9 +303,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len); } -static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events) +static unsigned int xsk_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) { - __poll_t mask = datagram_poll_mask(sock, events); + unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); @@ -696,7 +697,7 @@ static const struct proto_ops xsk_proto_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = xsk_poll_mask, + .poll = xsk_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, -- cgit v1.2.3 From d50d82faa0c964e31f7a946ba8aba7c715ca7ab0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 27 Jun 2018 23:26:09 -0700 Subject: slub: fix failure when we delete and create a slab cache In kernel 4.17 I removed some code from dm-bufio that did slab cache merging (commit 21bb13276768: "dm bufio: remove code that merges slab caches") - both slab and slub support merging caches with identical attributes, so dm-bufio now just calls kmem_cache_create and relies on implicit merging. This uncovered a bug in the slub subsystem - if we delete a cache and immediatelly create another cache with the same attributes, it fails because of duplicate filename in /sys/kernel/slab/. The slub subsystem offloads freeing the cache to a workqueue - and if we create the new cache before the workqueue runs, it complains because of duplicate filename in sysfs. This patch fixes the bug by moving the call of kobject_del from sysfs_slab_remove_workfn to shutdown_cache. kobject_del must be called while we hold slab_mutex - so that the sysfs entry is deleted before a cache with the same attributes could be created. Running device-mapper-test-suite with: dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/ triggered: Buffer I/O error on dev dm-0, logical block 1572848, async page read device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5 device-mapper: thin: 253:1: aborting current metadata transaction sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144' CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25 Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017 Workqueue: dm-thin do_worker [dm_thin_pool] Call Trace: dump_stack+0x5a/0x73 sysfs_warn_dup+0x58/0x70 sysfs_create_dir_ns+0x77/0x80 kobject_add_internal+0xba/0x2e0 kobject_init_and_add+0x70/0xb0 sysfs_slab_add+0xb1/0x250 __kmem_cache_create+0x116/0x150 create_cache+0xd9/0x1f0 kmem_cache_create_usercopy+0x1c1/0x250 kmem_cache_create+0x18/0x20 dm_bufio_client_create+0x1ae/0x410 [dm_bufio] dm_block_manager_create+0x5e/0x90 [dm_persistent_data] __create_persistent_data_objects+0x38/0x940 [dm_thin_pool] dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool] metadata_operation_failed+0x59/0x100 [dm_thin_pool] alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool] process_cell+0x2a3/0x550 [dm_thin_pool] do_worker+0x28d/0x8f0 [dm_thin_pool] process_one_work+0x171/0x370 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory. kmem_cache_create(dm_bufio_buffer-16) failed with error -17 Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Mikulas Patocka Reported-by: Mike Snitzer Tested-by: Mike Snitzer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++++ mm/slab_common.c | 4 ++++ mm/slub.c | 7 ++++++- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 09fa2c6f0e68..3a1a1dbc6f49 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -155,8 +155,12 @@ struct kmem_cache { #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS +void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); #else +static inline void sysfs_slab_unlink(struct kmem_cache *s) +{ +} static inline void sysfs_slab_release(struct kmem_cache *s) { } diff --git a/mm/slab_common.c b/mm/slab_common.c index 890b1f04a03a..2296caf87bfb 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -567,10 +567,14 @@ static int shutdown_cache(struct kmem_cache *s) list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); +#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { #ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); sysfs_slab_release(s); #else slab_kmem_cache_release(s); diff --git a/mm/slub.c b/mm/slub.c index a3b8467c14af..51258eff4178 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5667,7 +5667,6 @@ static void sysfs_slab_remove_workfn(struct work_struct *work) kset_unregister(s->memcg_kset); #endif kobject_uevent(&s->kobj, KOBJ_REMOVE); - kobject_del(&s->kobj); out: kobject_put(&s->kobj); } @@ -5752,6 +5751,12 @@ static void sysfs_slab_remove(struct kmem_cache *s) schedule_work(&s->kobj_remove_work); } +void sysfs_slab_unlink(struct kmem_cache *s) +{ + if (slab_state >= FULL) + kobject_del(&s->kobj); +} + void sysfs_slab_release(struct kmem_cache *s) { if (slab_state >= FULL) -- cgit v1.2.3 From f77bc3a82ce58fe7977a11f28e0b6cac8a9e087c Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 27 Jun 2018 23:26:17 -0700 Subject: include/linux/dax.h: dax_iomap_fault() returns vm_fault_t Commit 1c8f422059ae ("mm: change return type to vm_fault_t") missed a conversion. It's not a big problem at present because mainline is still using typedef int vm_fault_t; Fixes: 1c8f422059ae ("mm: change return type to vm_fault_t") Link: http://lkml.kernel.org/r/20180620172046.GA27894@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Andrew Morton Cc: Matthew Wilcox Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 3855e3800f48..deb0f663252f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -135,7 +135,7 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, +vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn); -- cgit v1.2.3 From 9544bc5347207a68eb308cc8aaaed6c3a687cabd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 29 Jun 2018 08:48:06 -0600 Subject: sg: remove ->sg_magic member This was introduced more than a decade ago when sg chaining was added, but we never really caught anything with it. The scatterlist entry size can be critical, since drivers allocate it, so remove the magic member. Recently it's been triggering allocation stalls and failures in NVMe. Tested-by: Jordan Glover Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/gpu/drm/i915/i915_drv.h | 3 --- include/linux/scatterlist.h | 18 ------------------ lib/scatterlist.c | 6 ------ tools/virtio/linux/scatterlist.h | 18 ------------------ 4 files changed, 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34c125e2d90c..9180f67746b4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2238,9 +2238,6 @@ static inline struct scatterlist *____sg_next(struct scatterlist *sg) **/ static inline struct scatterlist *__sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif return sg_is_last(sg) ? NULL : ____sg_next(sg); } diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 51f52020ad5f..093aa57120b0 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -9,9 +9,6 @@ #include struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif unsigned long page_link; unsigned int offset; unsigned int length; @@ -64,7 +61,6 @@ struct sg_table { * */ -#define SG_MAGIC 0x87654321 #define SG_CHAIN 0x01UL #define SG_END 0x02UL @@ -98,7 +94,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -129,7 +124,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); @@ -195,9 +189,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -215,9 +206,6 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~SG_END; } @@ -260,12 +248,6 @@ static inline void *sg_virt(struct scatterlist *sg) static inline void sg_init_marker(struct scatterlist *sgl, unsigned int nents) { -#ifdef CONFIG_DEBUG_SG - unsigned int i; - - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; -#endif sg_mark_end(&sgl[nents - 1]); } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 06dad7a072fd..d4ae67d6cd1e 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -24,9 +24,6 @@ **/ struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) for_each_sg(sgl, sg, nents, i) ret = sg; -#ifdef CONFIG_DEBUG_SG - BUG_ON(sgl[0].sg_magic != SG_MAGIC); BUG_ON(!sg_is_last(ret)); -#endif return ret; } EXPORT_SYMBOL(sg_last); diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h index 9a45f90e2d08..369ee308b668 100644 --- a/tools/virtio/linux/scatterlist.h +++ b/tools/virtio/linux/scatterlist.h @@ -36,7 +36,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & 0x03); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -67,7 +66,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~0x3); @@ -116,9 +114,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -136,17 +131,11 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~0x02; } static inline struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -160,13 +149,6 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg) static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) { memset(sgl, 0, sizeof(*sgl) * nents); -#ifdef CONFIG_DEBUG_SG - { - unsigned int i; - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; - } -#endif sg_mark_end(&sgl[nents - 1]); } -- cgit v1.2.3 From f308d7353d1f5c3ddedc784a367edc72fc51bbaa Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 24 Jun 2018 23:27:22 +0200 Subject: mtd: rawnand: add Reed-Solomon error correction algorithm Add Reed-Solomon (RS) to the enumeration of ECC algorithms. Signed-off-by: Stefan Agner Reviewed-by: Boris Brezillon Acked-by: Rob Herring Signed-off-by: Miquel Raynal --- Documentation/devicetree/bindings/mtd/nand.txt | 2 +- drivers/mtd/nand/raw/nand_base.c | 1 + include/linux/mtd/rawnand.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt index 8bb11d809429..eaef8c657aa5 100644 --- a/Documentation/devicetree/bindings/mtd/nand.txt +++ b/Documentation/devicetree/bindings/mtd/nand.txt @@ -25,7 +25,7 @@ Optional NAND chip properties: Deprecated values: "soft_bch": use "soft" and nand-ecc-algo instead - nand-ecc-algo: string, algorithm of NAND ECC. - Supported values are: "hamming", "bch". + Valid values are: "hamming", "bch", "rs". - nand-bus-width : 8 or 16 bus width if not present 8 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index b01d15ec4c56..d0af5347f89d 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5777,6 +5777,7 @@ static int of_get_nand_ecc_mode(struct device_node *np) static const char * const nand_ecc_algos[] = { [NAND_ECC_HAMMING] = "hamming", [NAND_ECC_BCH] = "bch", + [NAND_ECC_RS] = "rs", }; static int of_get_nand_ecc_algo(struct device_node *np) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 3e8ec3b8a39c..2d9cb7acbc3d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -121,6 +121,7 @@ enum nand_ecc_algo { NAND_ECC_UNKNOWN, NAND_ECC_HAMMING, NAND_ECC_BCH, + NAND_ECC_RS, }; /* -- cgit v1.2.3 From f922bd798bb94e0adcce26ddd811245443173268 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 24 Jun 2018 23:27:23 +0200 Subject: mtd: rawnand: add an option to specify NAND chip as a boot device Allow to define a NAND chip as a boot device. This can be helpful for the selection of the ECC algorithm and strength in case the boot ROM supports only a subset of controller provided options. Signed-off-by: Stefan Agner Reviewed-by: Boris Brezillon Acked-by: Rob Herring Signed-off-by: Miquel Raynal --- Documentation/devicetree/bindings/mtd/nand.txt | 4 ++++ drivers/mtd/nand/raw/nand_base.c | 3 +++ include/linux/mtd/rawnand.h | 6 ++++++ 3 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt index eaef8c657aa5..e949c778e983 100644 --- a/Documentation/devicetree/bindings/mtd/nand.txt +++ b/Documentation/devicetree/bindings/mtd/nand.txt @@ -43,6 +43,10 @@ Optional NAND chip properties: This is particularly useful when only the in-band area is used by the upper layers, and you want to make your NAND as reliable as possible. +- nand-is-boot-medium: Whether the NAND chip is a boot medium. Drivers might use + this information to select ECC algorithms supported by + the boot ROM or similar restrictions. + - nand-rb: shall contain the native Ready/Busy ids. The ECC strength and ECC step size properties define the correction capability diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d0af5347f89d..1c633f80c3e1 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5859,6 +5859,9 @@ static int nand_dt_init(struct nand_chip *chip) if (of_get_nand_bus_width(dn) == 16) chip->options |= NAND_BUSWIDTH_16; + if (of_property_read_bool(dn, "nand-is-boot-medium")) + chip->options |= NAND_IS_BOOT_MEDIUM; + if (of_get_nand_on_flash_bbt(dn)) chip->bbt_options |= NAND_BBT_USE_FLASH; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2d9cb7acbc3d..80aeeca03f36 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -219,6 +219,12 @@ enum nand_ecc_algo { */ #define NAND_WAIT_TCCS 0x00200000 +/* + * Whether the NAND chip is a boot medium. Drivers might use this information + * to select ECC algorithms supported by the boot ROM or similar restrictions. + */ +#define NAND_IS_BOOT_MEDIUM 0x00400000 + /* Options set by nand scan */ /* Nand scan has allocated controller struct */ #define NAND_CONTROLLER_ALLOC 0x80000000 -- cgit v1.2.3 From 00ce4e039ad5bded462931606c3063ff691964b7 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 25 Jun 2018 10:44:44 +1200 Subject: mtd: rawnand: add manufacturer fixup for ONFI parameter page This is called after the ONFI parameter page checksum is verified and allows us to override the contents of the parameter page. Suggested-by: Boris Brezillon Signed-off-by: Chris Packham Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 4 ++++ include/linux/mtd/rawnand.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 1c633f80c3e1..f362c09a71d7 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5168,6 +5168,10 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) } } + if (chip->manufacturer.desc && chip->manufacturer.desc->ops && + chip->manufacturer.desc->ops->fixup_onfi_param_page) + chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p); + /* Check version */ val = le16_to_cpu(p->revision); if (val & (1 << 5)) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 80aeeca03f36..dc4538b58b84 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -785,11 +785,15 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * implementation) if any. * @cleanup: the ->init() function may have allocated resources, ->cleanup() * is here to let vendor specific code release those resources. + * @fixup_onfi_param_page: apply vendor specific fixups to the ONFI parameter + * page. This is called after the checksum is verified. */ struct nand_manufacturer_ops { void (*detect)(struct nand_chip *chip); int (*init)(struct nand_chip *chip); void (*cleanup)(struct nand_chip *chip); + void (*fixup_onfi_param_page)(struct nand_chip *chip, + struct nand_onfi_params *p); }; /** -- cgit v1.2.3 From 872b71ff084ab125c68073d9e69acfd7095f2015 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 25 Jun 2018 10:44:45 +1200 Subject: mtd: rawnand: add defines for ONFI version bits Add defines for the ONFI version bits and use them in nand_flash_detect_onfi(). Signed-off-by: Chris Packham Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 10 +++++----- include/linux/mtd/rawnand.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index f362c09a71d7..dbf6e80e9ab5 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5174,15 +5174,15 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) /* Check version */ val = le16_to_cpu(p->revision); - if (val & (1 << 5)) + if (val & ONFI_VERSION_2_3) chip->parameters.onfi.version = 23; - else if (val & (1 << 4)) + else if (val & ONFI_VERSION_2_2) chip->parameters.onfi.version = 22; - else if (val & (1 << 3)) + else if (val & ONFI_VERSION_2_1) chip->parameters.onfi.version = 21; - else if (val & (1 << 2)) + else if (val & ONFI_VERSION_2_0) chip->parameters.onfi.version = 20; - else if (val & (1 << 1)) + else if (val & ONFI_VERSION_1_0) chip->parameters.onfi.version = 10; if (!chip->parameters.onfi.version) { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index dc4538b58b84..fbac4741da52 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -237,6 +237,17 @@ enum nand_ecc_algo { /* Keep gcc happy */ struct nand_chip; +/* ONFI version bits */ +#define ONFI_VERSION_1_0 BIT(1) +#define ONFI_VERSION_2_0 BIT(2) +#define ONFI_VERSION_2_1 BIT(3) +#define ONFI_VERSION_2_2 BIT(4) +#define ONFI_VERSION_2_3 BIT(5) +#define ONFI_VERSION_3_0 BIT(6) +#define ONFI_VERSION_3_1 BIT(7) +#define ONFI_VERSION_3_2 BIT(8) +#define ONFI_VERSION_4_0 BIT(9) + /* ONFI features */ #define ONFI_FEATURE_16_BIT_BUS (1 << 0) #define ONFI_FEATURE_EXT_PARAM_PAGE (1 << 7) -- cgit v1.2.3 From 181ace9e1b2e1c5c2be590bc9603baa65f3a16d8 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 20 Jun 2018 12:57:28 +0530 Subject: mtd: rawnand: helper function for setting up ECC configuration commit 2c8f8afa7f92 ("mtd: nand: add generic helpers to check, match, maximize ECC settings") provides generic helpers which drivers can use for setting up ECC parameters. Since same board can have different ECC strength nand chips so following is the logic for setting up ECC strength and ECC step size, which can be used by most of the drivers. 1. If both ECC step size and ECC strength are already set (usually by DT) then just check whether this setting is supported by NAND controller. 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength supported by NAND controller. 3. Otherwise, try to match the ECC step size and ECC strength closest to the chip's requirement. If available OOB size can't fit the chip requirement then select maximum ECC strength which can be fit with available OOB size. This patch introduces nand_ecc_choose_conf function which calls the required helper functions for the above logic. The drivers can use this single function instead of calling the 3 helper functions individually. Signed-off-by: Abhishek Sahu Reviewed-by: Masahiro Yamada Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 33 +++++++++++++++++++++++++++++++++ include/linux/mtd/rawnand.h | 3 +++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index dbf6e80e9ab5..2858d0b09c1d 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -6295,6 +6295,39 @@ int nand_maximize_ecc(struct nand_chip *chip, } EXPORT_SYMBOL_GPL(nand_maximize_ecc); +/** + * nand_ecc_choose_conf - Set the ECC strength and ECC step size + * @chip: nand chip info structure + * @caps: ECC engine caps info structure + * @oobavail: OOB size that the ECC engine can use + * + * Choose the ECC configuration according to following logic + * + * 1. If both ECC step size and ECC strength are already set (usually by DT) + * then check if it is supported by this controller. + * 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength. + * 3. Otherwise, try to match the ECC step size and ECC strength closest + * to the chip's requirement. If available OOB size can't fit the chip + * requirement then fallback to the maximum ECC step size and ECC strength. + * + * On success, the chosen ECC settings are set. + */ +int nand_ecc_choose_conf(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail) +{ + if (chip->ecc.size && chip->ecc.strength) + return nand_check_ecc_caps(chip, caps, oobavail); + + if (chip->ecc.options & NAND_ECC_MAXIMIZE) + return nand_maximize_ecc(chip, caps, oobavail); + + if (!nand_match_ecc_req(chip, caps, oobavail)) + return 0; + + return nand_maximize_ecc(chip, caps, oobavail); +} +EXPORT_SYMBOL_GPL(nand_ecc_choose_conf); + /* * Check if the chip configuration meet the datasheet requirements. diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fbac4741da52..2ce305388471 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1672,6 +1672,9 @@ int nand_match_ecc_req(struct nand_chip *chip, int nand_maximize_ecc(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); +int nand_ecc_choose_conf(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + /* Default write_oob implementation */ int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); -- cgit v1.2.3 From 0cf5c7dbaa392d11cfae532cedeaad0ac48b2165 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 20 Jun 2018 12:57:42 +0530 Subject: mtd: rawnand: provide only single helper function for ECC conf Function nand_ecc_choose_conf() will be help for all the cases, so other helper functions can be made static. nand_check_ecc_caps(): Invoke nand_ecc_choose_conf() with both chip->ecc.size and chip->ecc.strength value set. nand_maximize_ecc(): Invoke nand_ecc_choose_conf() with NAND_ECC_MAXIMIZE flag. nand_match_ecc_req(): Invoke nand_ecc_choose_conf() with either chip->ecc.size or chip->ecc.strength value set and without NAND_ECC_MAXIMIZE flag. CC: Masahiro Yamada Signed-off-by: Abhishek Sahu Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 39 +++++++++++++++------------------------ include/linux/mtd/rawnand.h | 9 --------- 2 files changed, 15 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 2858d0b09c1d..faac82b1e058 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -6085,24 +6085,17 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd) * by the controller and the calculated ECC bytes fit within the chip's OOB. * On success, the calculated ECC bytes is set. */ -int nand_check_ecc_caps(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail) +static int +nand_check_ecc_caps(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail) { struct mtd_info *mtd = nand_to_mtd(chip); const struct nand_ecc_step_info *stepinfo; int preset_step = chip->ecc.size; int preset_strength = chip->ecc.strength; - int nsteps, ecc_bytes; + int ecc_bytes, nsteps = mtd->writesize / preset_step; int i, j; - if (WARN_ON(oobavail < 0)) - return -EINVAL; - - if (!preset_step || !preset_strength) - return -ENODATA; - - nsteps = mtd->writesize / preset_step; - for (i = 0; i < caps->nstepinfos; i++) { stepinfo = &caps->stepinfos[i]; @@ -6135,7 +6128,6 @@ int nand_check_ecc_caps(struct nand_chip *chip, return -ENOTSUPP; } -EXPORT_SYMBOL_GPL(nand_check_ecc_caps); /** * nand_match_ecc_req - meet the chip's requirement with least ECC bytes @@ -6147,8 +6139,9 @@ EXPORT_SYMBOL_GPL(nand_check_ecc_caps); * number of ECC bytes (i.e. with the largest number of OOB-free bytes). * On success, the chosen ECC settings are set. */ -int nand_match_ecc_req(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail) +static int +nand_match_ecc_req(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail) { struct mtd_info *mtd = nand_to_mtd(chip); const struct nand_ecc_step_info *stepinfo; @@ -6159,9 +6152,6 @@ int nand_match_ecc_req(struct nand_chip *chip, int best_ecc_bytes_total = INT_MAX; int i, j; - if (WARN_ON(oobavail < 0)) - return -EINVAL; - /* No information provided by the NAND chip */ if (!req_step || !req_strength) return -ENOTSUPP; @@ -6220,7 +6210,6 @@ int nand_match_ecc_req(struct nand_chip *chip, return 0; } -EXPORT_SYMBOL_GPL(nand_match_ecc_req); /** * nand_maximize_ecc - choose the max ECC strength available @@ -6231,8 +6220,9 @@ EXPORT_SYMBOL_GPL(nand_match_ecc_req); * Choose the max ECC strength that is supported on the controller, and can fit * within the chip's OOB. On success, the chosen ECC settings are set. */ -int nand_maximize_ecc(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail) +static int +nand_maximize_ecc(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail) { struct mtd_info *mtd = nand_to_mtd(chip); const struct nand_ecc_step_info *stepinfo; @@ -6242,9 +6232,6 @@ int nand_maximize_ecc(struct nand_chip *chip, int best_strength, best_ecc_bytes; int i, j; - if (WARN_ON(oobavail < 0)) - return -EINVAL; - for (i = 0; i < caps->nstepinfos; i++) { stepinfo = &caps->stepinfos[i]; step_size = stepinfo->stepsize; @@ -6293,7 +6280,6 @@ int nand_maximize_ecc(struct nand_chip *chip, return 0; } -EXPORT_SYMBOL_GPL(nand_maximize_ecc); /** * nand_ecc_choose_conf - Set the ECC strength and ECC step size @@ -6315,6 +6301,11 @@ EXPORT_SYMBOL_GPL(nand_maximize_ecc); int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail) { + struct mtd_info *mtd = nand_to_mtd(chip); + + if (WARN_ON(oobavail < 0 || oobavail > mtd->oobsize)) + return -EINVAL; + if (chip->ecc.size && chip->ecc.strength) return nand_check_ecc_caps(chip, caps, oobavail); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2ce305388471..0c6fb316b409 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1663,15 +1663,6 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, void *extraoob, int extraooblen, int threshold); -int nand_check_ecc_caps(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -int nand_match_ecc_req(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -int nand_maximize_ecc(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); -- cgit v1.2.3 From 7529df4652482c33ae1a99ee8189401146f13cb7 Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Fri, 22 Jun 2018 14:28:23 +0200 Subject: mtd: nand: Add core infrastructure to support SPI NANDs Add a SPI NAND framework based on the generic NAND framework and the spi-mem infrastructure. In its current state, this framework supports the following features: - single/dual/quad IO modes - on-die ECC Signed-off-by: Peter Pan Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/Kconfig | 1 + drivers/mtd/nand/Makefile | 1 + drivers/mtd/nand/spi/Kconfig | 7 + drivers/mtd/nand/spi/Makefile | 3 + drivers/mtd/nand/spi/core.c | 1136 +++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 416 +++++++++++++++ include/linux/spi/spi-mem.h | 4 +- 7 files changed, 1567 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/Kconfig create mode 100644 drivers/mtd/nand/spi/Makefile create mode 100644 drivers/mtd/nand/spi/core.c create mode 100644 include/linux/mtd/spinand.h (limited to 'include/linux') diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 88c7d3b4ff8b..9033215e62ea 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -4,3 +4,4 @@ config MTD_NAND_CORE source "drivers/mtd/nand/onenand/Kconfig" source "drivers/mtd/nand/raw/Kconfig" +source "drivers/mtd/nand/spi/Kconfig" diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 3f0cb87f1a57..7ecd80c0a66e 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o obj-y += onenand/ obj-y += raw/ +obj-y += spi/ diff --git a/drivers/mtd/nand/spi/Kconfig b/drivers/mtd/nand/spi/Kconfig new file mode 100644 index 000000000000..7c37d2929b68 --- /dev/null +++ b/drivers/mtd/nand/spi/Kconfig @@ -0,0 +1,7 @@ +menuconfig MTD_SPI_NAND + tristate "SPI NAND device Support" + select MTD_NAND_CORE + depends on SPI_MASTER + select SPI_MEM + help + This is the framework for the SPI NAND device drivers. diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile new file mode 100644 index 000000000000..2c473b765027 --- /dev/null +++ b/drivers/mtd/nand/spi/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +spinand-objs := core.o +obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c new file mode 100644 index 000000000000..4efbeb0d85b4 --- /dev/null +++ b/drivers/mtd/nand/spi/core.c @@ -0,0 +1,1136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016-2017 Micron Technology, Inc. + * + * Authors: + * Peter Pan + * Boris Brezillon + */ + +#define pr_fmt(fmt) "spi-nand: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void spinand_cache_op_adjust_colum(struct spinand_device *spinand, + const struct nand_page_io_req *req, + u16 *column) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int shift; + + if (nand->memorg.planes_per_lun < 2) + return; + + /* The plane number is passed in MSB just above the column address */ + shift = fls(nand->memorg.pagesize); + *column |= req->pos.plane << shift; +} + +static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val) +{ + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(reg, + spinand->scratchbuf); + int ret; + + ret = spi_mem_exec_op(spinand->spimem, &op); + if (ret) + return ret; + + *val = *spinand->scratchbuf; + return 0; +} + +static int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val) +{ + struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg, + spinand->scratchbuf); + + *spinand->scratchbuf = val; + return spi_mem_exec_op(spinand->spimem, &op); +} + +static int spinand_read_status(struct spinand_device *spinand, u8 *status) +{ + return spinand_read_reg_op(spinand, REG_STATUS, status); +} + +static int spinand_get_cfg(struct spinand_device *spinand, u8 *cfg) +{ + struct nand_device *nand = spinand_to_nand(spinand); + + if (WARN_ON(spinand->cur_target < 0 || + spinand->cur_target >= nand->memorg.ntargets)) + return -EINVAL; + + *cfg = spinand->cfg_cache[spinand->cur_target]; + return 0; +} + +static int spinand_set_cfg(struct spinand_device *spinand, u8 cfg) +{ + struct nand_device *nand = spinand_to_nand(spinand); + int ret; + + if (WARN_ON(spinand->cur_target < 0 || + spinand->cur_target >= nand->memorg.ntargets)) + return -EINVAL; + + if (spinand->cfg_cache[spinand->cur_target] == cfg) + return 0; + + ret = spinand_write_reg_op(spinand, REG_CFG, cfg); + if (ret) + return ret; + + spinand->cfg_cache[spinand->cur_target] = cfg; + return 0; +} + +/** + * spinand_upd_cfg() - Update the configuration register + * @spinand: the spinand device + * @mask: the mask encoding the bits to update in the config reg + * @val: the new value to apply + * + * Update the configuration register. + * + * Return: 0 on success, a negative error code otherwise. + */ +int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val) +{ + int ret; + u8 cfg; + + ret = spinand_get_cfg(spinand, &cfg); + if (ret) + return ret; + + cfg &= ~mask; + cfg |= val; + + return spinand_set_cfg(spinand, cfg); +} + +/** + * spinand_select_target() - Select a specific NAND target/die + * @spinand: the spinand device + * @target: the target/die to select + * + * Select a new target/die. If chip only has one die, this function is a NOOP. + * + * Return: 0 on success, a negative error code otherwise. + */ +int spinand_select_target(struct spinand_device *spinand, unsigned int target) +{ + struct nand_device *nand = spinand_to_nand(spinand); + int ret; + + if (WARN_ON(target >= nand->memorg.ntargets)) + return -EINVAL; + + if (spinand->cur_target == target) + return 0; + + if (nand->memorg.ntargets == 1) { + spinand->cur_target = target; + return 0; + } + + ret = spinand->select_target(spinand, target); + if (ret) + return ret; + + spinand->cur_target = target; + return 0; +} + +static int spinand_init_cfg_cache(struct spinand_device *spinand) +{ + struct nand_device *nand = spinand_to_nand(spinand); + struct device *dev = &spinand->spimem->spi->dev; + unsigned int target; + int ret; + + spinand->cfg_cache = devm_kcalloc(dev, + nand->memorg.ntargets, + sizeof(*spinand->cfg_cache), + GFP_KERNEL); + if (!spinand->cfg_cache) + return -ENOMEM; + + for (target = 0; target < nand->memorg.ntargets; target++) { + ret = spinand_select_target(spinand, target); + if (ret) + return ret; + + /* + * We use spinand_read_reg_op() instead of spinand_get_cfg() + * here to bypass the config cache. + */ + ret = spinand_read_reg_op(spinand, REG_CFG, + &spinand->cfg_cache[target]); + if (ret) + return ret; + } + + return 0; +} + +static int spinand_init_quad_enable(struct spinand_device *spinand) +{ + bool enable = false; + + if (!(spinand->flags & SPINAND_HAS_QE_BIT)) + return 0; + + if (spinand->op_templates.read_cache->data.buswidth == 4 || + spinand->op_templates.write_cache->data.buswidth == 4 || + spinand->op_templates.update_cache->data.buswidth == 4) + enable = true; + + return spinand_upd_cfg(spinand, CFG_QUAD_ENABLE, + enable ? CFG_QUAD_ENABLE : 0); +} + +static int spinand_ecc_enable(struct spinand_device *spinand, + bool enable) +{ + return spinand_upd_cfg(spinand, CFG_ECC_ENABLE, + enable ? CFG_ECC_ENABLE : 0); +} + +static int spinand_write_enable_op(struct spinand_device *spinand) +{ + struct spi_mem_op op = SPINAND_WR_EN_DIS_OP(true); + + return spi_mem_exec_op(spinand->spimem, &op); +} + +static int spinand_load_page_op(struct spinand_device *spinand, + const struct nand_page_io_req *req) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int row = nanddev_pos_to_row(nand, &req->pos); + struct spi_mem_op op = SPINAND_PAGE_READ_OP(row); + + return spi_mem_exec_op(spinand->spimem, &op); +} + +static int spinand_read_from_cache_op(struct spinand_device *spinand, + const struct nand_page_io_req *req) +{ + struct spi_mem_op op = *spinand->op_templates.read_cache; + struct nand_device *nand = spinand_to_nand(spinand); + struct mtd_info *mtd = nanddev_to_mtd(nand); + struct nand_page_io_req adjreq = *req; + unsigned int nbytes = 0; + void *buf = NULL; + u16 column = 0; + int ret; + + if (req->datalen) { + adjreq.datalen = nanddev_page_size(nand); + adjreq.dataoffs = 0; + adjreq.databuf.in = spinand->databuf; + buf = spinand->databuf; + nbytes = adjreq.datalen; + } + + if (req->ooblen) { + adjreq.ooblen = nanddev_per_page_oobsize(nand); + adjreq.ooboffs = 0; + adjreq.oobbuf.in = spinand->oobbuf; + nbytes += nanddev_per_page_oobsize(nand); + if (!buf) { + buf = spinand->oobbuf; + column = nanddev_page_size(nand); + } + } + + spinand_cache_op_adjust_colum(spinand, &adjreq, &column); + op.addr.val = column; + + /* + * Some controllers are limited in term of max RX data size. In this + * case, just repeat the READ_CACHE operation after updating the + * column. + */ + while (nbytes) { + op.data.buf.in = buf; + op.data.nbytes = nbytes; + ret = spi_mem_adjust_op_size(spinand->spimem, &op); + if (ret) + return ret; + + ret = spi_mem_exec_op(spinand->spimem, &op); + if (ret) + return ret; + + buf += op.data.nbytes; + nbytes -= op.data.nbytes; + op.addr.val += op.data.nbytes; + } + + if (req->datalen) + memcpy(req->databuf.in, spinand->databuf + req->dataoffs, + req->datalen); + + if (req->ooblen) { + if (req->mode == MTD_OPS_AUTO_OOB) + mtd_ooblayout_get_databytes(mtd, req->oobbuf.in, + spinand->oobbuf, + req->ooboffs, + req->ooblen); + else + memcpy(req->oobbuf.in, spinand->oobbuf + req->ooboffs, + req->ooblen); + } + + return 0; +} + +static int spinand_write_to_cache_op(struct spinand_device *spinand, + const struct nand_page_io_req *req) +{ + struct spi_mem_op op = *spinand->op_templates.write_cache; + struct nand_device *nand = spinand_to_nand(spinand); + struct mtd_info *mtd = nanddev_to_mtd(nand); + struct nand_page_io_req adjreq = *req; + unsigned int nbytes = 0; + void *buf = NULL; + u16 column = 0; + int ret; + + memset(spinand->databuf, 0xff, + nanddev_page_size(nand) + + nanddev_per_page_oobsize(nand)); + + if (req->datalen) { + memcpy(spinand->databuf + req->dataoffs, req->databuf.out, + req->datalen); + adjreq.dataoffs = 0; + adjreq.datalen = nanddev_page_size(nand); + adjreq.databuf.out = spinand->databuf; + nbytes = adjreq.datalen; + buf = spinand->databuf; + } + + if (req->ooblen) { + if (req->mode == MTD_OPS_AUTO_OOB) + mtd_ooblayout_set_databytes(mtd, req->oobbuf.out, + spinand->oobbuf, + req->ooboffs, + req->ooblen); + else + memcpy(spinand->oobbuf + req->ooboffs, req->oobbuf.out, + req->ooblen); + + adjreq.ooblen = nanddev_per_page_oobsize(nand); + adjreq.ooboffs = 0; + nbytes += nanddev_per_page_oobsize(nand); + if (!buf) { + buf = spinand->oobbuf; + column = nanddev_page_size(nand); + } + } + + spinand_cache_op_adjust_colum(spinand, &adjreq, &column); + + op = *spinand->op_templates.write_cache; + op.addr.val = column; + + /* + * Some controllers are limited in term of max TX data size. In this + * case, split the operation into one LOAD CACHE and one or more + * LOAD RANDOM CACHE. + */ + while (nbytes) { + op.data.buf.out = buf; + op.data.nbytes = nbytes; + + ret = spi_mem_adjust_op_size(spinand->spimem, &op); + if (ret) + return ret; + + ret = spi_mem_exec_op(spinand->spimem, &op); + if (ret) + return ret; + + buf += op.data.nbytes; + nbytes -= op.data.nbytes; + op.addr.val += op.data.nbytes; + + /* + * We need to use the RANDOM LOAD CACHE operation if there's + * more than one iteration, because the LOAD operation resets + * the cache to 0xff. + */ + if (nbytes) { + column = op.addr.val; + op = *spinand->op_templates.update_cache; + op.addr.val = column; + } + } + + return 0; +} + +static int spinand_program_op(struct spinand_device *spinand, + const struct nand_page_io_req *req) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int row = nanddev_pos_to_row(nand, &req->pos); + struct spi_mem_op op = SPINAND_PROG_EXEC_OP(row); + + return spi_mem_exec_op(spinand->spimem, &op); +} + +static int spinand_erase_op(struct spinand_device *spinand, + const struct nand_pos *pos) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int row = nanddev_pos_to_row(nand, pos); + struct spi_mem_op op = SPINAND_BLK_ERASE_OP(row); + + return spi_mem_exec_op(spinand->spimem, &op); +} + +static int spinand_wait(struct spinand_device *spinand, u8 *s) +{ + unsigned long timeo = jiffies + msecs_to_jiffies(400); + u8 status; + int ret; + + do { + ret = spinand_read_status(spinand, &status); + if (ret) + return ret; + + if (!(status & STATUS_BUSY)) + goto out; + } while (time_before(jiffies, timeo)); + + /* + * Extra read, just in case the STATUS_READY bit has changed + * since our last check + */ + ret = spinand_read_status(spinand, &status); + if (ret) + return ret; + +out: + if (s) + *s = status; + + return status & STATUS_BUSY ? -ETIMEDOUT : 0; +} + +static int spinand_read_id_op(struct spinand_device *spinand, u8 *buf) +{ + struct spi_mem_op op = SPINAND_READID_OP(0, spinand->scratchbuf, + SPINAND_MAX_ID_LEN); + int ret; + + ret = spi_mem_exec_op(spinand->spimem, &op); + if (!ret) + memcpy(buf, spinand->scratchbuf, SPINAND_MAX_ID_LEN); + + return ret; +} + +static int spinand_reset_op(struct spinand_device *spinand) +{ + struct spi_mem_op op = SPINAND_RESET_OP; + int ret; + + ret = spi_mem_exec_op(spinand->spimem, &op); + if (ret) + return ret; + + return spinand_wait(spinand, NULL); +} + +static int spinand_lock_block(struct spinand_device *spinand, u8 lock) +{ + return spinand_write_reg_op(spinand, REG_BLOCK_LOCK, lock); +} + +static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status) +{ + struct nand_device *nand = spinand_to_nand(spinand); + + if (spinand->eccinfo.get_status) + return spinand->eccinfo.get_status(spinand, status); + + switch (status & STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case STATUS_ECC_HAS_BITFLIPS: + /* + * We have no way to know exactly how many bitflips have been + * fixed, so let's return the maximum possible value so that + * wear-leveling layers move the data immediately. + */ + return nand->eccreq.strength; + + case STATUS_ECC_UNCOR_ERROR: + return -EBADMSG; + + default: + break; + } + + return -EINVAL; +} + +static int spinand_read_page(struct spinand_device *spinand, + const struct nand_page_io_req *req, + bool ecc_enabled) +{ + u8 status; + int ret; + + ret = spinand_load_page_op(spinand, req); + if (ret) + return ret; + + ret = spinand_wait(spinand, &status); + if (ret < 0) + return ret; + + ret = spinand_read_from_cache_op(spinand, req); + if (ret) + return ret; + + if (!ecc_enabled) + return 0; + + return spinand_check_ecc_status(spinand, status); +} + +static int spinand_write_page(struct spinand_device *spinand, + const struct nand_page_io_req *req) +{ + u8 status; + int ret; + + ret = spinand_write_enable_op(spinand); + if (ret) + return ret; + + ret = spinand_write_to_cache_op(spinand, req); + if (ret) + return ret; + + ret = spinand_program_op(spinand, req); + if (ret) + return ret; + + ret = spinand_wait(spinand, &status); + if (!ret && (status & STATUS_PROG_FAILED)) + ret = -EIO; + + return ret; +} + +static int spinand_mtd_read(struct mtd_info *mtd, loff_t from, + struct mtd_oob_ops *ops) +{ + struct spinand_device *spinand = mtd_to_spinand(mtd); + struct nand_device *nand = mtd_to_nanddev(mtd); + unsigned int max_bitflips = 0; + struct nand_io_iter iter; + bool enable_ecc = false; + bool ecc_failed = false; + int ret = 0; + + if (ops->mode != MTD_OPS_RAW && spinand->eccinfo.ooblayout) + enable_ecc = true; + + mutex_lock(&spinand->lock); + + nanddev_io_for_each_page(nand, from, ops, &iter) { + ret = spinand_select_target(spinand, iter.req.pos.target); + if (ret) + break; + + ret = spinand_ecc_enable(spinand, enable_ecc); + if (ret) + break; + + ret = spinand_read_page(spinand, &iter.req, enable_ecc); + if (ret < 0 && ret != -EBADMSG) + break; + + if (ret == -EBADMSG) { + ecc_failed = true; + mtd->ecc_stats.failed++; + ret = 0; + } else { + mtd->ecc_stats.corrected += ret; + max_bitflips = max_t(unsigned int, max_bitflips, ret); + } + + ops->retlen += iter.req.datalen; + ops->oobretlen += iter.req.ooblen; + } + + mutex_unlock(&spinand->lock); + + if (ecc_failed && !ret) + ret = -EBADMSG; + + return ret ? ret : max_bitflips; +} + +static int spinand_mtd_write(struct mtd_info *mtd, loff_t to, + struct mtd_oob_ops *ops) +{ + struct spinand_device *spinand = mtd_to_spinand(mtd); + struct nand_device *nand = mtd_to_nanddev(mtd); + struct nand_io_iter iter; + bool enable_ecc = false; + int ret = 0; + + if (ops->mode != MTD_OPS_RAW && mtd->ooblayout) + enable_ecc = true; + + mutex_lock(&spinand->lock); + + nanddev_io_for_each_page(nand, to, ops, &iter) { + ret = spinand_select_target(spinand, iter.req.pos.target); + if (ret) + break; + + ret = spinand_ecc_enable(spinand, enable_ecc); + if (ret) + break; + + ret = spinand_write_page(spinand, &iter.req); + if (ret) + break; + + ops->retlen += iter.req.datalen; + ops->oobretlen += iter.req.ooblen; + } + + mutex_unlock(&spinand->lock); + + return ret; +} + +static bool spinand_isbad(struct nand_device *nand, const struct nand_pos *pos) +{ + struct spinand_device *spinand = nand_to_spinand(nand); + struct nand_page_io_req req = { + .pos = *pos, + .ooblen = 2, + .ooboffs = 0, + .oobbuf.in = spinand->oobbuf, + .mode = MTD_OPS_RAW, + }; + + memset(spinand->oobbuf, 0, 2); + spinand_select_target(spinand, pos->target); + spinand_read_page(spinand, &req, false); + if (spinand->oobbuf[0] != 0xff || spinand->oobbuf[1] != 0xff) + return true; + + return false; +} + +static int spinand_mtd_block_isbad(struct mtd_info *mtd, loff_t offs) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + struct spinand_device *spinand = nand_to_spinand(nand); + struct nand_pos pos; + int ret; + + nanddev_offs_to_pos(nand, offs, &pos); + mutex_lock(&spinand->lock); + ret = nanddev_isbad(nand, &pos); + mutex_unlock(&spinand->lock); + + return ret; +} + +static int spinand_markbad(struct nand_device *nand, const struct nand_pos *pos) +{ + struct spinand_device *spinand = nand_to_spinand(nand); + struct nand_page_io_req req = { + .pos = *pos, + .ooboffs = 0, + .ooblen = 2, + .oobbuf.out = spinand->oobbuf, + }; + int ret; + + /* Erase block before marking it bad. */ + ret = spinand_select_target(spinand, pos->target); + if (ret) + return ret; + + ret = spinand_write_enable_op(spinand); + if (ret) + return ret; + + spinand_erase_op(spinand, pos); + + memset(spinand->oobbuf, 0, 2); + return spinand_write_page(spinand, &req); +} + +static int spinand_mtd_block_markbad(struct mtd_info *mtd, loff_t offs) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + struct spinand_device *spinand = nand_to_spinand(nand); + struct nand_pos pos; + int ret; + + nanddev_offs_to_pos(nand, offs, &pos); + mutex_lock(&spinand->lock); + ret = nanddev_markbad(nand, &pos); + mutex_unlock(&spinand->lock); + + return ret; +} + +static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos) +{ + struct spinand_device *spinand = nand_to_spinand(nand); + u8 status; + int ret; + + ret = spinand_select_target(spinand, pos->target); + if (ret) + return ret; + + ret = spinand_write_enable_op(spinand); + if (ret) + return ret; + + ret = spinand_erase_op(spinand, pos); + if (ret) + return ret; + + ret = spinand_wait(spinand, &status); + if (!ret && (status & STATUS_ERASE_FAILED)) + ret = -EIO; + + return ret; +} + +static int spinand_mtd_erase(struct mtd_info *mtd, + struct erase_info *einfo) +{ + struct spinand_device *spinand = mtd_to_spinand(mtd); + int ret; + + mutex_lock(&spinand->lock); + ret = nanddev_mtd_erase(mtd, einfo); + mutex_unlock(&spinand->lock); + + return ret; +} + +static int spinand_mtd_block_isreserved(struct mtd_info *mtd, loff_t offs) +{ + struct spinand_device *spinand = mtd_to_spinand(mtd); + struct nand_device *nand = mtd_to_nanddev(mtd); + struct nand_pos pos; + int ret; + + nanddev_offs_to_pos(nand, offs, &pos); + mutex_lock(&spinand->lock); + ret = nanddev_isreserved(nand, &pos); + mutex_unlock(&spinand->lock); + + return ret; +} + +static const struct nand_ops spinand_ops = { + .erase = spinand_erase, + .markbad = spinand_markbad, + .isbad = spinand_isbad, +}; + +static int spinand_manufacturer_detect(struct spinand_device *spinand) +{ + return -ENOTSUPP; +} + +static int spinand_manufacturer_init(struct spinand_device *spinand) +{ + if (spinand->manufacturer->ops->init) + return spinand->manufacturer->ops->init(spinand); + + return 0; +} + +static void spinand_manufacturer_cleanup(struct spinand_device *spinand) +{ + /* Release manufacturer private data */ + if (spinand->manufacturer->ops->cleanup) + return spinand->manufacturer->ops->cleanup(spinand); +} + +static const struct spi_mem_op * +spinand_select_op_variant(struct spinand_device *spinand, + const struct spinand_op_variants *variants) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int i; + + for (i = 0; i < variants->nops; i++) { + struct spi_mem_op op = variants->ops[i]; + unsigned int nbytes; + int ret; + + nbytes = nanddev_per_page_oobsize(nand) + + nanddev_page_size(nand); + + while (nbytes) { + op.data.nbytes = nbytes; + ret = spi_mem_adjust_op_size(spinand->spimem, &op); + if (ret) + break; + + if (!spi_mem_supports_op(spinand->spimem, &op)) + break; + + nbytes -= op.data.nbytes; + } + + if (!nbytes) + return &variants->ops[i]; + } + + return NULL; +} + +/** + * spinand_match_and_init() - Try to find a match between a device ID and an + * entry in a spinand_info table + * @spinand: SPI NAND object + * @table: SPI NAND device description table + * @table_size: size of the device description table + * + * Should be used by SPI NAND manufacturer drivers when they want to find a + * match between a device ID retrieved through the READ_ID command and an + * entry in the SPI NAND description table. If a match is found, the spinand + * object will be initialized with information provided by the matching + * spinand_info entry. + * + * Return: 0 on success, a negative error code otherwise. + */ +int spinand_match_and_init(struct spinand_device *spinand, + const struct spinand_info *table, + unsigned int table_size, u8 devid) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int i; + + for (i = 0; i < table_size; i++) { + const struct spinand_info *info = &table[i]; + const struct spi_mem_op *op; + + if (devid != info->devid) + continue; + + nand->memorg = table[i].memorg; + nand->eccreq = table[i].eccreq; + spinand->eccinfo = table[i].eccinfo; + spinand->flags = table[i].flags; + spinand->select_target = table[i].select_target; + + op = spinand_select_op_variant(spinand, + info->op_variants.read_cache); + if (!op) + return -ENOTSUPP; + + spinand->op_templates.read_cache = op; + + op = spinand_select_op_variant(spinand, + info->op_variants.write_cache); + if (!op) + return -ENOTSUPP; + + spinand->op_templates.write_cache = op; + + op = spinand_select_op_variant(spinand, + info->op_variants.update_cache); + spinand->op_templates.update_cache = op; + + return 0; + } + + return -ENOTSUPP; +} + +static int spinand_detect(struct spinand_device *spinand) +{ + struct device *dev = &spinand->spimem->spi->dev; + struct nand_device *nand = spinand_to_nand(spinand); + int ret; + + ret = spinand_reset_op(spinand); + if (ret) + return ret; + + ret = spinand_read_id_op(spinand, spinand->id.data); + if (ret) + return ret; + + spinand->id.len = SPINAND_MAX_ID_LEN; + + ret = spinand_manufacturer_detect(spinand); + if (ret) { + dev_err(dev, "unknown raw ID %*phN\n", SPINAND_MAX_ID_LEN, + spinand->id.data); + return ret; + } + + if (nand->memorg.ntargets > 1 && !spinand->select_target) { + dev_err(dev, + "SPI NANDs with more than one die must implement ->select_target()\n"); + return -EINVAL; + } + + dev_info(&spinand->spimem->spi->dev, + "%s SPI NAND was found.\n", spinand->manufacturer->name); + dev_info(&spinand->spimem->spi->dev, + "%llu MiB, block size: %zu KiB, page size: %zu, OOB size: %u\n", + nanddev_size(nand) >> 20, nanddev_eraseblock_size(nand) >> 10, + nanddev_page_size(nand), nanddev_per_page_oobsize(nand)); + + return 0; +} + +static int spinand_noecc_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + return -ERANGE; +} + +static int spinand_noecc_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + /* Reserve 2 bytes for the BBM. */ + region->offset = 2; + region->length = 62; + + return 0; +} + +static const struct mtd_ooblayout_ops spinand_noecc_ooblayout = { + .ecc = spinand_noecc_ooblayout_ecc, + .free = spinand_noecc_ooblayout_free, +}; + +static int spinand_init(struct spinand_device *spinand) +{ + struct device *dev = &spinand->spimem->spi->dev; + struct mtd_info *mtd = spinand_to_mtd(spinand); + struct nand_device *nand = mtd_to_nanddev(mtd); + int ret, i; + + /* + * We need a scratch buffer because the spi_mem interface requires that + * buf passed in spi_mem_op->data.buf be DMA-able. + */ + spinand->scratchbuf = kzalloc(SPINAND_MAX_ID_LEN, GFP_KERNEL); + if (!spinand->scratchbuf) + return -ENOMEM; + + ret = spinand_detect(spinand); + if (ret) + goto err_free_bufs; + + /* + * Use kzalloc() instead of devm_kzalloc() here, because some drivers + * may use this buffer for DMA access. + * Memory allocated by devm_ does not guarantee DMA-safe alignment. + */ + spinand->databuf = kzalloc(nanddev_page_size(nand) + + nanddev_per_page_oobsize(nand), + GFP_KERNEL); + if (!spinand->databuf) { + ret = -ENOMEM; + goto err_free_bufs; + } + + spinand->oobbuf = spinand->databuf + nanddev_page_size(nand); + + ret = spinand_init_cfg_cache(spinand); + if (ret) + goto err_free_bufs; + + ret = spinand_init_quad_enable(spinand); + if (ret) + goto err_free_bufs; + + ret = spinand_upd_cfg(spinand, CFG_OTP_ENABLE, 0); + if (ret) + goto err_free_bufs; + + ret = spinand_manufacturer_init(spinand); + if (ret) { + dev_err(dev, + "Failed to initialize the SPI NAND chip (err = %d)\n", + ret); + goto err_free_bufs; + } + + /* After power up, all blocks are locked, so unlock them here. */ + for (i = 0; i < nand->memorg.ntargets; i++) { + ret = spinand_select_target(spinand, i); + if (ret) + goto err_free_bufs; + + ret = spinand_lock_block(spinand, BL_ALL_UNLOCKED); + if (ret) + goto err_free_bufs; + } + + ret = nanddev_init(nand, &spinand_ops, THIS_MODULE); + if (ret) + goto err_manuf_cleanup; + + /* + * Right now, we don't support ECC, so let the whole oob + * area is available for user. + */ + mtd->_read_oob = spinand_mtd_read; + mtd->_write_oob = spinand_mtd_write; + mtd->_block_isbad = spinand_mtd_block_isbad; + mtd->_block_markbad = spinand_mtd_block_markbad; + mtd->_block_isreserved = spinand_mtd_block_isreserved; + mtd->_erase = spinand_mtd_erase; + + if (spinand->eccinfo.ooblayout) + mtd_set_ooblayout(mtd, spinand->eccinfo.ooblayout); + else + mtd_set_ooblayout(mtd, &spinand_noecc_ooblayout); + + ret = mtd_ooblayout_count_freebytes(mtd); + if (ret < 0) + goto err_cleanup_nanddev; + + mtd->oobavail = ret; + + return 0; + +err_cleanup_nanddev: + nanddev_cleanup(nand); + +err_manuf_cleanup: + spinand_manufacturer_cleanup(spinand); + +err_free_bufs: + kfree(spinand->databuf); + kfree(spinand->scratchbuf); + return ret; +} + +static void spinand_cleanup(struct spinand_device *spinand) +{ + struct nand_device *nand = spinand_to_nand(spinand); + + nanddev_cleanup(nand); + spinand_manufacturer_cleanup(spinand); + kfree(spinand->databuf); + kfree(spinand->scratchbuf); +} + +static int spinand_probe(struct spi_mem *mem) +{ + struct spinand_device *spinand; + struct mtd_info *mtd; + int ret; + + spinand = devm_kzalloc(&mem->spi->dev, sizeof(*spinand), + GFP_KERNEL); + if (!spinand) + return -ENOMEM; + + spinand->spimem = mem; + spi_mem_set_drvdata(mem, spinand); + spinand_set_of_node(spinand, mem->spi->dev.of_node); + mutex_init(&spinand->lock); + mtd = spinand_to_mtd(spinand); + mtd->dev.parent = &mem->spi->dev; + + ret = spinand_init(spinand); + if (ret) + return ret; + + ret = mtd_device_register(mtd, NULL, 0); + if (ret) + goto err_spinand_cleanup; + + return 0; + +err_spinand_cleanup: + spinand_cleanup(spinand); + + return ret; +} + +static int spinand_remove(struct spi_mem *mem) +{ + struct spinand_device *spinand; + struct mtd_info *mtd; + int ret; + + spinand = spi_mem_get_drvdata(mem); + mtd = spinand_to_mtd(spinand); + + ret = mtd_device_unregister(mtd); + if (ret) + return ret; + + spinand_cleanup(spinand); + + return 0; +} + +static const struct spi_device_id spinand_ids[] = { + { .name = "spi-nand" }, + { /* sentinel */ }, +}; + +#ifdef CONFIG_OF +static const struct of_device_id spinand_of_ids[] = { + { .compatible = "spi-nand" }, + { /* sentinel */ }, +}; +#endif + +static struct spi_mem_driver spinand_drv = { + .spidrv = { + .id_table = spinand_ids, + .driver = { + .name = "spi-nand", + .of_match_table = of_match_ptr(spinand_of_ids), + }, + }, + .probe = spinand_probe, + .remove = spinand_remove, +}; +module_spi_mem_driver(spinand_drv); + +MODULE_DESCRIPTION("SPI NAND framework"); +MODULE_AUTHOR("Peter Pan"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h new file mode 100644 index 000000000000..d3efe62dc9de --- /dev/null +++ b/include/linux/mtd/spinand.h @@ -0,0 +1,416 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016-2017 Micron Technology, Inc. + * + * Authors: + * Peter Pan + */ +#ifndef __LINUX_MTD_SPINAND_H +#define __LINUX_MTD_SPINAND_H + +#include +#include +#include +#include +#include +#include +#include + +/** + * Standard SPI NAND flash operations + */ + +#define SPINAND_RESET_OP \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_WR_EN_DIS_OP(enable) \ + SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_READID_OP(ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) + +#define SPINAND_SET_FEATURE_OP(reg, valptr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1), \ + SPI_MEM_OP_ADDR(1, reg, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(1, valptr, 1)) + +#define SPINAND_GET_FEATURE_OP(reg, valptr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1), \ + SPI_MEM_OP_ADDR(1, reg, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_IN(1, valptr, 1)) + +#define SPINAND_BLK_ERASE_OP(addr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PAGE_READ_OP(addr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PAGE_READ_FROM_CACHE_OP(fast, addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) + +#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 4)) + +#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 4), \ + SPI_MEM_OP_DUMMY(ndummy, 4), \ + SPI_MEM_OP_DATA_IN(len, buf, 4)) + +#define SPINAND_PROG_EXEC_OP(addr) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PROG_LOAD(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 1)) + +#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 4)) + +/** + * Standard SPI NAND flash commands + */ +#define SPINAND_CMD_PROG_LOAD_X4 0x32 +#define SPINAND_CMD_PROG_LOAD_RDM_DATA_X4 0x34 + +/* feature register */ +#define REG_BLOCK_LOCK 0xa0 +#define BL_ALL_UNLOCKED 0x00 + +/* configuration register */ +#define REG_CFG 0xb0 +#define CFG_OTP_ENABLE BIT(6) +#define CFG_ECC_ENABLE BIT(4) +#define CFG_QUAD_ENABLE BIT(0) + +/* status register */ +#define REG_STATUS 0xc0 +#define STATUS_BUSY BIT(0) +#define STATUS_ERASE_FAILED BIT(2) +#define STATUS_PROG_FAILED BIT(3) +#define STATUS_ECC_MASK GENMASK(5, 4) +#define STATUS_ECC_NO_BITFLIPS (0 << 4) +#define STATUS_ECC_HAS_BITFLIPS (1 << 4) +#define STATUS_ECC_UNCOR_ERROR (2 << 4) + +struct spinand_op; +struct spinand_device; + +#define SPINAND_MAX_ID_LEN 4 + +/** + * struct spinand_id - SPI NAND id structure + * @data: buffer containing the id bytes. Currently 4 bytes large, but can + * be extended if required + * @len: ID length + * + * struct_spinand_id->data contains all bytes returned after a READ_ID command, + * including dummy bytes if the chip does not emit ID bytes right after the + * READ_ID command. The responsibility to extract real ID bytes is left to + * struct_manufacurer_ops->detect(). + */ +struct spinand_id { + u8 data[SPINAND_MAX_ID_LEN]; + int len; +}; + +/** + * struct manufacurer_ops - SPI NAND manufacturer specific operations + * @detect: detect a SPI NAND device. Every time a SPI NAND device is probed + * the core calls the struct_manufacurer_ops->detect() hook of each + * registered manufacturer until one of them return 1. Note that + * the first thing to check in this hook is that the manufacturer ID + * in struct_spinand_device->id matches the manufacturer whose + * ->detect() hook has been called. Should return 1 if there's a + * match, 0 if the manufacturer ID does not match and a negative + * error code otherwise. When true is returned, the core assumes + * that properties of the NAND chip (spinand->base.memorg and + * spinand->base.eccreq) have been filled + * @init: initialize a SPI NAND device + * @cleanup: cleanup a SPI NAND device + * + * Each SPI NAND manufacturer driver should implement this interface so that + * NAND chips coming from this vendor can be detected and initialized properly. + */ +struct spinand_manufacturer_ops { + int (*detect)(struct spinand_device *spinand); + int (*init)(struct spinand_device *spinand); + void (*cleanup)(struct spinand_device *spinand); +}; + +/** + * struct spinand_manufacturer - SPI NAND manufacturer instance + * @id: manufacturer ID + * @name: manufacturer name + * @ops: manufacturer operations + */ +struct spinand_manufacturer { + u8 id; + char *name; + const struct spinand_manufacturer_ops *ops; +}; + +/** + * struct spinand_op_variants - SPI NAND operation variants + * @ops: the list of variants for a given operation + * @nops: the number of variants + * + * Some operations like read-from-cache/write-to-cache have several variants + * depending on the number of IO lines you use to transfer data or address + * cycles. This structure is a way to describe the different variants supported + * by a chip and let the core pick the best one based on the SPI mem controller + * capabilities. + */ +struct spinand_op_variants { + const struct spi_mem_op *ops; + unsigned int nops; +}; + +#define SPINAND_OP_VARIANTS(name, ...) \ + const struct spinand_op_variants name = { \ + .ops = (struct spi_mem_op[]) { __VA_ARGS__ }, \ + .nops = sizeof((struct spi_mem_op[]){ __VA_ARGS__ }) / \ + sizeof(struct spi_mem_op), \ + } + +/** + * spinand_ecc_info - description of the on-die ECC implemented by a SPI NAND + * chip + * @get_status: get the ECC status. Should return a positive number encoding + * the number of corrected bitflips if correction was possible or + * -EBADMSG if there are uncorrectable errors. I can also return + * other negative error codes if the error is not caused by + * uncorrectable bitflips + * @ooblayout: the OOB layout used by the on-die ECC implementation + */ +struct spinand_ecc_info { + int (*get_status)(struct spinand_device *spinand, u8 status); + const struct mtd_ooblayout_ops *ooblayout; +}; + +#define SPINAND_HAS_QE_BIT BIT(0) + +/** + * struct spinand_info - Structure used to describe SPI NAND chips + * @model: model name + * @devid: device ID + * @flags: OR-ing of the SPINAND_XXX flags + * @memorg: memory organization + * @eccreq: ECC requirements + * @eccinfo: on-die ECC info + * @op_variants: operations variants + * @op_variants.read_cache: variants of the read-cache operation + * @op_variants.write_cache: variants of the write-cache operation + * @op_variants.update_cache: variants of the update-cache operation + * @select_target: function used to select a target/die. Required only for + * multi-die chips + * + * Each SPI NAND manufacturer driver should have a spinand_info table + * describing all the chips supported by the driver. + */ +struct spinand_info { + const char *model; + u8 devid; + u32 flags; + struct nand_memory_organization memorg; + struct nand_ecc_req eccreq; + struct spinand_ecc_info eccinfo; + struct { + const struct spinand_op_variants *read_cache; + const struct spinand_op_variants *write_cache; + const struct spinand_op_variants *update_cache; + } op_variants; + int (*select_target)(struct spinand_device *spinand, + unsigned int target); +}; + +#define SPINAND_INFO_OP_VARIANTS(__read, __write, __update) \ + { \ + .read_cache = __read, \ + .write_cache = __write, \ + .update_cache = __update, \ + } + +#define SPINAND_ECCINFO(__ooblayout, __get_status) \ + .eccinfo = { \ + .ooblayout = __ooblayout, \ + .get_status = __get_status, \ + } + +#define SPINAND_SELECT_TARGET(__func) \ + .select_target = __func, + +#define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ + __flags, ...) \ + { \ + .model = __model, \ + .devid = __id, \ + .memorg = __memorg, \ + .eccreq = __eccreq, \ + .op_variants = __op_variants, \ + .flags = __flags, \ + __VA_ARGS__ \ + } + +/** + * struct spinand_device - SPI NAND device instance + * @base: NAND device instance + * @spimem: pointer to the SPI mem object + * @lock: lock used to serialize accesses to the NAND + * @id: NAND ID as returned by READ_ID + * @flags: NAND flags + * @op_templates: various SPI mem op templates + * @op_templates.read_cache: read cache op template + * @op_templates.write_cache: write cache op template + * @op_templates.update_cache: update cache op template + * @select_target: select a specific target/die. Usually called before sending + * a command addressing a page or an eraseblock embedded in + * this die. Only required if your chip exposes several dies + * @cur_target: currently selected target/die + * @eccinfo: on-die ECC information + * @cfg_cache: config register cache. One entry per die + * @databuf: bounce buffer for data + * @oobbuf: bounce buffer for OOB data + * @scratchbuf: buffer used for everything but page accesses. This is needed + * because the spi-mem interface explicitly requests that buffers + * passed in spi_mem_op be DMA-able, so we can't based the bufs on + * the stack + * @manufacturer: SPI NAND manufacturer information + * @priv: manufacturer private data + */ +struct spinand_device { + struct nand_device base; + struct spi_mem *spimem; + struct mutex lock; + struct spinand_id id; + u32 flags; + + struct { + const struct spi_mem_op *read_cache; + const struct spi_mem_op *write_cache; + const struct spi_mem_op *update_cache; + } op_templates; + + int (*select_target)(struct spinand_device *spinand, + unsigned int target); + unsigned int cur_target; + + struct spinand_ecc_info eccinfo; + + u8 *cfg_cache; + u8 *databuf; + u8 *oobbuf; + u8 *scratchbuf; + const struct spinand_manufacturer *manufacturer; + void *priv; +}; + +/** + * mtd_to_spinand() - Get the SPI NAND device attached to an MTD instance + * @mtd: MTD instance + * + * Return: the SPI NAND device attached to @mtd. + */ +static inline struct spinand_device *mtd_to_spinand(struct mtd_info *mtd) +{ + return container_of(mtd_to_nanddev(mtd), struct spinand_device, base); +} + +/** + * spinand_to_mtd() - Get the MTD device embedded in a SPI NAND device + * @spinand: SPI NAND device + * + * Return: the MTD device embedded in @spinand. + */ +static inline struct mtd_info *spinand_to_mtd(struct spinand_device *spinand) +{ + return nanddev_to_mtd(&spinand->base); +} + +/** + * nand_to_spinand() - Get the SPI NAND device embedding an NAND object + * @nand: NAND object + * + * Return: the SPI NAND device embedding @nand. + */ +static inline struct spinand_device *nand_to_spinand(struct nand_device *nand) +{ + return container_of(nand, struct spinand_device, base); +} + +/** + * spinand_to_nand() - Get the NAND device embedded in a SPI NAND object + * @spinand: SPI NAND device + * + * Return: the NAND device embedded in @spinand. + */ +static inline struct nand_device * +spinand_to_nand(struct spinand_device *spinand) +{ + return &spinand->base; +} + +/** + * spinand_set_of_node - Attach a DT node to a SPI NAND device + * @spinand: SPI NAND device + * @np: DT node + * + * Attach a DT node to a SPI NAND device. + */ +static inline void spinand_set_of_node(struct spinand_device *spinand, + struct device_node *np) +{ + nanddev_set_of_node(&spinand->base, np); +} + +int spinand_match_and_init(struct spinand_device *dev, + const struct spinand_info *table, + unsigned int table_size, u8 devid); + +int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_select_target(struct spinand_device *spinand, unsigned int target); + +#endif /* __LINUX_MTD_SPINAND_H */ diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index bb4bd15ae1f6..4fa34a227a0f 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -3,7 +3,9 @@ * Copyright (C) 2018 Exceet Electronics GmbH * Copyright (C) 2018 Bootlin * - * Author: Boris Brezillon + * Author: + * Peter Pan + * Boris Brezillon */ #ifndef __LINUX_SPI_MEM_H -- cgit v1.2.3 From a508e8875e135d7a1df26d8131b5443cb07005ff Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Fri, 22 Jun 2018 14:28:25 +0200 Subject: mtd: spinand: Add initial support for Micron MT29F2G01ABAGD Add a basic driver for Micron SPI NANDs. Only one device is supported right now, but the driver will be extended to support more devices afterwards. Signed-off-by: Peter Pan Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 17 ++++++ drivers/mtd/nand/spi/micron.c | 133 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 3 + 4 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/micron.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index 2c473b765027..a1df25398f20 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -spinand-objs := core.o +spinand-objs := core.o micron.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 4efbeb0d85b4..9cc502d7e745 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -763,8 +763,25 @@ static const struct nand_ops spinand_ops = { .isbad = spinand_isbad, }; +static const struct spinand_manufacturer *spinand_manufacturers[] = { + µn_spinand_manufacturer, +}; + static int spinand_manufacturer_detect(struct spinand_device *spinand) { + unsigned int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(spinand_manufacturers); i++) { + ret = spinand_manufacturers[i]->ops->detect(spinand); + if (ret > 0) { + spinand->manufacturer = spinand_manufacturers[i]; + return 0; + } else if (ret < 0) { + return ret; + } + } + return -ENOTSUPP; } diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c new file mode 100644 index 000000000000..9c4381d6847b --- /dev/null +++ b/drivers/mtd/nand/spi/micron.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016-2017 Micron Technology, Inc. + * + * Authors: + * Peter Pan + */ + +#include +#include +#include + +#define SPINAND_MFR_MICRON 0x2c + +#define MICRON_STATUS_ECC_MASK GENMASK(7, 4) +#define MICRON_STATUS_ECC_NO_BITFLIPS (0 << 4) +#define MICRON_STATUS_ECC_1TO3_BITFLIPS (1 << 4) +#define MICRON_STATUS_ECC_4TO6_BITFLIPS (3 << 4) +#define MICRON_STATUS_ECC_7TO8_BITFLIPS (5 << 4) + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int mt29f2g01abagd_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + region->offset = 64; + region->length = 64; + + return 0; +} + +static int mt29f2g01abagd_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + /* Reserve 2 bytes for the BBM. */ + region->offset = 2; + region->length = 62; + + return 0; +} + +static const struct mtd_ooblayout_ops mt29f2g01abagd_ooblayout = { + .ecc = mt29f2g01abagd_ooblayout_ecc, + .free = mt29f2g01abagd_ooblayout_free, +}; + +static int mt29f2g01abagd_ecc_get_status(struct spinand_device *spinand, + u8 status) +{ + switch (status & MICRON_STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case STATUS_ECC_UNCOR_ERROR: + return -EBADMSG; + + case MICRON_STATUS_ECC_1TO3_BITFLIPS: + return 3; + + case MICRON_STATUS_ECC_4TO6_BITFLIPS: + return 6; + + case MICRON_STATUS_ECC_7TO8_BITFLIPS: + return 8; + + default: + break; + } + + return -EINVAL; +} + +static const struct spinand_info micron_spinand_table[] = { + SPINAND_INFO("MT29F2G01ABAGD", 0x24, + NAND_MEMORG(1, 2048, 128, 64, 2048, 2, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0, + SPINAND_ECCINFO(&mt29f2g01abagd_ooblayout, + mt29f2g01abagd_ecc_get_status)), +}; + +static int micron_spinand_detect(struct spinand_device *spinand) +{ + u8 *id = spinand->id.data; + int ret; + + /* + * Micron SPI NAND read ID need a dummy byte, + * so the first byte in raw_id is dummy. + */ + if (id[1] != SPINAND_MFR_MICRON) + return 0; + + ret = spinand_match_and_init(spinand, micron_spinand_table, + ARRAY_SIZE(micron_spinand_table), id[2]); + if (ret) + return ret; + + return 1; +} + +static const struct spinand_manufacturer_ops micron_spinand_manuf_ops = { + .detect = micron_spinand_detect, +}; + +const struct spinand_manufacturer micron_spinand_manufacturer = { + .id = SPINAND_MFR_MICRON, + .name = "Micron", + .ops = µn_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index d3efe62dc9de..717b272940f1 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -193,6 +193,9 @@ struct spinand_manufacturer { const struct spinand_manufacturer_ops *ops; }; +/* SPI NAND manufacturers */ +extern const struct spinand_manufacturer micron_spinand_manufacturer; + /** * struct spinand_op_variants - SPI NAND operation variants * @ops: the list of variants for a given operation -- cgit v1.2.3 From 1075492bb9e26312bc8ddeec1a93e2de5f9c76b4 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 22 Jun 2018 14:28:26 +0200 Subject: mtd: spinand: Add initial support for Winbond W25M02GV Add support for the W25M02GV chip. Signed-off-by: Frieder Schrempf Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/winbond.c | 141 +++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/winbond.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index a1df25398f20..100008d202ed 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -spinand-objs := core.o micron.o +spinand-objs := core.o micron.o winbond.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 9cc502d7e745..4803a6bfe8ec 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -765,6 +765,7 @@ static const struct nand_ops spinand_ops = { static const struct spinand_manufacturer *spinand_manufacturers[] = { µn_spinand_manufacturer, + &winbond_spinand_manufacturer, }; static int spinand_manufacturer_detect(struct spinand_device *spinand) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c new file mode 100644 index 000000000000..67baa1b32c00 --- /dev/null +++ b/drivers/mtd/nand/spi/winbond.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2017 exceet electronics GmbH + * + * Authors: + * Frieder Schrempf + * Boris Brezillon + */ + +#include +#include +#include + +#define SPINAND_MFR_WINBOND 0xEF + +#define WINBOND_CFG_BUF_READ BIT(3) + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int w25m02gv_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + region->offset = (16 * section) + 8; + region->length = 8; + + return 0; +} + +static int w25m02gv_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + region->offset = (16 * section) + 2; + region->length = 6; + + return 0; +} + +static const struct mtd_ooblayout_ops w25m02gv_ooblayout = { + .ecc = w25m02gv_ooblayout_ecc, + .free = w25m02gv_ooblayout_free, +}; + +static int w25m02gv_select_target(struct spinand_device *spinand, + unsigned int target) +{ + struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0xc2, 1), + SPI_MEM_OP_NO_ADDR, + SPI_MEM_OP_NO_DUMMY, + SPI_MEM_OP_DATA_OUT(1, + spinand->scratchbuf, + 1)); + + *spinand->scratchbuf = target; + return spi_mem_exec_op(spinand->spimem, &op); +} + +static const struct spinand_info winbond_spinand_table[] = { + SPINAND_INFO("W25M02GV", 0xAB, + NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 2), + NAND_ECCREQ(1, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0, + SPINAND_ECCINFO(&w25m02gv_ooblayout, NULL), + SPINAND_SELECT_TARGET(w25m02gv_select_target)), +}; + +/** + * winbond_spinand_detect - initialize device related part in spinand_device + * struct if it is a Winbond device. + * @spinand: SPI NAND device structure + */ +static int winbond_spinand_detect(struct spinand_device *spinand) +{ + u8 *id = spinand->id.data; + int ret; + + /* + * Winbond SPI NAND read ID need a dummy byte, + * so the first byte in raw_id is dummy. + */ + if (id[1] != SPINAND_MFR_WINBOND) + return 0; + + ret = spinand_match_and_init(spinand, winbond_spinand_table, + ARRAY_SIZE(winbond_spinand_table), id[2]); + if (ret) + return ret; + + return 1; +} + +static int winbond_spinand_init(struct spinand_device *spinand) +{ + struct nand_device *nand = spinand_to_nand(spinand); + unsigned int i; + + /* + * Make sure all dies are in buffer read mode and not continuous read + * mode. + */ + for (i = 0; i < nand->memorg.ntargets; i++) { + spinand_select_target(spinand, i); + spinand_upd_cfg(spinand, WINBOND_CFG_BUF_READ, + WINBOND_CFG_BUF_READ); + } + + return 0; +} + +static const struct spinand_manufacturer_ops winbond_spinand_manuf_ops = { + .detect = winbond_spinand_detect, + .init = winbond_spinand_init, +}; + +const struct spinand_manufacturer winbond_spinand_manufacturer = { + .id = SPINAND_MFR_WINBOND, + .name = "Winbond", + .ops = &winbond_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 717b272940f1..f0f16b9029e7 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -195,6 +195,7 @@ struct spinand_manufacturer { /* SPI NAND manufacturers */ extern const struct spinand_manufacturer micron_spinand_manufacturer; +extern const struct spinand_manufacturer winbond_spinand_manufacturer; /** * struct spinand_op_variants - SPI NAND operation variants -- cgit v1.2.3 From b02308af05e62c7d995f4fc75b0bc2ae3c3026f7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 22 Jun 2018 14:28:27 +0200 Subject: mtd: spinand: Add initial support for the MX35LF1GE4AB chip Add minimal support for the MX35LF1GE4AB SPI NAND chip. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/macronix.c | 136 ++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/macronix.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index 100008d202ed..b74e074b363a 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -spinand-objs := core.o micron.o winbond.o +spinand-objs := core.o macronix.o micron.o winbond.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 4803a6bfe8ec..30f83649c481 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -764,6 +764,7 @@ static const struct nand_ops spinand_ops = { }; static const struct spinand_manufacturer *spinand_manufacturers[] = { + ¯onix_spinand_manufacturer, µn_spinand_manufacturer, &winbond_spinand_manufacturer, }; diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c new file mode 100644 index 000000000000..8ba3489d332a --- /dev/null +++ b/drivers/mtd/nand/spi/macronix.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 Macronix + * + * Author: Boris Brezillon + */ + +#include +#include +#include + +#define SPINAND_MFR_MACRONIX 0xC2 + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int mx35lfxge4ab_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + return -ERANGE; +} + +static int mx35lfxge4ab_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + region->offset = 2; + region->length = mtd->oobsize - 2; + + return 0; +} + +static const struct mtd_ooblayout_ops mx35lfxge4ab_ooblayout = { + .ecc = mx35lfxge4ab_ooblayout_ecc, + .free = mx35lfxge4ab_ooblayout_free, +}; + +static int mx35lf1ge4ab_get_eccsr(struct spinand_device *spinand, u8 *eccsr) +{ + struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0x7c, 1), + SPI_MEM_OP_NO_ADDR, + SPI_MEM_OP_DUMMY(1, 1), + SPI_MEM_OP_DATA_IN(1, eccsr, 1)); + + return spi_mem_exec_op(spinand->spimem, &op); +} + +static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand, + u8 status) +{ + struct nand_device *nand = spinand_to_nand(spinand); + u8 eccsr; + + switch (status & STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case STATUS_ECC_UNCOR_ERROR: + return -EBADMSG; + + case STATUS_ECC_HAS_BITFLIPS: + /* + * Let's try to retrieve the real maximum number of bitflips + * in order to avoid forcing the wear-leveling layer to move + * data around if it's not necessary. + */ + if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr)) + return nand->eccreq.strength; + + if (WARN_ON(eccsr > nand->eccreq.strength || !eccsr)) + return nand->eccreq.strength; + + return eccsr; + + default: + break; + } + + return -EINVAL; +} + +static const struct spinand_info macronix_spinand_table[] = { + SPINAND_INFO("MX35LF1GE4AB", 0x12, + NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 1), + NAND_ECCREQ(4, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, + mx35lf1ge4ab_ecc_get_status)), +}; + +static int macronix_spinand_detect(struct spinand_device *spinand) +{ + u8 *id = spinand->id.data; + int ret; + + /* + * Macronix SPI NAND read ID needs a dummy byte, so the first byte in + * raw_id is garbage. + */ + if (id[1] != SPINAND_MFR_MACRONIX) + return 0; + + ret = spinand_match_and_init(spinand, macronix_spinand_table, + ARRAY_SIZE(macronix_spinand_table), + id[2]); + if (ret) + return ret; + + return 1; +} + +static const struct spinand_manufacturer_ops macronix_spinand_manuf_ops = { + .detect = macronix_spinand_detect, +}; + +const struct spinand_manufacturer macronix_spinand_manufacturer = { + .id = SPINAND_MFR_MACRONIX, + .name = "Macronix", + .ops = ¯onix_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index f0f16b9029e7..088ff96c3eb6 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -194,6 +194,7 @@ struct spinand_manufacturer { }; /* SPI NAND manufacturers */ +extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; extern const struct spinand_manufacturer winbond_spinand_manufacturer; -- cgit v1.2.3 From a4c025372d9d4756e7be98a98f5dde302c6df562 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:27 +0200 Subject: mtd: rawnand: Remove nand_do_read() prototype from rawnand.h nand_do_read() is a static function implemented in nand_base.c. There's no good reason to expose its prototype in rawnand.h. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 0c6fb316b409..4d7a46c42900 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1545,8 +1545,6 @@ int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, int allowbbt); -int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, uint8_t *buf); /** * struct platform_nand_chip - chip level device structure -- cgit v1.2.3 From 707329aca6e0cf5781ca7f62c39bdcb5f87e9c23 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:28 +0200 Subject: mtd: rawnand: Remove forward declaration of mtd_info struct mtd_info is defined in linux/mtd/mtd.h which is included at the beginning of nand_base.c, there's thus no need for the forward declaration of mtd_info. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 4d7a46c42900..32145b302585 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -23,7 +23,6 @@ #include #include -struct mtd_info; struct nand_flash_dev; struct device_node; -- cgit v1.2.3 From 1c3ab61ebcf8cfb5308d2e68e03fd4b4df484e62 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:29 +0200 Subject: mtd: rawnand: Remove forward declaration of device_node struct device_node is defined in linux/of.h. Let's include this file instead of having a forward declaration of this struct. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 32145b302585..83ab6779144e 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -21,10 +21,10 @@ #include #include #include +#include #include struct nand_flash_dev; -struct device_node; /* Scan and identify a NAND device */ int nand_scan_with_ids(struct mtd_info *mtd, int max_chips, -- cgit v1.2.3 From 44b07b921dea5ec997fb929ceaa82582a7415816 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:30 +0200 Subject: mtd: rawnand: Rename nand_default_bbt() into nand_create_bbt() Rename nand_default_bbt() into nand_create_bbt() and pass it a nand_chip object to prepare removal of the chip->scan_bbt() hook. We add a temporary nand_default_bbt() wrapper which will be dropped after the removal of ->scan_bbt(). Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 5 +++++ drivers/mtd/nand/raw/nand_bbt.c | 10 +++++----- include/linux/mtd/rawnand.h | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index faac82b1e058..0476e13d47b1 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4924,6 +4924,11 @@ static void nand_shutdown(struct mtd_info *mtd) nand_get_device(mtd, FL_PM_SUSPENDED); } +static int nand_default_bbt(struct mtd_info *mtd) +{ + return nand_create_bbt(mtd_to_nand(mtd)); +} + /* Set default functions */ static void nand_set_defaults(struct nand_chip *chip) { diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c index d9f4ceff2568..39db352f8757 100644 --- a/drivers/mtd/nand/raw/nand_bbt.c +++ b/drivers/mtd/nand/raw/nand_bbt.c @@ -1349,15 +1349,14 @@ static int nand_create_badblock_pattern(struct nand_chip *this) } /** - * nand_default_bbt - [NAND Interface] Select a default bad block table for the device - * @mtd: MTD device structure + * nand_create_bbt - [NAND Interface] Select a default bad block table for the device + * @this: NAND chip object * * This function selects the default bad block table support for the device and * calls the nand_scan_bbt function. */ -int nand_default_bbt(struct mtd_info *mtd) +int nand_create_bbt(struct nand_chip *this) { - struct nand_chip *this = mtd_to_nand(mtd); int ret; /* Is a flash based bad block table requested? */ @@ -1383,8 +1382,9 @@ int nand_default_bbt(struct mtd_info *mtd) return ret; } - return nand_scan_bbt(mtd, this->badblock_pattern); + return nand_scan_bbt(nand_to_mtd(this), this->badblock_pattern); } +EXPORT_SYMBOL(nand_create_bbt); /** * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 83ab6779144e..186f9fb1e7eb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1538,7 +1538,7 @@ extern const struct nand_manufacturer_ops micron_nand_manuf_ops; extern const struct nand_manufacturer_ops amd_nand_manuf_ops; extern const struct nand_manufacturer_ops macronix_nand_manuf_ops; -int nand_default_bbt(struct mtd_info *mtd); +int nand_create_bbt(struct nand_chip *chip); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); -- cgit v1.2.3 From e80eba7581512625083ba540f120479b935425de Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:31 +0200 Subject: mtd: rawnand: Kill the chip->scan_bbt() hook None of the existing drivers are overloading the ->scan_bbt() method, let's get rid of it and replace calls to ->scan_bbt() by nand_create_bbt() ones. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/diskonchip.c | 4 ++-- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 9 +-------- drivers/mtd/nand/raw/nandsim.c | 2 +- include/linux/mtd/rawnand.h | 2 -- 5 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 8d10061abb4b..3c46188dd6d2 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -1291,7 +1291,7 @@ static int __init nftl_scan_bbt(struct mtd_info *mtd) this->bbt_md = NULL; } - ret = this->scan_bbt(mtd); + ret = nand_create_bbt(this); if (ret) return ret; @@ -1338,7 +1338,7 @@ static int __init inftl_scan_bbt(struct mtd_info *mtd) this->bbt_md->pattern = "TBB_SYSM"; } - ret = this->scan_bbt(mtd); + ret = nand_create_bbt(this); if (ret) return ret; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 58d4e4a93a93..599513321e71 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -1944,7 +1944,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this) ret = nand_boot_init(this); if (ret) goto err_nand_cleanup; - ret = chip->scan_bbt(mtd); + ret = nand_create_bbt(chip); if (ret) goto err_nand_cleanup; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 0476e13d47b1..4fa5e20d9690 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4924,11 +4924,6 @@ static void nand_shutdown(struct mtd_info *mtd) nand_get_device(mtd, FL_PM_SUSPENDED); } -static int nand_default_bbt(struct mtd_info *mtd) -{ - return nand_create_bbt(mtd_to_nand(mtd)); -} - /* Set default functions */ static void nand_set_defaults(struct nand_chip *chip) { @@ -4970,8 +4965,6 @@ static void nand_set_defaults(struct nand_chip *chip) chip->write_byte = busw ? nand_write_byte16 : nand_write_byte; if (!chip->read_buf || chip->read_buf == nand_read_buf) chip->read_buf = busw ? nand_read_buf16 : nand_read_buf; - if (!chip->scan_bbt) - chip->scan_bbt = nand_default_bbt; if (!chip->controller) { chip->controller = &chip->hwcontrol; @@ -6673,7 +6666,7 @@ int nand_scan_tail(struct mtd_info *mtd) return 0; /* Build bad block table */ - ret = chip->scan_bbt(mtd); + ret = nand_create_bbt(chip); if (ret) goto err_nand_manuf_cleanup; diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index f8edacde49ab..8a3b36cfe5ea 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2337,7 +2337,7 @@ static int __init ns_init_module(void) if ((retval = init_nandsim(nsmtd)) != 0) goto err_exit; - if ((retval = chip->scan_bbt(nsmtd)) != 0) + if ((retval = nand_create_bbt(chip)) != 0) goto err_exit; if ((retval = parse_badblocks(nand, nsmtd)) != 0) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 186f9fb1e7eb..ac0007ddadf2 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1199,7 +1199,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * @buf_align: minimum buffer alignment required by a platform * @hwcontrol: platform-specific hardware control structure * @erase: [REPLACEABLE] erase function - * @scan_bbt: [REPLACEABLE] function to scan bad block table * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring * data from array to read regs (tR). * @state: [INTERN] the current state of the NAND device @@ -1292,7 +1291,6 @@ struct nand_chip { const struct nand_operation *op, bool check_only); int (*erase)(struct mtd_info *mtd, int page); - int (*scan_bbt)(struct mtd_info *mtd); int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From f0f01838f76420988b50b23f315704c9a5cd2fa9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:32 +0200 Subject: mtd: rawnand: orion_nand: Kill orion_nand_data.dev_ready() None of the boards seem to overload the ->dev_ready() hook, just drop this field from orion_nand_data. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/orion_nand.c | 3 --- include/linux/platform_data/mtd-orion_nand.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c index 7825fd3ce66b..1a4828442675 100644 --- a/drivers/mtd/nand/raw/orion_nand.c +++ b/drivers/mtd/nand/raw/orion_nand.c @@ -153,9 +153,6 @@ static int __init orion_nand_probe(struct platform_device *pdev) if (board->width == 16) nc->options |= NAND_BUSWIDTH_16; - if (board->dev_ready) - nc->dev_ready = board->dev_ready; - platform_set_drvdata(pdev, info); /* Not all platforms can gate the clock, so it is not diff --git a/include/linux/platform_data/mtd-orion_nand.h b/include/linux/platform_data/mtd-orion_nand.h index a7ce77c7c1a8..34828eb85982 100644 --- a/include/linux/platform_data/mtd-orion_nand.h +++ b/include/linux/platform_data/mtd-orion_nand.h @@ -12,7 +12,6 @@ */ struct orion_nand_data { struct mtd_partition *parts; - int (*dev_ready)(struct mtd_info *mtd); u32 nr_parts; u8 ale; /* address line number connected to ALE */ u8 cle; /* address line number connected to CLE */ -- cgit v1.2.3 From dc2d8856a758627d4f126d17bc113eedd72b2d60 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Jul 2018 12:27:33 +0200 Subject: mtd: rawnand: plat_nand: Kill pdata->ctrl.{hwcontrol, read_byte}() None of the board files are overloading those hooks, so let's drop them from struct platform_nand_ctrl. Signed-off-by: Boris Brezillon Acked-by: Robert Jarzmik Signed-off-by: Miquel Raynal --- arch/arm/mach-pxa/balloon3.c | 1 - arch/arm/mach-pxa/em-x270.c | 1 - drivers/mtd/nand/raw/plat_nand.c | 2 -- include/linux/mtd/rawnand.h | 4 ---- 4 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index f4f8f23bda8c..af46d2182533 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -688,7 +688,6 @@ struct platform_nand_data balloon3_nand_pdata = { .chip_delay = 50, }, .ctrl = { - .hwcontrol = 0, .dev_ready = balloon3_nand_dev_ready, .select_chip = balloon3_nand_select_chip, .cmd_ctrl = balloon3_nand_cmd_ctl, diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 49022ad338e9..29be04c6cc48 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -346,7 +346,6 @@ struct platform_nand_data em_x270_nand_platdata = { .chip_delay = 20, }, .ctrl = { - .hwcontrol = 0, .dev_ready = em_x270_nand_device_ready, .select_chip = 0, .cmd_ctrl = em_x270_nand_cmd_ctl, diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c index 925a1323604d..222626df4b96 100644 --- a/drivers/mtd/nand/raw/plat_nand.c +++ b/drivers/mtd/nand/raw/plat_nand.c @@ -67,12 +67,10 @@ static int plat_nand_probe(struct platform_device *pdev) data->chip.select_chip = pdata->ctrl.select_chip; data->chip.write_buf = pdata->ctrl.write_buf; data->chip.read_buf = pdata->ctrl.read_buf; - data->chip.read_byte = pdata->ctrl.read_byte; data->chip.chip_delay = pdata->chip.chip_delay; data->chip.options |= pdata->chip.options; data->chip.bbt_options |= pdata->chip.bbt_options; - data->chip.ecc.hwctl = pdata->ctrl.hwcontrol; data->chip.ecc.mode = NAND_ECC_SOFT; data->chip.ecc.algo = NAND_ECC_HAMMING; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index ac0007ddadf2..11c2426fc363 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1572,14 +1572,12 @@ struct platform_device; * struct platform_nand_ctrl - controller level device structure * @probe: platform specific function to probe/setup hardware * @remove: platform specific function to remove/teardown hardware - * @hwcontrol: platform specific hardware control structure * @dev_ready: platform specific function to read ready/busy pin * @select_chip: platform specific chip select function * @cmd_ctrl: platform specific function for controlling * ALE/CLE/nCE. Also used to write command and address * @write_buf: platform specific function for write buffer * @read_buf: platform specific function for read buffer - * @read_byte: platform specific function to read one byte from chip * @priv: private data to transport driver specific settings * * All fields are optional and depend on the hardware driver requirements @@ -1587,13 +1585,11 @@ struct platform_device; struct platform_nand_ctrl { int (*probe)(struct platform_device *pdev); void (*remove)(struct platform_device *pdev); - void (*hwcontrol)(struct mtd_info *mtd, int cmd); int (*dev_ready)(struct mtd_info *mtd); void (*select_chip)(struct mtd_info *mtd, int chip); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); - unsigned char (*read_byte)(struct mtd_info *mtd); void *priv; }; -- cgit v1.2.3 From dc2865ac3527d76fe04ee1f1a3ffc101a60faba0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 9 Jul 2018 22:09:40 +0200 Subject: MIPS: txx9: Move the ndfc.h header to include/linux/platform_data/txx9 This way we will be able to compile the ndfmc driver when COMPILE_TEST=y. Signed-off-by: Boris Brezillon Acked-by: Paul Burton Signed-off-by: Miquel Raynal --- arch/mips/include/asm/txx9/ndfmc.h | 30 ------------------------------ arch/mips/txx9/generic/setup.c | 2 +- arch/mips/txx9/generic/setup_tx4938.c | 2 +- arch/mips/txx9/generic/setup_tx4939.c | 2 +- drivers/mtd/nand/raw/txx9ndfmc.c | 2 +- include/linux/platform_data/txx9/ndfmc.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 34 insertions(+), 34 deletions(-) delete mode 100644 arch/mips/include/asm/txx9/ndfmc.h create mode 100644 include/linux/platform_data/txx9/ndfmc.h (limited to 'include/linux') diff --git a/arch/mips/include/asm/txx9/ndfmc.h b/arch/mips/include/asm/txx9/ndfmc.h deleted file mode 100644 index fa67f3df78fc..000000000000 --- a/arch/mips/include/asm/txx9/ndfmc.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * (C) Copyright TOSHIBA CORPORATION 2007 - */ -#ifndef __ASM_TXX9_NDFMC_H -#define __ASM_TXX9_NDFMC_H - -#define NDFMC_PLAT_FLAG_USE_BSPRT 0x01 -#define NDFMC_PLAT_FLAG_NO_RSTR 0x02 -#define NDFMC_PLAT_FLAG_HOLDADD 0x04 -#define NDFMC_PLAT_FLAG_DUMMYWRITE 0x08 - -struct txx9ndfmc_platform_data { - unsigned int shift; - unsigned int gbus_clock; - unsigned int hold; /* hold time in nanosecond */ - unsigned int spw; /* strobe pulse width in nanosecond */ - unsigned int flags; - unsigned char ch_mask; /* available channel bitmask */ - unsigned char wp_mask; /* write-protect bitmask */ - unsigned char wide_mask; /* 16bit-nand bitmask */ -}; - -void txx9_ndfmc_init(unsigned long baseaddr, - const struct txx9ndfmc_platform_data *plat_data); - -#endif /* __ASM_TXX9_NDFMC_H */ diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 1791a44ee570..aa47932abd28 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include #ifdef CONFIG_CPU_TX49XX #include diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c index 85d1795652da..17395d5d15ca 100644 --- a/arch/mips/txx9/generic/setup_tx4938.c +++ b/arch/mips/txx9/generic/setup_tx4938.c @@ -17,13 +17,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index 274928987a21..360c388f4c82 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c index b567d212fe7d..04d57474ef97 100644 --- a/drivers/mtd/nand/raw/txx9ndfmc.c +++ b/drivers/mtd/nand/raw/txx9ndfmc.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include /* TXX9 NDFMC Registers */ #define TXX9_NDFDTR 0x00 diff --git a/include/linux/platform_data/txx9/ndfmc.h b/include/linux/platform_data/txx9/ndfmc.h new file mode 100644 index 000000000000..fc172627d54e --- /dev/null +++ b/include/linux/platform_data/txx9/ndfmc.h @@ -0,0 +1,30 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * (C) Copyright TOSHIBA CORPORATION 2007 + */ +#ifndef __TXX9_NDFMC_H +#define __TXX9_NDFMC_H + +#define NDFMC_PLAT_FLAG_USE_BSPRT 0x01 +#define NDFMC_PLAT_FLAG_NO_RSTR 0x02 +#define NDFMC_PLAT_FLAG_HOLDADD 0x04 +#define NDFMC_PLAT_FLAG_DUMMYWRITE 0x08 + +struct txx9ndfmc_platform_data { + unsigned int shift; + unsigned int gbus_clock; + unsigned int hold; /* hold time in nanosecond */ + unsigned int spw; /* strobe pulse width in nanosecond */ + unsigned int flags; + unsigned char ch_mask; /* available channel bitmask */ + unsigned char wp_mask; /* write-protect bitmask */ + unsigned char wide_mask; /* 16bit-nand bitmask */ +}; + +void txx9_ndfmc_init(unsigned long baseaddr, + const struct txx9ndfmc_platform_data *plat_data); + +#endif /* __TXX9_NDFMC_H */ -- cgit v1.2.3 From e65e3a50702f4e7a01c0b36ff08d6ed8dde7ee7b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 9 Jul 2018 22:09:42 +0200 Subject: MIPS: jz4740: Move jz4740_nand.h header to include/linux/platform_data/jz4740 This way we will be able to compile the jz4740_nand driver when COMPILE_TEST=y. Signed-off-by: Boris Brezillon Acked-by: Paul Burton Signed-off-by: Miquel Raynal --- arch/mips/include/asm/mach-jz4740/jz4740_nand.h | 34 ------------------------ arch/mips/jz4740/board-qi_lb60.c | 3 ++- drivers/mtd/nand/raw/jz4740_nand.c | 2 +- include/linux/platform_data/jz4740/jz4740_nand.h | 34 ++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 36 deletions(-) delete mode 100644 arch/mips/include/asm/mach-jz4740/jz4740_nand.h create mode 100644 include/linux/platform_data/jz4740/jz4740_nand.h (limited to 'include/linux') diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h b/arch/mips/include/asm/mach-jz4740/jz4740_nand.h deleted file mode 100644 index f381d465e768..000000000000 --- a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen - * JZ4740 SoC NAND controller driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __ASM_MACH_JZ4740_JZ4740_NAND_H__ -#define __ASM_MACH_JZ4740_JZ4740_NAND_H__ - -#include -#include - -#define JZ_NAND_NUM_BANKS 4 - -struct jz_nand_platform_data { - int num_partitions; - struct mtd_partition *partitions; - - unsigned char banks[JZ_NAND_NUM_BANKS]; - - void (*ident_callback)(struct platform_device *, struct mtd_info *, - struct mtd_partition **, int *num_partitions); -}; - -#endif diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 60f0767507c6..af0c8ace0141 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -29,10 +29,11 @@ #include #include +#include + #include #include #include -#include #include #include diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c index 613b00a9604b..a0254461812d 100644 --- a/drivers/mtd/nand/raw/jz4740_nand.c +++ b/drivers/mtd/nand/raw/jz4740_nand.c @@ -25,7 +25,7 @@ #include -#include +#include #define JZ_REG_NAND_CTRL 0x50 #define JZ_REG_NAND_ECC_CTRL 0x100 diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h new file mode 100644 index 000000000000..bc571f6d5ced --- /dev/null +++ b/include/linux/platform_data/jz4740/jz4740_nand.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2009-2010, Lars-Peter Clausen + * JZ4740 SoC NAND controller driver + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __JZ4740_NAND_H__ +#define __JZ4740_NAND_H__ + +#include +#include + +#define JZ_NAND_NUM_BANKS 4 + +struct jz_nand_platform_data { + int num_partitions; + struct mtd_partition *partitions; + + unsigned char banks[JZ_NAND_NUM_BANKS]; + + void (*ident_callback)(struct platform_device *, struct mtd_info *, + struct mtd_partition **, int *num_partitions); +}; + +#endif -- cgit v1.2.3 From 0d6030ac041f6835974deb88a1a9c299b4adc3ad Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 18 Jul 2018 10:42:17 +0200 Subject: mtd: rawnand: Expose _notsupp() helpers for raw page accessors Some implementations simply can't disable their ECC engine. Expose helpers returning -ENOTSUPP so that the caller knows that raw accesses are not supported instead of silently falling back to non-raw accessors. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 33 +++++++++++++++++++++++++++++++++ include/linux/mtd/rawnand.h | 4 ++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 4fa5e20d9690..c4b74630f4c5 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -2966,6 +2966,23 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, } EXPORT_SYMBOL(nand_check_erased_ecc_chunk); +/** + * nand_read_page_raw_notsupp - dummy read raw page function + * @mtd: mtd info structure + * @chip: nand chip info structure + * @buf: buffer to store read data + * @oob_required: caller requires OOB data read to chip->oob_poi + * @page: page number to read + * + * Returns -ENOTSUPP unconditionally. + */ +int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + u8 *buf, int oob_required, int page) +{ + return -ENOTSUPP; +} +EXPORT_SYMBOL(nand_read_page_raw_notsupp); + /** * nand_read_page_raw - [INTERN] read raw page data without ecc * @mtd: mtd info structure @@ -3960,6 +3977,22 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from, return ret; } +/** + * nand_write_page_raw_notsupp - dummy raw page write function + * @mtd: mtd info structure + * @chip: nand chip info structure + * @buf: data buffer + * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write + * + * Returns -ENOTSUPP unconditionally. + */ +int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + const u8 *buf, int oob_required, int page) +{ + return -ENOTSUPP; +} +EXPORT_SYMBOL(nand_write_page_raw_notsupp); /** * nand_write_page_raw - [INTERN] raw page write function diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 11c2426fc363..f60fad29eae6 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1681,10 +1681,14 @@ int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip, /* Default read_page_raw implementation */ int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); +int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + u8 *buf, int oob_required, int page); /* Default write_page_raw implementation */ int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page); +int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + const u8 *buf, int oob_required, int page); /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip, int chipnr); -- cgit v1.2.3 From 760c435e0f85ed19e48a90d746ce1de2cd02def7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 19 Jul 2018 00:09:12 +0200 Subject: mtd: rawnand: make subop helpers return unsigned values A report from Colin Ian King pointed a CoverityScan issue where error values on these helpers where not checked in the drivers. These helpers can error out only in case of a software bug in driver code, not because of a runtime/hardware error. Hence, let's WARN_ON() in this case and return 0 which is harmless anyway. Fixes: 8878b126df76 ("mtd: nand: add ->exec_op() implementation") Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 44 ++++++++++++++++++++-------------------- include/linux/mtd/rawnand.h | 16 +++++++-------- 2 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index c4b74630f4c5..ef10beab99f5 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -2668,8 +2668,8 @@ static bool nand_subop_instr_is_valid(const struct nand_subop *subop, return subop && instr_idx < subop->ninstrs; } -static int nand_subop_get_start_off(const struct nand_subop *subop, - unsigned int instr_idx) +static unsigned int nand_subop_get_start_off(const struct nand_subop *subop, + unsigned int instr_idx) { if (instr_idx) return 0; @@ -2688,12 +2688,12 @@ static int nand_subop_get_start_off(const struct nand_subop *subop, * * Given an address instruction, returns the offset of the first cycle to issue. */ -int nand_subop_get_addr_start_off(const struct nand_subop *subop, - unsigned int instr_idx) +unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop, + unsigned int instr_idx) { - if (!nand_subop_instr_is_valid(subop, instr_idx) || - subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR) - return -EINVAL; + if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) || + subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR)) + return 0; return nand_subop_get_start_off(subop, instr_idx); } @@ -2710,14 +2710,14 @@ EXPORT_SYMBOL_GPL(nand_subop_get_addr_start_off); * * Given an address instruction, returns the number of address cycle to issue. */ -int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, - unsigned int instr_idx) +unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, + unsigned int instr_idx) { int start_off, end_off; - if (!nand_subop_instr_is_valid(subop, instr_idx) || - subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR) - return -EINVAL; + if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) || + subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR)) + return 0; start_off = nand_subop_get_addr_start_off(subop, instr_idx); @@ -2742,12 +2742,12 @@ EXPORT_SYMBOL_GPL(nand_subop_get_num_addr_cyc); * * Given a data instruction, returns the offset to start from. */ -int nand_subop_get_data_start_off(const struct nand_subop *subop, - unsigned int instr_idx) +unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop, + unsigned int instr_idx) { - if (!nand_subop_instr_is_valid(subop, instr_idx) || - !nand_instr_is_data(&subop->instrs[instr_idx])) - return -EINVAL; + if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) || + !nand_instr_is_data(&subop->instrs[instr_idx]))) + return 0; return nand_subop_get_start_off(subop, instr_idx); } @@ -2764,14 +2764,14 @@ EXPORT_SYMBOL_GPL(nand_subop_get_data_start_off); * * Returns the length of the chunk of data to send/receive. */ -int nand_subop_get_data_len(const struct nand_subop *subop, - unsigned int instr_idx) +unsigned int nand_subop_get_data_len(const struct nand_subop *subop, + unsigned int instr_idx) { int start_off = 0, end_off; - if (!nand_subop_instr_is_valid(subop, instr_idx) || - !nand_instr_is_data(&subop->instrs[instr_idx])) - return -EINVAL; + if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) || + !nand_instr_is_data(&subop->instrs[instr_idx]))) + return 0; start_off = nand_subop_get_data_start_off(subop, instr_idx); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index f60fad29eae6..598d356de83f 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1007,14 +1007,14 @@ struct nand_subop { unsigned int last_instr_end_off; }; -int nand_subop_get_addr_start_off(const struct nand_subop *subop, - unsigned int op_id); -int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, - unsigned int op_id); -int nand_subop_get_data_start_off(const struct nand_subop *subop, - unsigned int op_id); -int nand_subop_get_data_len(const struct nand_subop *subop, - unsigned int op_id); +unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop, + unsigned int op_id); +unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, + unsigned int op_id); +unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop, + unsigned int op_id); +unsigned int nand_subop_get_data_len(const struct nand_subop *subop, + unsigned int op_id); /** * struct nand_op_parser_addr_constraints - Constraints for address instructions -- cgit v1.2.3 From 7da45139d264f3b7ead04e00ebb29b189cf9826e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 17 Jul 2018 09:08:02 +0200 Subject: mtd: rawnand: better name for the controller structure In the raw NAND core, a NAND chip is described by a nand_chip structure, while a NAND controller is described with a nand_hw_control structure which is not very meaningful. Rename this structure nand_controller. As the structure gets renamed, it is logical to also rename the core function initializing it from nand_hw_control_init() to nand_controller_init(). Lastly, the 'hwcontrol' entry of the nand_chip structure is not meaningful neither while it has the role of fallback when no controller structure is provided by the driver (the controller driver is dumb and can only control a single chip). Thus, it is renamed dummy_controller. Signed-off-by: Miquel Raynal Acked-by: Boris Brezillon --- drivers/mtd/nand/raw/atmel/nand-controller.c | 10 +++++----- drivers/mtd/nand/raw/brcmnand/brcmnand.c | 4 ++-- drivers/mtd/nand/raw/docg4.c | 4 ++-- drivers/mtd/nand/raw/fsl_elbc_nand.c | 4 ++-- drivers/mtd/nand/raw/fsl_ifc_nand.c | 4 ++-- drivers/mtd/nand/raw/jz4780_nand.c | 7 ++++--- drivers/mtd/nand/raw/marvell_nand.c | 6 +++--- drivers/mtd/nand/raw/mtk_nand.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 4 ++-- drivers/mtd/nand/raw/ndfc.c | 4 ++-- drivers/mtd/nand/raw/omap2.c | 2 +- drivers/mtd/nand/raw/oxnas_nand.c | 4 ++-- drivers/mtd/nand/raw/qcom_nandc.c | 4 ++-- drivers/mtd/nand/raw/s3c2410.c | 4 ++-- drivers/mtd/nand/raw/sunxi_nand.c | 6 +++--- drivers/mtd/nand/raw/tango_nand.c | 4 ++-- drivers/mtd/nand/raw/tegra_nand.c | 6 +++--- drivers/mtd/nand/raw/txx9ndfmc.c | 4 ++-- include/linux/mtd/rawnand.h | 14 ++++++++------ 19 files changed, 50 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index 30dae4c9d439..855cc7729c43 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -216,7 +216,7 @@ struct atmel_nand_controller_caps { }; struct atmel_nand_controller { - struct nand_hw_control base; + struct nand_controller base; const struct atmel_nand_controller_caps *caps; struct device *dev; struct regmap *smc; @@ -227,7 +227,7 @@ struct atmel_nand_controller { }; static inline struct atmel_nand_controller * -to_nand_controller(struct nand_hw_control *ctl) +to_nand_controller(struct nand_controller *ctl) { return container_of(ctl, struct atmel_nand_controller, base); } @@ -239,7 +239,7 @@ struct atmel_smc_nand_controller { }; static inline struct atmel_smc_nand_controller * -to_smc_nand_controller(struct nand_hw_control *ctl) +to_smc_nand_controller(struct nand_controller *ctl) { return container_of(to_nand_controller(ctl), struct atmel_smc_nand_controller, base); @@ -263,7 +263,7 @@ struct atmel_hsmc_nand_controller { }; static inline struct atmel_hsmc_nand_controller * -to_hsmc_nand_controller(struct nand_hw_control *ctl) +to_hsmc_nand_controller(struct nand_controller *ctl) { return container_of(to_nand_controller(ctl), struct atmel_hsmc_nand_controller, base); @@ -1966,7 +1966,7 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, struct device_node *np = dev->of_node; int ret; - nand_hw_control_init(&nc->base); + nand_controller_init(&nc->base); INIT_LIST_HEAD(&nc->chips); nc->dev = dev; nc->caps = caps; diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index 1306aaa7a8bf..2e5efa0f9ea2 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -114,7 +114,7 @@ enum { struct brcmnand_controller { struct device *dev; - struct nand_hw_control controller; + struct nand_controller controller; void __iomem *nand_base; void __iomem *nand_fc; /* flash cache */ void __iomem *flash_dma_base; @@ -2433,7 +2433,7 @@ int brcmnand_probe(struct platform_device *pdev, struct brcmnand_soc *soc) init_completion(&ctrl->done); init_completion(&ctrl->dma_done); - nand_hw_control_init(&ctrl->controller); + nand_controller_init(&ctrl->controller); INIT_LIST_HEAD(&ctrl->host_list); /* NAND register range */ diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c index bb96cb33cd6b..4dccdfba6140 100644 --- a/drivers/mtd/nand/raw/docg4.c +++ b/drivers/mtd/nand/raw/docg4.c @@ -1257,8 +1257,8 @@ static void __init init_mtd_structs(struct mtd_info *mtd) nand->ecc.strength = DOCG4_T; nand->options = NAND_BUSWIDTH_16 | NAND_NO_SUBPAGE_WRITE; nand->IO_ADDR_R = nand->IO_ADDR_W = doc->virtadr + DOC_IOSPACE_DATA; - nand->controller = &nand->hwcontrol; - nand_hw_control_init(nand->controller); + nand->controller = &nand->dummy_controller; + nand_controller_init(nand->controller); /* methods */ nand->cmdfunc = docg4_command; diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c index 51f0b340bc0d..0aa54a949653 100644 --- a/drivers/mtd/nand/raw/fsl_elbc_nand.c +++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c @@ -61,7 +61,7 @@ struct fsl_elbc_mtd { /* Freescale eLBC FCM controller information */ struct fsl_elbc_fcm_ctrl { - struct nand_hw_control controller; + struct nand_controller controller; struct fsl_elbc_mtd *chips[MAX_BANKS]; u8 __iomem *addr; /* Address of assigned FCM buffer */ @@ -879,7 +879,7 @@ static int fsl_elbc_nand_probe(struct platform_device *pdev) } elbc_fcm_ctrl->counter++; - nand_hw_control_init(&elbc_fcm_ctrl->controller); + nand_controller_init(&elbc_fcm_ctrl->controller); fsl_lbc_ctrl_dev->nand = elbc_fcm_ctrl; } else { elbc_fcm_ctrl = fsl_lbc_ctrl_dev->nand; diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c index 75d3c951f61a..96130d91e32c 100644 --- a/drivers/mtd/nand/raw/fsl_ifc_nand.c +++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c @@ -51,7 +51,7 @@ struct fsl_ifc_mtd { /* overview of the fsl ifc controller */ struct fsl_ifc_nand_ctrl { - struct nand_hw_control controller; + struct nand_controller controller; struct fsl_ifc_mtd *chips[FSL_IFC_BANK_COUNT]; void __iomem *addr; /* Address of assigned IFC buffer */ @@ -1004,7 +1004,7 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) ifc_nand_ctrl->addr = NULL; fsl_ifc_ctrl_dev->nand = ifc_nand_ctrl; - nand_hw_control_init(&ifc_nand_ctrl->controller); + nand_controller_init(&ifc_nand_ctrl->controller); } else { ifc_nand_ctrl = fsl_ifc_ctrl_dev->nand; } diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c index e69f6ae4c539..49841dad347c 100644 --- a/drivers/mtd/nand/raw/jz4780_nand.c +++ b/drivers/mtd/nand/raw/jz4780_nand.c @@ -44,7 +44,7 @@ struct jz4780_nand_cs { struct jz4780_nand_controller { struct device *dev; struct jz4780_bch *bch; - struct nand_hw_control controller; + struct nand_controller controller; unsigned int num_banks; struct list_head chips; int selected; @@ -65,7 +65,8 @@ static inline struct jz4780_nand_chip *to_jz4780_nand_chip(struct mtd_info *mtd) return container_of(mtd_to_nand(mtd), struct jz4780_nand_chip, chip); } -static inline struct jz4780_nand_controller *to_jz4780_nand_controller(struct nand_hw_control *ctrl) +static inline struct jz4780_nand_controller +*to_jz4780_nand_controller(struct nand_controller *ctrl) { return container_of(ctrl, struct jz4780_nand_controller, controller); } @@ -368,7 +369,7 @@ static int jz4780_nand_probe(struct platform_device *pdev) nfc->dev = dev; nfc->num_banks = num_banks; - nand_hw_control_init(&nfc->controller); + nand_controller_init(&nfc->controller); INIT_LIST_HEAD(&nfc->chips); ret = jz4780_nand_init_chips(nfc, pdev); diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 80a074cccb82..bd5f9a4b7b16 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -318,7 +318,7 @@ struct marvell_nfc_caps { * @dma_buf: 32-bit aligned buffer for DMA transfers (NFCv1 only) */ struct marvell_nfc { - struct nand_hw_control controller; + struct nand_controller controller; struct device *dev; void __iomem *regs; struct clk *core_clk; @@ -335,7 +335,7 @@ struct marvell_nfc { u8 *dma_buf; }; -static inline struct marvell_nfc *to_marvell_nfc(struct nand_hw_control *ctrl) +static inline struct marvell_nfc *to_marvell_nfc(struct nand_controller *ctrl) { return container_of(ctrl, struct marvell_nfc, controller); } @@ -2745,7 +2745,7 @@ static int marvell_nfc_probe(struct platform_device *pdev) return -ENOMEM; nfc->dev = dev; - nand_hw_control_init(&nfc->controller); + nand_controller_init(&nfc->controller); INIT_LIST_HEAD(&nfc->chips); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index e6b14b79c8a8..7bc6be3f6ec0 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -145,7 +145,7 @@ struct mtk_nfc_clk { }; struct mtk_nfc { - struct nand_hw_control controller; + struct nand_controller controller; struct mtk_ecc_config ecc_cfg; struct mtk_nfc_clk clk; struct mtk_ecc *ecc; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index e545e03a214e..dcdf0f373100 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5000,8 +5000,8 @@ static void nand_set_defaults(struct nand_chip *chip) chip->read_buf = busw ? nand_read_buf16 : nand_read_buf; if (!chip->controller) { - chip->controller = &chip->hwcontrol; - nand_hw_control_init(chip->controller); + chip->controller = &chip->dummy_controller; + nand_controller_init(chip->controller); } if (!chip->buf_align) diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c index d8a806894937..540fa1a0ea24 100644 --- a/drivers/mtd/nand/raw/ndfc.c +++ b/drivers/mtd/nand/raw/ndfc.c @@ -39,7 +39,7 @@ struct ndfc_controller { void __iomem *ndfcbase; struct nand_chip chip; int chip_select; - struct nand_hw_control ndfc_control; + struct nand_controller ndfc_control; }; static struct ndfc_controller ndfc_ctrl[NDFC_MAX_CS]; @@ -218,7 +218,7 @@ static int ndfc_probe(struct platform_device *ofdev) ndfc = &ndfc_ctrl[cs]; ndfc->chip_select = cs; - nand_hw_control_init(&ndfc->ndfc_control); + nand_controller_init(&ndfc->ndfc_control); ndfc->ofdev = ofdev; dev_set_drvdata(&ofdev->dev, ndfc); diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c index e50c64adc3c8..e943b2e5a5e2 100644 --- a/drivers/mtd/nand/raw/omap2.c +++ b/drivers/mtd/nand/raw/omap2.c @@ -145,7 +145,7 @@ static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc, static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10}; /* Shared among all NAND instances to synchronize access to the ECC Engine */ -static struct nand_hw_control omap_gpmc_controller = { +static struct nand_controller omap_gpmc_controller = { .lock = __SPIN_LOCK_UNLOCKED(omap_gpmc_controller.lock), .wq = __WAIT_QUEUE_HEAD_INITIALIZER(omap_gpmc_controller.wq), }; diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c index d649d5944826..01b00bb69c1e 100644 --- a/drivers/mtd/nand/raw/oxnas_nand.c +++ b/drivers/mtd/nand/raw/oxnas_nand.c @@ -32,7 +32,7 @@ #define OXNAS_NAND_MAX_CHIPS 1 struct oxnas_nand_ctrl { - struct nand_hw_control base; + struct nand_controller base; void __iomem *io_base; struct clk *clk; struct nand_chip *chips[OXNAS_NAND_MAX_CHIPS]; @@ -96,7 +96,7 @@ static int oxnas_nand_probe(struct platform_device *pdev) if (!oxnas) return -ENOMEM; - nand_hw_control_init(&oxnas->base); + nand_controller_init(&oxnas->base); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); oxnas->io_base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 645630953f38..aa6c3e026ef1 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -365,7 +365,7 @@ struct nandc_regs { * from all connected NAND devices pagesize */ struct qcom_nand_controller { - struct nand_hw_control controller; + struct nand_controller controller; struct list_head host_list; struct device *dev; @@ -2728,7 +2728,7 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc) INIT_LIST_HEAD(&nandc->desc_list); INIT_LIST_HEAD(&nandc->host_list); - nand_hw_control_init(&nandc->controller); + nand_controller_init(&nandc->controller); return 0; } diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c index 5a4a68790653..e8bf64832213 100644 --- a/drivers/mtd/nand/raw/s3c2410.c +++ b/drivers/mtd/nand/raw/s3c2410.c @@ -162,7 +162,7 @@ enum s3c_nand_clk_state { */ struct s3c2410_nand_info { /* mtd info */ - struct nand_hw_control controller; + struct nand_controller controller; struct s3c2410_nand_mtd *mtds; struct s3c2410_platform_nand *platform; @@ -1094,7 +1094,7 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); - nand_hw_control_init(&info->controller); + nand_controller_init(&info->controller); /* get the clock source and enable it */ diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index 4b11cd4a79be..07f3ff9a28f2 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -234,7 +234,7 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand) * controller events */ struct sunxi_nfc { - struct nand_hw_control controller; + struct nand_controller controller; struct device *dev; void __iomem *regs; struct clk *ahb_clk; @@ -247,7 +247,7 @@ struct sunxi_nfc { struct dma_chan *dmac; }; -static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl) +static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_controller *ctrl) { return container_of(ctrl, struct sunxi_nfc, controller); } @@ -2012,7 +2012,7 @@ static int sunxi_nfc_probe(struct platform_device *pdev) return -ENOMEM; nfc->dev = dev; - nand_hw_control_init(&nfc->controller); + nand_controller_init(&nfc->controller); INIT_LIST_HEAD(&nfc->chips); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c index f2052fae21c7..dd7a26efdf4f 100644 --- a/drivers/mtd/nand/raw/tango_nand.c +++ b/drivers/mtd/nand/raw/tango_nand.c @@ -83,7 +83,7 @@ #define MAX_CS 4 struct tango_nfc { - struct nand_hw_control hw; + struct nand_controller hw; void __iomem *reg_base; void __iomem *mem_base; void __iomem *pbus_base; @@ -654,7 +654,7 @@ static int tango_nand_probe(struct platform_device *pdev) return PTR_ERR(nfc->chan); platform_set_drvdata(pdev, nfc); - nand_hw_control_init(&nfc->hw); + nand_controller_init(&nfc->hw); nfc->freq_kHz = clk_get_rate(clk) / 1000; for_each_child_of_node(pdev->dev.of_node, np) { diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index 56c0aa1bc81f..31c0d9ca9d23 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -164,7 +164,7 @@ HWSTATUS_RBSY_VALUE(NAND_STATUS_READY)) struct tegra_nand_controller { - struct nand_hw_control controller; + struct nand_controller controller; struct device *dev; void __iomem *regs; int irq; @@ -187,7 +187,7 @@ struct tegra_nand_chip { }; static inline struct tegra_nand_controller * - to_tegra_ctrl(struct nand_hw_control *hw_ctrl) + to_tegra_ctrl(struct nand_controller *hw_ctrl) { return container_of(hw_ctrl, struct tegra_nand_controller, controller); } @@ -1136,7 +1136,7 @@ static int tegra_nand_probe(struct platform_device *pdev) return -ENOMEM; ctrl->dev = &pdev->dev; - nand_hw_control_init(&ctrl->controller); + nand_controller_init(&ctrl->controller); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ctrl->regs = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c index 5fe9da8b4a0a..8f5bbbac4612 100644 --- a/drivers/mtd/nand/raw/txx9ndfmc.c +++ b/drivers/mtd/nand/raw/txx9ndfmc.c @@ -73,7 +73,7 @@ struct txx9ndfmc_drvdata { void __iomem *base; unsigned char hold; /* in gbusclock */ unsigned char spw; /* in gbusclock */ - struct nand_hw_control hw_control; + struct nand_controller hw_control; }; static struct platform_device *mtd_to_platdev(struct mtd_info *mtd) @@ -303,7 +303,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev) dev_info(&dev->dev, "CLK:%ldMHz HOLD:%d SPW:%d\n", (gbusclk + 500000) / 1000000, hold, spw); - nand_hw_control_init(&drvdata->hw_control); + nand_controller_init(&drvdata->hw_control); platform_set_drvdata(dev, drvdata); txx9ndfmc_initialize(dev); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 598d356de83f..93a2678e0f0d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -510,20 +510,21 @@ struct nand_id { }; /** - * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices + * struct nand_controller - Structure used to describe a NAND controller + * * @lock: protection lock * @active: the mtd device which holds the controller currently * @wq: wait queue to sleep on if a NAND operation is in * progress used instead of the per chip wait queue * when a hw controller is available. */ -struct nand_hw_control { +struct nand_controller { spinlock_t lock; struct nand_chip *active; wait_queue_head_t wq; }; -static inline void nand_hw_control_init(struct nand_hw_control *nfc) +static inline void nand_controller_init(struct nand_controller *nfc) { nfc->active = NULL; spin_lock_init(&nfc->lock); @@ -1197,7 +1198,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure * @buf_align: minimum buffer alignment required by a platform - * @hwcontrol: platform-specific hardware control structure + * @dummy_controller: dummy controller implementation for drivers that can + * only control a single chip * @erase: [REPLACEABLE] erase function * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring * data from array to read regs (tR). @@ -1333,11 +1335,11 @@ struct nand_chip { flstate_t state; uint8_t *oob_poi; - struct nand_hw_control *controller; + struct nand_controller *controller; struct nand_ecc_ctrl ecc; unsigned long buf_align; - struct nand_hw_control hwcontrol; + struct nand_controller dummy_controller; uint8_t *bbt; struct nand_bbt_descr *bbt_td; -- cgit v1.2.3 From 05b54c7bac906c443fd0b23ab8954e0560b33e5c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 19 Jul 2018 01:05:46 +0200 Subject: mtd: rawnand: add hooks that may be called during nand_scan() In order to remove the limitation that forbids dynamic allocation in nand_scan_ident(), we must create a path that will be the same for all controller drivers. The idea is to use nand_scan() instead of the widely used nand_scan_ident()/nand_scan_tail() couple. In order to achieve this, controller drivers will need to adjust some parameters between these two functions depending on the NAND chip wired on them. This takes the form of two new hooks (->{attach,detach}_chip()) that are placed in a new nand_controller_ops structure, which is then attached to the nand_controller object at driver initialization time. ->attach_chip() is called between nand_scan_ident() and nand_scan_tail(), and ->detach_chip() is called in the error path of nand_scan() and in nand_cleanup(). Note that some NAND controller drivers don't have a dedicated nand_controller object and instead rely on the default/dummy one embedded in nand_chip. If you're in this case and still want to initialize the controller ops, you'll have to manipulate chip->dummy_controller directly. Last but not least, it's worth mentioning that we plan to move some of the controller related hooks placed in nand_chip into nand_controller_ops to make the separation between NAND chip and NAND controller methods clearer. Signed-off-by: Miquel Raynal Acked-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_base.c | 32 ++++++++++++++++++++++++++++++-- include/linux/mtd/rawnand.h | 21 +++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index dcdf0f373100..dea41fa25be1 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -6718,6 +6718,20 @@ err_free_buf: } EXPORT_SYMBOL(nand_scan_tail); +static int nand_attach(struct nand_chip *chip) +{ + if (chip->controller->ops && chip->controller->ops->attach_chip) + return chip->controller->ops->attach_chip(chip); + + return 0; +} + +static void nand_detach(struct nand_chip *chip) +{ + if (chip->controller->ops && chip->controller->ops->detach_chip) + chip->controller->ops->detach_chip(chip); +} + /** * nand_scan_with_ids - [NAND Interface] Scan for the NAND device * @mtd: MTD device structure @@ -6731,11 +6745,21 @@ EXPORT_SYMBOL(nand_scan_tail); int nand_scan_with_ids(struct mtd_info *mtd, int maxchips, struct nand_flash_dev *ids) { + struct nand_chip *chip = mtd_to_nand(mtd); int ret; ret = nand_scan_ident(mtd, maxchips, ids); - if (!ret) - ret = nand_scan_tail(mtd); + if (ret) + return ret; + + ret = nand_attach(chip); + if (ret) + return ret; + + ret = nand_scan_tail(mtd); + if (ret) + nand_detach(chip); + return ret; } EXPORT_SYMBOL(nand_scan_with_ids); @@ -6763,7 +6787,11 @@ void nand_cleanup(struct nand_chip *chip) /* Free manufacturer priv data. */ nand_manufacturer_cleanup(chip); + + /* Free controller specific allocations after chip identification */ + nand_detach(chip); } + EXPORT_SYMBOL_GPL(nand_cleanup); /** diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 93a2678e0f0d..fbd6b29cf22c 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -509,6 +509,25 @@ struct nand_id { int len; }; +/** + * struct nand_controller_ops - Controller operations + * + * @attach_chip: this method is called after the NAND detection phase after + * flash ID and MTD fields such as erase size, page size and OOB + * size have been set up. ECC requirements are available if + * provided by the NAND chip or device tree. Typically used to + * choose the appropriate ECC configuration and allocate + * associated resources. + * This hook is optional. + * @detach_chip: free all resources allocated/claimed in + * nand_controller_ops->attach_chip(). + * This hook is optional. + */ +struct nand_controller_ops { + int (*attach_chip)(struct nand_chip *chip); + void (*detach_chip)(struct nand_chip *chip); +}; + /** * struct nand_controller - Structure used to describe a NAND controller * @@ -517,11 +536,13 @@ struct nand_id { * @wq: wait queue to sleep on if a NAND operation is in * progress used instead of the per chip wait queue * when a hw controller is available. + * @ops: NAND controller operations. */ struct nand_controller { spinlock_t lock; struct nand_chip *active; wait_queue_head_t wq; + const struct nand_controller_ops *ops; }; static inline void nand_controller_init(struct nand_controller *nfc) -- cgit v1.2.3 From 98732da1a08ebb666983d469981a8b994e77d556 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jul 2018 15:31:50 +0200 Subject: mtd: rawnand: do not export nand_scan_[ident|tail]() anymore Both nand_scan_ident() and nand_scan_tail() helpers used to be called directly from controller drivers that needed to tweak some ECC-related parameters before nand_scan_tail(). This separation prevented dynamic allocations during the phase of NAND identification, which was inconvenient. All controller drivers have been moved to use nand_scan(), in conjunction with the chip->ecc.[attach|detach]_chip() hooks that actually do the required tweaking sequence between both ident/tail calls, allowing programmers to use dynamic allocation as they need all across the scanning sequence. Declare nand_scan_[ident|tail]() statically now. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_base.c | 16 +++++++++------- include/linux/mtd/rawnand.h | 18 ++++++------------ 2 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 42a7a934a17b..34ea44f90fd8 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5924,7 +5924,7 @@ static int nand_dt_init(struct nand_chip *chip) } /** - * nand_scan_ident - [NAND Interface] Scan for the NAND device + * nand_scan_ident - Scan for the NAND device * @mtd: MTD device structure * @maxchips: number of chips to scan for * @table: alternative NAND ID table @@ -5932,9 +5932,13 @@ static int nand_dt_init(struct nand_chip *chip) * This is the first phase of the normal nand_scan() function. It reads the * flash ID and sets up MTD fields accordingly. * + * This helper used to be called directly from controller drivers that needed + * to tweak some ECC-related parameters before nand_scan_tail(). This separation + * prevented dynamic allocations during this phase which was unconvenient and + * as been banned for the benefit of the ->init_ecc()/cleanup_ecc() hooks. */ -int nand_scan_ident(struct mtd_info *mtd, int maxchips, - struct nand_flash_dev *table) +static int nand_scan_ident(struct mtd_info *mtd, int maxchips, + struct nand_flash_dev *table) { int i, nand_maf_id, nand_dev_id; struct nand_chip *chip = mtd_to_nand(mtd); @@ -6008,7 +6012,6 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, return 0; } -EXPORT_SYMBOL(nand_scan_ident); static int nand_set_ecc_soft_ops(struct mtd_info *mtd) { @@ -6385,14 +6388,14 @@ static bool nand_ecc_strength_good(struct mtd_info *mtd) } /** - * nand_scan_tail - [NAND Interface] Scan for the NAND device + * nand_scan_tail - Scan for the NAND device * @mtd: MTD device structure * * This is the second phase of the normal nand_scan() function. It fills out * all the uninitialized function pointers with the defaults and scans for a * bad block table if appropriate. */ -int nand_scan_tail(struct mtd_info *mtd) +static int nand_scan_tail(struct mtd_info *mtd) { struct nand_chip *chip = mtd_to_nand(mtd); struct nand_ecc_ctrl *ecc = &chip->ecc; @@ -6716,7 +6719,6 @@ err_free_buf: return ret; } -EXPORT_SYMBOL(nand_scan_tail); static int nand_attach(struct nand_chip *chip) { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fbd6b29cf22c..71571ed23a20 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -35,17 +35,6 @@ static inline int nand_scan(struct mtd_info *mtd, int max_chips) return nand_scan_with_ids(mtd, max_chips, NULL); } -/* - * Separate phases of nand_scan(), allowing board driver to intervene - * and override command or ECC setup according to flash type. - */ -int nand_scan_ident(struct mtd_info *mtd, int max_chips, - struct nand_flash_dev *table); -int nand_scan_tail(struct mtd_info *mtd); - -/* Unregister the MTD device and free resources held by the NAND device */ -void nand_release(struct mtd_info *mtd); - /* Internal helper for board drivers which need to override command function */ void nand_wait_ready(struct mtd_info *mtd); @@ -1745,8 +1734,13 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, int nand_write_data_op(struct nand_chip *chip, const void *buf, unsigned int len, bool force_8bit); -/* Free resources held by the NAND device */ +/* + * Free resources held by the NAND device, must be called on error after a + * sucessful nand_scan(). + */ void nand_cleanup(struct nand_chip *chip); +/* Unregister the MTD device and calls nand_cleanup() */ +void nand_release(struct mtd_info *mtd); /* Default extended ID decoding function */ void nand_decode_ext_id(struct nand_chip *chip); -- cgit v1.2.3 From 2023f1fa216f30b1877d65be2057fbaf0bbd49b3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jul 2018 15:31:51 +0200 Subject: mtd: rawnand: allocate model parameter dynamically Thanks to the migration of all drivers to use nand_scan() and the related nand_controller_ops, we can now allocate data during the detection phase. Let's do it first for the NAND model parameter which is allocated in nand_detect(). Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_base.c | 52 +++++++++++++++++++++++++++++++--------- include/linux/mtd/rawnand.h | 2 +- 2 files changed, 42 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 34ea44f90fd8..00e80781124a 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5225,8 +5225,11 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) sanitize_string(p->manufacturer, sizeof(p->manufacturer)); sanitize_string(p->model, sizeof(p->model)); - strncpy(chip->parameters.model, p->model, - sizeof(chip->parameters.model) - 1); + chip->parameters.model = kstrdup(p->model, GFP_KERNEL); + if (!chip->parameters.model) { + ret = -ENOMEM; + goto free_onfi_param_page; + } mtd->writesize = le32_to_cpu(p->byte_per_page); @@ -5356,8 +5359,11 @@ static int nand_flash_detect_jedec(struct nand_chip *chip) sanitize_string(p->manufacturer, sizeof(p->manufacturer)); sanitize_string(p->model, sizeof(p->model)); - strncpy(chip->parameters.model, p->model, - sizeof(chip->parameters.model) - 1); + chip->parameters.model = kstrdup(p->model, GFP_KERNEL); + if (!chip->parameters.model) { + ret = -ENOMEM; + goto free_jedec_param_page; + } mtd->writesize = le32_to_cpu(p->byte_per_page); @@ -5546,8 +5552,9 @@ static bool find_full_id_nand(struct nand_chip *chip, chip->onfi_timing_mode_default = type->onfi_timing_mode_default; - strncpy(chip->parameters.model, type->name, - sizeof(chip->parameters.model) - 1); + chip->parameters.model = kstrdup(type->name, GFP_KERNEL); + if (!chip->parameters.model) + return false; return true; } @@ -5706,8 +5713,9 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) if (!type->name) return -ENODEV; - strncpy(chip->parameters.model, type->name, - sizeof(chip->parameters.model) - 1); + chip->parameters.model = kstrdup(type->name, GFP_KERNEL); + if (!chip->parameters.model) + return -ENOMEM; chip->chipsize = (uint64_t)type->chipsize << 20; @@ -5737,7 +5745,9 @@ ident_done: mtd->name); pr_warn("bus width %d instead of %d bits\n", busw ? 16 : 8, (chip->options & NAND_BUSWIDTH_16) ? 16 : 8); - return -EINVAL; + ret = -EINVAL; + + goto free_detect_allocation; } nand_decode_bbm_options(chip); @@ -5774,6 +5784,11 @@ ident_done: (int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC", mtd->erasesize >> 10, mtd->writesize, mtd->oobsize); return 0; + +free_detect_allocation: + kfree(chip->parameters.model); + + return ret; } static const char * const nand_ecc_modes[] = { @@ -6013,6 +6028,11 @@ static int nand_scan_ident(struct mtd_info *mtd, int maxchips, return 0; } +static void nand_scan_ident_cleanup(struct nand_chip *chip) +{ + kfree(chip->parameters.model); +} + static int nand_set_ecc_soft_ops(struct mtd_info *mtd) { struct nand_chip *chip = mtd_to_nand(mtd); @@ -6760,11 +6780,18 @@ int nand_scan_with_ids(struct mtd_info *mtd, int maxchips, ret = nand_attach(chip); if (ret) - return ret; + goto cleanup_ident; ret = nand_scan_tail(mtd); if (ret) - nand_detach(chip); + goto detach_chip; + + return 0; + +detach_chip: + nand_detach(chip); +cleanup_ident: + nand_scan_ident_cleanup(chip); return ret; } @@ -6796,6 +6823,9 @@ void nand_cleanup(struct nand_chip *chip) /* Free controller specific allocations after chip identification */ nand_detach(chip); + + /* Free identification phase allocations */ + nand_scan_ident_cleanup(chip); } EXPORT_SYMBOL_GPL(nand_cleanup); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 71571ed23a20..099fa166569a 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -476,7 +476,7 @@ struct onfi_params { */ struct nand_parameters { /* Generic parameters */ - char model[100]; + const char *model; bool supports_set_get_features; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); -- cgit v1.2.3 From 3d3fe3c05d5a17ebdf55b936c51017c127c0ed44 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jul 2018 15:31:52 +0200 Subject: mtd: rawnand: allocate dynamically ONFI parameters during detection Now that it is possible to do dynamic allocations during the identification phase, convert the onfi_params structure (which is only needed with ONFI compliant chips) into a pointer that will be allocated only if needed. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_base.c | 54 +++++++++++++++++++++++-------------- drivers/mtd/nand/raw/nand_micron.c | 7 ++--- drivers/mtd/nand/raw/nand_timings.c | 12 ++++----- include/linux/mtd/rawnand.h | 6 ++--- 4 files changed, 47 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 00e80781124a..d527e448ce19 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5151,6 +5151,8 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); struct nand_onfi_params *p; + struct onfi_params *onfi; + int onfi_version = 0; char id[4]; int i, ret, val; @@ -5206,21 +5208,19 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) /* Check version */ val = le16_to_cpu(p->revision); if (val & ONFI_VERSION_2_3) - chip->parameters.onfi.version = 23; + onfi_version = 23; else if (val & ONFI_VERSION_2_2) - chip->parameters.onfi.version = 22; + onfi_version = 22; else if (val & ONFI_VERSION_2_1) - chip->parameters.onfi.version = 21; + onfi_version = 21; else if (val & ONFI_VERSION_2_0) - chip->parameters.onfi.version = 20; + onfi_version = 20; else if (val & ONFI_VERSION_1_0) - chip->parameters.onfi.version = 10; + onfi_version = 10; - if (!chip->parameters.onfi.version) { + if (!onfi_version) { pr_info("unsupported ONFI version: %d\n", val); goto free_onfi_param_page; - } else { - ret = 1; } sanitize_string(p->manufacturer, sizeof(p->manufacturer)); @@ -5257,7 +5257,7 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) if (p->ecc_bits != 0xff) { chip->ecc_strength_ds = p->ecc_bits; chip->ecc_step_ds = 512; - } else if (chip->parameters.onfi.version >= 21 && + } else if (onfi_version >= 21 && (le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) { /* @@ -5284,19 +5284,33 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) bitmap_set(chip->parameters.set_feature_list, ONFI_FEATURE_ADDR_TIMING_MODE, 1); } - chip->parameters.onfi.tPROG = le16_to_cpu(p->t_prog); - chip->parameters.onfi.tBERS = le16_to_cpu(p->t_bers); - chip->parameters.onfi.tR = le16_to_cpu(p->t_r); - chip->parameters.onfi.tCCS = le16_to_cpu(p->t_ccs); - chip->parameters.onfi.async_timing_mode = - le16_to_cpu(p->async_timing_mode); - chip->parameters.onfi.vendor_revision = - le16_to_cpu(p->vendor_revision); - memcpy(chip->parameters.onfi.vendor, p->vendor, - sizeof(p->vendor)); + onfi = kzalloc(sizeof(*onfi), GFP_KERNEL); + if (!onfi) { + ret = -ENOMEM; + goto free_model; + } + + onfi->version = onfi_version; + onfi->tPROG = le16_to_cpu(p->t_prog); + onfi->tBERS = le16_to_cpu(p->t_bers); + onfi->tR = le16_to_cpu(p->t_r); + onfi->tCCS = le16_to_cpu(p->t_ccs); + onfi->async_timing_mode = le16_to_cpu(p->async_timing_mode); + onfi->vendor_revision = le16_to_cpu(p->vendor_revision); + memcpy(onfi->vendor, p->vendor, sizeof(p->vendor)); + chip->parameters.onfi = onfi; + + /* Identification done, free the full ONFI parameter page and exit */ + kfree(p); + + return 1; + +free_model: + kfree(chip->parameters.model); free_onfi_param_page: kfree(p); + return ret; } @@ -5693,7 +5707,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) } } - chip->parameters.onfi.version = 0; if (!type->name || !type->pagesize) { /* Check if the chip is ONFI compliant */ ret = nand_flash_detect_onfi(chip); @@ -6031,6 +6044,7 @@ static int nand_scan_ident(struct mtd_info *mtd, int maxchips, static void nand_scan_ident_cleanup(struct nand_chip *chip) { kfree(chip->parameters.model); + kfree(chip->parameters.onfi); } static int nand_set_ecc_soft_ops(struct mtd_info *mtd) diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c index 656947d91841..f5dc0a7a2456 100644 --- a/drivers/mtd/nand/raw/nand_micron.c +++ b/drivers/mtd/nand/raw/nand_micron.c @@ -88,9 +88,10 @@ static int micron_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) static int micron_nand_onfi_init(struct nand_chip *chip) { struct nand_parameters *p = &chip->parameters; - struct nand_onfi_vendor_micron *micron = (void *)p->onfi.vendor; - if (chip->parameters.onfi.version && p->onfi.vendor_revision) { + if (p->onfi) { + struct nand_onfi_vendor_micron *micron = (void *)p->onfi->vendor; + chip->read_retries = micron->read_retry_options; chip->setup_read_retry = micron_nand_setup_read_retry; } @@ -382,7 +383,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip) u8 id[5]; int ret; - if (!chip->parameters.onfi.version) + if (!chip->parameters.onfi) return MICRON_ON_DIE_UNSUPPORTED; if (chip->bits_per_cell != 1) diff --git a/drivers/mtd/nand/raw/nand_timings.c b/drivers/mtd/nand/raw/nand_timings.c index 9bb599106a31..ebc7b5f76f77 100644 --- a/drivers/mtd/nand/raw/nand_timings.c +++ b/drivers/mtd/nand/raw/nand_timings.c @@ -294,6 +294,7 @@ int onfi_fill_data_interface(struct nand_chip *chip, int timing_mode) { struct nand_data_interface *iface = &chip->data_interface; + struct onfi_params *onfi = chip->parameters.onfi; if (type != NAND_SDR_IFACE) return -EINVAL; @@ -308,17 +309,16 @@ int onfi_fill_data_interface(struct nand_chip *chip, * tPROG, tBERS, tR and tCCS. * These information are part of the ONFI parameter page. */ - if (chip->parameters.onfi.version) { - struct nand_parameters *params = &chip->parameters; + if (onfi) { struct nand_sdr_timings *timings = &iface->timings.sdr; /* microseconds -> picoseconds */ - timings->tPROG_max = 1000000ULL * params->onfi.tPROG; - timings->tBERS_max = 1000000ULL * params->onfi.tBERS; - timings->tR_max = 1000000ULL * params->onfi.tR; + timings->tPROG_max = 1000000ULL * onfi->tPROG; + timings->tBERS_max = 1000000ULL * onfi->tBERS; + timings->tR_max = 1000000ULL * onfi->tR; /* nanoseconds -> picoseconds */ - timings->tCCS_min = 1000UL * params->onfi.tCCS; + timings->tCCS_min = 1000UL * onfi->tCCS; } else { struct nand_sdr_timings *timings = &iface->timings.sdr; /* diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 099fa166569a..efb2345359bb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -482,7 +482,7 @@ struct nand_parameters { DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); /* ONFI parameters */ - struct onfi_params onfi; + struct onfi_params *onfi; }; /* The maximum expected count of bytes in the NAND ID sequence */ @@ -1618,10 +1618,10 @@ struct platform_nand_data { /* return the supported asynchronous timing mode. */ static inline int onfi_get_async_timing_mode(struct nand_chip *chip) { - if (!chip->parameters.onfi.version) + if (!chip->parameters.onfi) return ONFI_TIMING_MODE_UNKNOWN; - return chip->parameters.onfi.async_timing_mode; + return chip->parameters.onfi->async_timing_mode; } int onfi_fill_data_interface(struct nand_chip *chip, -- cgit v1.2.3