From fa443bc3c1e4b28d9315dea882e8358ba6e26f8b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 29 Oct 2021 17:28:56 +0200 Subject: HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to selectively autoload drivers for ISH devices. Currently all ISH drivers are loaded for all systems having any ISH device. Signed-off-by: Thomas Weißschuh Acked-by: Srinivas Pandruvada Acked-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index ae2e75d15b21..befbf53c4b7c 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -895,4 +895,17 @@ struct dfl_device_id { kernel_ulong_t driver_data; }; +/* ISHTP (Integrated Sensor Hub Transport Protocol) */ + +#define ISHTP_MODULE_PREFIX "ishtp:" + +/** + * struct ishtp_device_id - ISHTP device identifier + * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * @context: pointer to driver specific data + */ +struct ishtp_device_id { + guid_t guid; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 64355db3caf6468dc711995239efe0cbcd7d0091 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 10 Nov 2021 13:16:55 +0100 Subject: mod_devicetable: fix kdocs for ishtp_device_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kdocs were copied from another device_id struct and not adapted. Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()") Signed-off-by: Thomas Weißschuh Reported-by: Stephen Rothwell Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index befbf53c4b7c..c70abe7aaef2 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -901,8 +901,7 @@ struct dfl_device_id { /** * struct ishtp_device_id - ISHTP device identifier - * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba - * @context: pointer to driver specific data + * @guid: GUID of the device. */ struct ishtp_device_id { guid_t guid; -- cgit v1.2.3 From bf9167a8b40c9cf463521da05342db81808c1b6e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Nov 2021 09:56:33 +0100 Subject: HID: intel-ish-hid: fix module device-id handling A late addititon to the intel-ish-hid framework caused a build failure with clang, and introduced an ABI to the module loader that stops working if any driver ever needs to bind to more than one UUID: drivers/hid/intel-ish-hid/ishtp-fw-loader.c:1067:4: error: initializer element is not a compile-time constant Change the ishtp_device_id to have correct documentation and a driver_data field like all the other ones, and change the drivers to use the ID table as the primary identification in a way that works with all compilers and avoids duplciating the identifiers. Fixes: f155dfeaa4ee ("platform/x86: isthp_eclite: only load for matching devices") Fixes: facfe0a4fdce ("platform/chrome: chros_ec_ishtp: only load for matching devices") Fixes: 0d0cccc0fd83 ("HID: intel-ish-hid: hid-client: only load for matching devices") Fixes: 44e2a58cb880 ("HID: intel-ish-hid: fw-loader: only load for matching devices") Fixes: cb1a2c6847f7 ("HID: intel-ish-hid: use constants for modaliases") Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede [jkosina@suse.cz: fix ecl_ishtp_cl_driver.id initialization] [jkosina@suse.cz: fix conflict with already fixed kerneldoc] Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 19 ++++++++----------- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 19 ++++++++----------- drivers/hid/intel-ish-hid/ishtp/bus.c | 2 +- drivers/platform/chrome/cros_ec_ishtp.c | 19 ++++++++----------- drivers/platform/x86/intel/ishtp_eclite.c | 19 ++++++++----------- include/linux/intel-ish-client-if.h | 4 ++-- include/linux/mod_devicetable.h | 2 ++ 7 files changed, 37 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c index 945a9d0b68cd..0e1183e96147 100644 --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c @@ -76,9 +76,12 @@ enum ish_loader_commands { #define LOADER_XFER_MODE_ISHTP BIT(1) /* ISH Transport Loader client unique GUID */ -static const guid_t loader_ishtp_guid = - GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7, - 0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc); +static const struct ishtp_device_id loader_ishtp_id_table[] = { + { .guid = GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7, + 0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc) }, + { } +}; +MODULE_DEVICE_TABLE(ishtp, loader_ishtp_id_table); #define FILENAME_SIZE 256 @@ -880,7 +883,7 @@ static int loader_init(struct ishtp_cl *loader_ishtp_cl, int reset) fw_client = ishtp_fw_cl_get_client(ishtp_get_ishtp_device(loader_ishtp_cl), - &loader_ishtp_guid); + &loader_ishtp_id_table[0].guid); if (!fw_client) { dev_err(cl_data_to_dev(client_data), "ISH client uuid not found\n"); @@ -1057,18 +1060,12 @@ static int loader_ishtp_cl_reset(struct ishtp_cl_device *cl_device) static struct ishtp_cl_driver loader_ishtp_cl_driver = { .name = "ish-loader", - .guid = &loader_ishtp_guid, + .id = loader_ishtp_id_table, .probe = loader_ishtp_cl_probe, .remove = loader_ishtp_cl_remove, .reset = loader_ishtp_cl_reset, }; -static const struct ishtp_device_id loader_ishtp_id_table[] = { - { loader_ishtp_guid }, - { } -}; -MODULE_DEVICE_TABLE(ishtp, loader_ishtp_id_table); - static int __init ish_loader_init(void) { return ishtp_cl_driver_register(&loader_ishtp_cl_driver, THIS_MODULE); diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index fb47d38d1e87..4338c9b68a43 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -12,9 +12,12 @@ #include "ishtp-hid.h" /* ISH Transport protocol (ISHTP in short) GUID */ -static const guid_t hid_ishtp_guid = - GUID_INIT(0x33AECD58, 0xB679, 0x4E54, - 0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26); +static const struct ishtp_device_id hid_ishtp_id_table[] = { + { .guid = GUID_INIT(0x33AECD58, 0xB679, 0x4E54, + 0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26), }, + { } +}; +MODULE_DEVICE_TABLE(ishtp, hid_ishtp_id_table); /* Rx ring buffer pool size */ #define HID_CL_RX_RING_SIZE 32 @@ -662,7 +665,7 @@ static int hid_ishtp_cl_init(struct ishtp_cl *hid_ishtp_cl, int reset) ishtp_set_tx_ring_size(hid_ishtp_cl, HID_CL_TX_RING_SIZE); ishtp_set_rx_ring_size(hid_ishtp_cl, HID_CL_RX_RING_SIZE); - fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_guid); + fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_id_table[0].guid); if (!fw_client) { dev_err(cl_data_to_dev(client_data), "ish client uuid not found\n"); @@ -945,19 +948,13 @@ static const struct dev_pm_ops hid_ishtp_pm_ops = { static struct ishtp_cl_driver hid_ishtp_cl_driver = { .name = "ish-hid", - .guid = &hid_ishtp_guid, + .id = hid_ishtp_id_table, .probe = hid_ishtp_cl_probe, .remove = hid_ishtp_cl_remove, .reset = hid_ishtp_cl_reset, .driver.pm = &hid_ishtp_pm_ops, }; -static const struct ishtp_device_id hid_ishtp_id_table[] = { - { hid_ishtp_guid }, - { } -}; -MODULE_DEVICE_TABLE(ishtp, hid_ishtp_id_table); - static int __init ish_hid_init(void) { int rv; diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index e159cd1c5f37..f68aba8794fe 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -241,7 +241,7 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv) struct ishtp_cl_device *device = to_ishtp_cl_device(dev); struct ishtp_cl_driver *driver = to_ishtp_cl_driver(drv); - return guid_equal(driver->guid, + return guid_equal(&driver->id[0].guid, &device->fw_client->props.protocol_name); } diff --git a/drivers/platform/chrome/cros_ec_ishtp.c b/drivers/platform/chrome/cros_ec_ishtp.c index 8c17358e84c1..4020b8354bae 100644 --- a/drivers/platform/chrome/cros_ec_ishtp.c +++ b/drivers/platform/chrome/cros_ec_ishtp.c @@ -41,9 +41,12 @@ enum cros_ec_ish_channel { #define ISHTP_SEND_TIMEOUT (3 * HZ) /* ISH Transport CrOS EC ISH client unique GUID */ -static const guid_t cros_ish_guid = - GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc, - 0xb0, 0xd8, 0x9e, 0x7c, 0xda, 0xe0, 0xd6, 0xa0); +static const struct ishtp_device_id cros_ec_ishtp_id_table[] = { + { .guid = GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc, + 0xb0, 0xd8, 0x9e, 0x7c, 0xda, 0xe0, 0xd6, 0xa0), }, + { } +}; +MODULE_DEVICE_TABLE(ishtp, cros_ec_ishtp_id_table); struct header { u8 channel; @@ -389,7 +392,7 @@ static int cros_ish_init(struct ishtp_cl *cros_ish_cl) ishtp_set_tx_ring_size(cros_ish_cl, CROS_ISH_CL_TX_RING_SIZE); ishtp_set_rx_ring_size(cros_ish_cl, CROS_ISH_CL_RX_RING_SIZE); - fw_client = ishtp_fw_cl_get_client(dev, &cros_ish_guid); + fw_client = ishtp_fw_cl_get_client(dev, &cros_ec_ishtp_id_table[0].guid); if (!fw_client) { dev_err(cl_data_to_dev(client_data), "ish client uuid not found\n"); @@ -765,7 +768,7 @@ static SIMPLE_DEV_PM_OPS(cros_ec_ishtp_pm_ops, cros_ec_ishtp_suspend, static struct ishtp_cl_driver cros_ec_ishtp_driver = { .name = "cros_ec_ishtp", - .guid = &cros_ish_guid, + .id = cros_ec_ishtp_id_table, .probe = cros_ec_ishtp_probe, .remove = cros_ec_ishtp_remove, .reset = cros_ec_ishtp_reset, @@ -774,12 +777,6 @@ static struct ishtp_cl_driver cros_ec_ishtp_driver = { }, }; -static const struct ishtp_device_id cros_ec_ishtp_id_table[] = { - { cros_ish_guid }, - { } -}; -MODULE_DEVICE_TABLE(ishtp, cros_ec_ishtp_id_table); - static int __init cros_ec_ishtp_mod_init(void) { return ishtp_cl_driver_register(&cros_ec_ishtp_driver, THIS_MODULE); diff --git a/drivers/platform/x86/intel/ishtp_eclite.c b/drivers/platform/x86/intel/ishtp_eclite.c index b9fb8f28fd63..93ac8b2dbf38 100644 --- a/drivers/platform/x86/intel/ishtp_eclite.c +++ b/drivers/platform/x86/intel/ishtp_eclite.c @@ -93,9 +93,12 @@ struct ishtp_opregion_dev { }; /* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */ -static const guid_t ecl_ishtp_guid = - GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3, - 0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9); +static const struct ishtp_device_id ecl_ishtp_id_table[] = { + { .guid = GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3, + 0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9), }, + { } +}; +MODULE_DEVICE_TABLE(ishtp, ecl_ishtp_id_table); /* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */ static const guid_t ecl_acpi_guid = @@ -462,7 +465,7 @@ static int ecl_ishtp_cl_init(struct ishtp_cl *ecl_ishtp_cl) ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE); ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE); - fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid); + fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_id_table[0].guid); if (!fw_client) { dev_err(cl_data_to_dev(opr_dev), "fw client not found\n"); return -ENOENT; @@ -674,19 +677,13 @@ static const struct dev_pm_ops ecl_ishtp_pm_ops = { static struct ishtp_cl_driver ecl_ishtp_cl_driver = { .name = "ishtp-eclite", - .guid = &ecl_ishtp_guid, + .id = ecl_ishtp_id_table, .probe = ecl_ishtp_cl_probe, .remove = ecl_ishtp_cl_remove, .reset = ecl_ishtp_cl_reset, .driver.pm = &ecl_ishtp_pm_ops, }; -static const struct ishtp_device_id ecl_ishtp_id_table[] = { - { ecl_ishtp_guid }, - { } -}; -MODULE_DEVICE_TABLE(ishtp, ecl_ishtp_id_table); - static int __init ecl_ishtp_init(void) { return ishtp_cl_driver_register(&ecl_ishtp_cl_driver, THIS_MODULE); diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index aee8ff4739b1..f45f13304add 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -9,7 +9,7 @@ #define _INTEL_ISH_CLIENT_IF_H_ #include -#include +#include struct ishtp_cl_device; struct ishtp_device; @@ -40,7 +40,7 @@ enum cl_state { struct ishtp_cl_driver { struct device_driver driver; const char *name; - const guid_t *guid; + const struct ishtp_device_id *id; int (*probe)(struct ishtp_cl_device *dev); void (*remove)(struct ishtp_cl_device *dev); int (*reset)(struct ishtp_cl_device *dev); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index c70abe7aaef2..4bb71979a8fd 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -902,9 +902,11 @@ struct dfl_device_id { /** * struct ishtp_device_id - ISHTP device identifier * @guid: GUID of the device. + * @driver_data: pointer to driver specific data */ struct ishtp_device_id { guid_t guid; + kernel_ulong_t driver_data; }; #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 522a0032af005502507f5f81ae64fdcc82b5d068 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 6 Nov 2021 17:13:35 -0400 Subject: Add linux/cacheflush.h Many architectures do not include asm-generic/cacheflush.h, so turn the includes on their head and add linux/cacheflush.h which includes asm/cacheflush.h. Move the flush_dcache_folio() declaration from asm-generic/cacheflush.h to linux/cacheflush.h and change linux/highmem.h to include linux/cacheflush.h instead of asm/cacheflush.h so that all necessary places will see flush_dcache_folio(). More functions should have their default implementations moved in the future, but those are for follow-on patches. This fixes csky, sparc and sparc64 which were missed in the commit which added flush_dcache_folio(). Fixes: 08b0b0059bf1 ("mm: Add flush_dcache_folio()") Suggested-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Geert Uytterhoeven --- arch/arc/include/asm/cacheflush.h | 1 - arch/arm/include/asm/cacheflush.h | 1 - arch/m68k/include/asm/cacheflush_mm.h | 1 - arch/mips/include/asm/cacheflush.h | 2 -- arch/nds32/include/asm/cacheflush.h | 1 - arch/nios2/include/asm/cacheflush.h | 1 - arch/parisc/include/asm/cacheflush.h | 1 - arch/sh/include/asm/cacheflush.h | 1 - arch/xtensa/include/asm/cacheflush.h | 3 --- include/asm-generic/cacheflush.h | 6 ------ include/linux/cacheflush.h | 18 ++++++++++++++++++ include/linux/highmem.h | 3 +-- 12 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 include/linux/cacheflush.h (limited to 'include/linux') diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index e8c2c7469e10..e201b4b1655a 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -36,7 +36,6 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); void dma_cache_wback_inv(phys_addr_t start, unsigned long sz); void dma_cache_inv(phys_addr_t start, unsigned long sz); diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index e68fb879e4f9..5e56288e343b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -290,7 +290,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -void flush_dcache_folio(struct folio *folio); #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 8ab46625ddd3..1ac55e7b47f0 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -250,7 +250,6 @@ static inline void __flush_page_to_ram(void *vaddr) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) -void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page)) diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index f207388541d5..b3dc9c589442 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -61,8 +61,6 @@ static inline void flush_dcache_page(struct page *page) SetPageDcacheDirty(page); } -void flush_dcache_folio(struct folio *folio); - #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 3fc0bb7d6487..c2a222ebfa2a 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -27,7 +27,6 @@ void flush_cache_vunmap(unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len); void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h index 1999561b22aa..d0b71dd71287 100644 --- a/arch/nios2/include/asm/cacheflush.h +++ b/arch/nios2/include/asm/cacheflush.h @@ -29,7 +29,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index da0cd4b3a28f..859b8a34adcf 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -50,7 +50,6 @@ void invalidate_kernel_vmap_range(void *vaddr, int size); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index c7a97f32432f..481a664287e2 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -43,7 +43,6 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); -void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_user_range flush_icache_range extern void flush_icache_page(struct vm_area_struct *vma, diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index a8a041609c5d..7b4359312c25 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -121,7 +121,6 @@ void flush_cache_page(struct vm_area_struct*, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *); -void flush_dcache_folio(struct folio *); void local_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -138,9 +137,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, #define flush_cache_vunmap(start,end) do { } while (0) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #define flush_dcache_page(page) do { } while (0) -static inline void flush_dcache_folio(struct folio *folio) { } #define flush_icache_range local_flush_icache_range #define flush_cache_page(vma, addr, pfn) do { } while (0) diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index fedc0dfa4877..4f07afacbc23 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -50,13 +50,7 @@ static inline void flush_dcache_page(struct page *page) { } -static inline void flush_dcache_folio(struct folio *folio) { } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO -#endif - -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO -void flush_dcache_folio(struct folio *folio); #endif #ifndef flush_dcache_mmap_lock diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h new file mode 100644 index 000000000000..fef8b607f97e --- /dev/null +++ b/include/linux/cacheflush.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CACHEFLUSH_H +#define _LINUX_CACHEFLUSH_H + +#include + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio); +#endif +#else +static inline void flush_dcache_folio(struct folio *folio) +{ +} +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0 +#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ + +#endif /* _LINUX_CACHEFLUSH_H */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 25aff0f2ed0b..c944b3b70ee7 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -5,12 +5,11 @@ #include #include #include +#include #include #include #include -#include - #include "highmem-internal.h" /** -- cgit v1.2.3 From 9c3252152e8a6401c2b9e32490a5a16ec4472778 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Nov 2021 21:17:14 -0500 Subject: mm: Rename folio_test_multi to folio_test_large This is a better name. Also add kernel-doc. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 8 +++++++- mm/memcontrol.c | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 52ec4b5e5615..05510118fbb8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -692,7 +692,13 @@ static inline bool folio_test_single(struct folio *folio) return !folio_test_head(folio); } -static inline bool folio_test_multi(struct folio *folio) +/** + * folio_test_large() - Does this folio contain more than one page? + * @folio: The folio to test. + * + * Return: True if the folio is larger than one page. + */ +static inline bool folio_test_large(struct folio *folio) { return folio_test_head(folio); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 781605e92015..6863a834ed42 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5558,7 +5558,7 @@ static int mem_cgroup_move_account(struct page *page, VM_BUG_ON(from == to); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - VM_BUG_ON(compound && !folio_test_multi(folio)); + VM_BUG_ON(compound && !folio_test_large(folio)); /* * Prevent mem_cgroup_migrate() from looking at -- cgit v1.2.3 From a1efe484dd8c04c4c2d4eb1ee6b04d01cfc07ccc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Nov 2021 21:18:52 -0500 Subject: mm: Remove folio_test_single There's no need for this predicate; callers can just use !folio_test_large(). Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 6 ------ mm/util.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 05510118fbb8..b5f14d581113 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -686,12 +686,6 @@ static inline bool test_set_page_writeback(struct page *page) __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) -/* Whether there are one or multiple pages in a folio */ -static inline bool folio_test_single(struct folio *folio) -{ - return !folio_test_head(folio); -} - /** * folio_test_large() - Does this folio contain more than one page? * @folio: The folio to test. diff --git a/mm/util.c b/mm/util.c index e58151a61255..741ba32a43ac 100644 --- a/mm/util.c +++ b/mm/util.c @@ -670,7 +670,7 @@ bool folio_mapped(struct folio *folio) { long i, nr; - if (folio_test_single(folio)) + if (!folio_test_large(folio)) return atomic_read(&folio->_mapcount) >= 0; if (atomic_read(folio_mapcount_ptr(folio)) >= 0) return true; -- cgit v1.2.3 From ff36da69bc90d80b0c73f47f4b2e270b3ff6da99 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 29 Aug 2021 06:07:03 -0400 Subject: fs: Remove FS_THP_SUPPORT Instead of setting a bit in the fs_flags to set a bit in the address_space, set the bit in the address_space directly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/inode.c | 2 -- include/linux/fs.h | 1 - include/linux/pagemap.h | 16 ++++++++++++++++ mm/shmem.c | 3 ++- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 3eba0940ffcf..6b80a51129d5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -180,8 +180,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; - if (sb->s_type->fs_flags & FS_THP_SUPPORT) - __set_bit(AS_THP_SUPPORT, &mapping->flags); mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); #ifdef CONFIG_READ_ONLY_THP_FOR_FS diff --git a/include/linux/fs.h b/include/linux/fs.h index 1cb616fc1105..bbf812ce89a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2518,7 +2518,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1a0c646eb6ff..9e33878bf23b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -176,6 +176,22 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) m->gfp_mask = mask; } +/** + * mapping_set_large_folios() - Indicate the file supports large folios. + * @mapping: The file. + * + * The filesystem should call this function in its inode constructor to + * indicate that the VFS can use large folios to cache the contents of + * the file. + * + * Context: This should not be called while the inode is active as it + * is non-atomic. + */ +static inline void mapping_set_large_folios(struct address_space *mapping) +{ + __set_bit(AS_THP_SUPPORT, &mapping->flags); +} + static inline bool mapping_thp_support(struct address_space *mapping) { return test_bit(AS_THP_SUPPORT, &mapping->flags); diff --git a/mm/shmem.c b/mm/shmem.c index dc038ce78700..18f93c2d68f1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2303,6 +2303,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); cache_no_acl(inode); + mapping_set_large_folios(inode->i_mapping); switch (mode & S_IFMT) { default: @@ -3870,7 +3871,7 @@ static struct file_system_type shmem_fs_type = { .parameters = shmem_fs_parameters, #endif .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT, + .fs_flags = FS_USERNS_MOUNT, }; int __init shmem_init(void) -- cgit v1.2.3 From ed2145c474c9015bc634e35f6d1a9b7767f3fbfc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 29 Aug 2021 06:28:19 -0400 Subject: fs: Rename AS_THP_SUPPORT and mapping_thp_support These are now indicators of large folio support, not THP support. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9e33878bf23b..605246452305 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -84,7 +84,7 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, - AS_THP_SUPPORT = 6, /* THPs supported */ + AS_LARGE_FOLIO_SUPPORT = 6, }; /** @@ -189,12 +189,12 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) */ static inline void mapping_set_large_folios(struct address_space *mapping) { - __set_bit(AS_THP_SUPPORT, &mapping->flags); + __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } -static inline bool mapping_thp_support(struct address_space *mapping) +static inline bool mapping_large_folio_support(struct address_space *mapping) { - return test_bit(AS_THP_SUPPORT, &mapping->flags); + return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } static inline int filemap_nr_thps(struct address_space *mapping) @@ -209,7 +209,7 @@ static inline int filemap_nr_thps(struct address_space *mapping) static inline void filemap_nr_thps_inc(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_thp_support(mapping)) + if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else WARN_ON_ONCE(1); @@ -219,7 +219,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping) static inline void filemap_nr_thps_dec(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_thp_support(mapping)) + if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else WARN_ON_ONCE(1); -- cgit v1.2.3 From c035713998700e8843c7d087f55bce3c54c0e3ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 5 Nov 2021 10:19:05 -0400 Subject: mm: Add functions to zero portions of a folio These functions are wrappers around zero_user_segments(), which means that zero_user_segments() can now be called for compound pages even when CONFIG_TRANSPARENT_HUGEPAGE is disabled. Use 'xend' as the name of the parameter to indicate that this is an excluded end, not the more usual included end. Excluding the end makes more sense to the callers, but can cause confusion to readers who are more used to seeing included ends. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/highmem.h | 44 +++++++++++++++++++++++++++++++++++++++++--- mm/highmem.c | 2 -- 2 files changed, 41 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index c944b3b70ee7..39bb9b47fa9c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -230,10 +230,10 @@ static inline void tag_clear_highpage(struct page *page) * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. */ -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#ifdef CONFIG_HIGHMEM void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); -#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ +#else static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) @@ -253,7 +253,7 @@ static inline void zero_user_segments(struct page *page, for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } -#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ +#endif static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) @@ -363,4 +363,42 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * folio_zero_segments() - Zero two byte ranges in a folio. + * @folio: The folio to write to. + * @start1: The first byte to zero. + * @xend1: One more than the last byte in the first range. + * @start2: The first byte to zero in the second range. + * @xend2: One more than the last byte in the second range. + */ +static inline void folio_zero_segments(struct folio *folio, + size_t start1, size_t xend1, size_t start2, size_t xend2) +{ + zero_user_segments(&folio->page, start1, xend1, start2, xend2); +} + +/** + * folio_zero_segment() - Zero a byte range in a folio. + * @folio: The folio to write to. + * @start: The first byte to zero. + * @xend: One more than the last byte to zero. + */ +static inline void folio_zero_segment(struct folio *folio, + size_t start, size_t xend) +{ + zero_user_segments(&folio->page, start, xend, 0, 0); +} + +/** + * folio_zero_range() - Zero a byte range in a folio. + * @folio: The folio to write to. + * @start: The first byte to zero. + * @length: The number of bytes to zero. + */ +static inline void folio_zero_range(struct folio *folio, + size_t start, size_t length) +{ + zero_user_segments(&folio->page, start, start + length, 0, 0); +} + #endif /* _LINUX_HIGHMEM_H */ diff --git a/mm/highmem.c b/mm/highmem.c index 88f65f155845..819d41140e5b 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -359,7 +359,6 @@ void kunmap_high(struct page *page) } EXPORT_SYMBOL(kunmap_high); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { @@ -416,7 +415,6 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, BUG_ON((start1 | start2 | end1 | end2) != 0); } EXPORT_SYMBOL(zero_user_segments); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_HIGHMEM */ #ifdef CONFIG_KMAP_LOCAL -- cgit v1.2.3 From 985e9ece1e55a94da842f6c1f9ff84d587b26267 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 17 Nov 2021 20:07:35 +0200 Subject: ACPI: Make acpi_node_get_parent() local acpi_node_get_parent() isn't used outside drivers/acpi/property.c. Make it local. Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 3 ++- include/linux/acpi.h | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 781e312f4534..2366f54d8e9c 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1084,7 +1084,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, * Returns parent node of an ACPI device or data firmware node or %NULL if * not available. */ -struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode) +static struct fwnode_handle * +acpi_node_get_parent(const struct fwnode_handle *fwnode) { if (is_acpi_data_node(fwnode)) { /* All data nodes have parent pointer so just return that */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 668d007f0917..b28f8790192a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1182,7 +1182,6 @@ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); -struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, @@ -1287,12 +1286,6 @@ acpi_get_next_subnode(const struct fwnode_handle *fwnode, return NULL; } -static inline struct fwnode_handle * -acpi_node_get_parent(const struct fwnode_handle *fwnode) -{ - return NULL; -} - static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) -- cgit v1.2.3 From f124034faa911ed534bf8c4881ad98dbbde2a966 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Nov 2021 18:44:17 -0500 Subject: Revert "virtio_ring: validate used buffer length" This reverts commit 939779f5152d161b34f612af29e7dc1ac4472fcf. Attempts to validate length in the core did not work out: there turn out to exist multiple broken devices, and in particular legacy devices are known to be broken in this respect. We have ideas for handling this better in the next version but for now let's revert to a known good state to make sure drivers work for people. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 60 -------------------------------------------- include/linux/virtio.h | 2 -- 2 files changed, 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 00f64f2f8b72..6d2614e34470 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -14,9 +14,6 @@ #include #include -static bool force_used_validation = false; -module_param(force_used_validation, bool, 0444); - #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ @@ -185,9 +182,6 @@ struct vring_virtqueue { } packed; }; - /* Per-descriptor in buffer length */ - u32 *buflen; - /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); @@ -496,7 +490,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; - u32 buflen = 0; START_USE(vq); @@ -578,7 +571,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, indirect); - buflen += sg->length; } } /* Last one doesn't continue. */ @@ -618,10 +610,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, else vq->split.desc_state[head].indir_desc = ctx; - /* Store in buffer length if necessary */ - if (vq->buflen) - vq->buflen[head] = buflen; - /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); @@ -796,11 +784,6 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } - if (vq->buflen && unlikely(*len > vq->buflen[i])) { - BAD_RING(vq, "used len %d is larger than in buflen %u\n", - *len, vq->buflen[i]); - return NULL; - } /* detach_buf_split clears data, so grab it now. */ ret = vq->split.desc_state[i].data; @@ -1079,7 +1062,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; - u32 buflen = 0; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); @@ -1109,8 +1091,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; - if (n >= out_sgs) - buflen += sg->length; } } @@ -1164,10 +1144,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; - /* Store in buffer length if necessary */ - if (vq->buflen) - vq->buflen[id] = buflen; - vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); @@ -1203,7 +1179,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; int err; - u32 buflen = 0; START_USE(vq); @@ -1283,8 +1258,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } - if (n >= out_sgs) - buflen += sg->length; } } @@ -1304,10 +1277,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; - /* Store in buffer length if necessary */ - if (vq->buflen) - vq->buflen[id] = buflen; - /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising @@ -1494,11 +1463,6 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", id); return NULL; } - if (vq->buflen && unlikely(*len > vq->buflen[id])) { - BAD_RING(vq, "used len %d is larger than in buflen %u\n", - *len, vq->buflen[id]); - return NULL; - } /* detach_buf_packed clears data, so grab it now. */ ret = vq->packed.desc_state[id].data; @@ -1704,7 +1668,6 @@ static struct virtqueue *vring_create_virtqueue_packed( struct vring_virtqueue *vq; struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; - struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; @@ -1794,15 +1757,6 @@ static struct virtqueue *vring_create_virtqueue_packed( if (!vq->packed.desc_extra) goto err_desc_extra; - if (!drv->suppress_used_validation || force_used_validation) { - vq->buflen = kmalloc_array(num, sizeof(*vq->buflen), - GFP_KERNEL); - if (!vq->buflen) - goto err_buflen; - } else { - vq->buflen = NULL; - } - /* No callback? Tell other side not to bother us. */ if (!callback) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; @@ -1815,8 +1769,6 @@ static struct virtqueue *vring_create_virtqueue_packed( spin_unlock(&vdev->vqs_list_lock); return &vq->vq; -err_buflen: - kfree(vq->packed.desc_extra); err_desc_extra: kfree(vq->packed.desc_state); err_desc_state: @@ -2224,7 +2176,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, void (*callback)(struct virtqueue *), const char *name) { - struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); struct vring_virtqueue *vq; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) @@ -2284,15 +2235,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, if (!vq->split.desc_extra) goto err_extra; - if (!drv->suppress_used_validation || force_used_validation) { - vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen), - GFP_KERNEL); - if (!vq->buflen) - goto err_buflen; - } else { - vq->buflen = NULL; - } - /* Put everything in free lists. */ vq->free_head = 0; memset(vq->split.desc_state, 0, vring.num * @@ -2303,8 +2245,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, spin_unlock(&vdev->vqs_list_lock); return &vq->vq; -err_buflen: - kfree(vq->split.desc_extra); err_extra: kfree(vq->split.desc_state); err_state: diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 44d0e09da2d9..41edbc01ffa4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -152,7 +152,6 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); * @feature_table_size: number of entries in the feature table array. * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. - * @suppress_used_validation: set to not have core validate used length * @probe: the function to call when a device is found. Returns 0 or -errno. * @scan: optional function to call after successful probe; intended * for virtio-scsi to invoke a scan. @@ -169,7 +168,6 @@ struct virtio_driver { unsigned int feature_table_size; const unsigned int *feature_table_legacy; unsigned int feature_table_size_legacy; - bool suppress_used_validation; int (*validate)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev); -- cgit v1.2.3 From ec15baec3272bbec576f2ce7ce47765a8e9b7b1c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:43 +0200 Subject: net: ptp: add a definition for the UDP port for IEEE 1588 general messages As opposed to event messages (Sync, PdelayReq etc) which require timestamping, general messages (Announce, FollowUp etc) do not. In PTP they are part of different streams of data. IEEE 1588-2008 Annex D.2 "UDP port numbers" states that the UDP destination port assigned by IANA is 319 for event messages, and 320 for general messages. Yet the kernel seems to be missing the definition for general messages. This patch adds it. Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/linux/ptp_classify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index ae04968a3a47..9afd34a2d36c 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -37,6 +37,7 @@ #define PTP_MSGTYPE_PDELAY_RESP 0x3 #define PTP_EV_PORT 319 +#define PTP_GEN_PORT 320 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ -- cgit v1.2.3 From f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Nov 2021 10:39:29 -0500 Subject: siphash: use _unaligned version by default On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS because the ordinary load/store instructions (ldr, ldrh, ldrb) can tolerate any misalignment of the memory address. However, load/store double and load/store multiple instructions (ldrd, ldm) may still only be used on memory addresses that are 32-bit aligned, and so we have to use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we may end up with a severe performance hit due to alignment traps that require fixups by the kernel. Testing shows that this currently happens with clang-13 but not gcc-11. In theory, any compiler version can produce this bug or other problems, as we are dealing with undefined behavior in C99 even on architectures that support this in hardware, see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363. Fortunately, the get_unaligned() accessors do the right thing: when building for ARMv6 or later, the compiler will emit unaligned accesses using the ordinary load/store instructions (but avoid the ones that require 32-bit alignment). When building for older ARM, those accessors will emit the appropriate sequence of ldrb/mov/orr instructions. And on architectures that can truly tolerate any kind of misalignment, the get_unaligned() accessors resolve to the leXX_to_cpup accessors that operate on aligned addresses. Since the compiler will in fact emit ldrd or ldm instructions when building this code for ARM v6 or later, the solution is to use the unaligned accessors unconditionally on architectures where this is known to be fast. The _aligned version of the hash function is however still needed to get the best performance on architectures that cannot do any unaligned access in hardware. This new version avoids the undefined behavior and should produce the fastest hash on all architectures we support. Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@linaro.org/ Link: https://lore.kernel.org/linux-crypto/CAK8P3a2KfmmGDbVHULWevB0hv71P2oi2ZCHEAqT=8dQfa0=cqQ@mail.gmail.com/ Reported-by: Ard Biesheuvel Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF") Signed-off-by: Arnd Bergmann Reviewed-by: Jason A. Donenfeld Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- include/linux/siphash.h | 14 ++++---------- lib/siphash.c | 12 ++++++------ 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e..0cda61855d90 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } diff --git a/lib/siphash.c b/lib/siphash.c index a90112ee72a1..72b9068ab57b 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -49,6 +49,7 @@ SIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_unaligned); -#endif /** * siphash_1u64 - compute 64-bit siphash PRF value of a u64 @@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32); HSIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 @@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32); HSIPROUND; \ return v1 ^ v3; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); @@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 -- cgit v1.2.3 From 502e82b91361955c66c8453b5b7a905b0b5bd5a1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 7 Nov 2021 17:21:45 +0200 Subject: net/mlx5: Fix access to a non-supported register Validate MRTC register is supported before triggering a delayed work which accesses it. Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware") Signed-off-by: Aya Levin Reviewed-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 8 +++----- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 380f50d5462d..3ca998874c50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -836,7 +836,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); - if (mlx5_core_is_pf(dev)) + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e127c0530b3a..7df9c7f8d9c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1071,18 +1071,16 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) mlx5_set_driver_version(dev); - mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto stop_health; + goto reclaim_boot_pages; } + mlx5_start_health_poll(dev); + return 0; -stop_health: - mlx5_stop_health_poll(dev, boot); reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3636df90899a..fbaab440a484 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_84_to_68[0x11]; u8 tracer_registers[0x4]; - u8 regs_63_to_32[0x20]; + u8 regs_63_to_46[0x12]; + u8 mrtc[0x1]; + u8 regs_44_to_32[0xd]; + u8 regs_31_to_0[0x20]; }; -- cgit v1.2.3 From 6bbfa44116689469267f1a6e3d233b52114139d2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Dec 2021 23:45:50 +0900 Subject: kprobes: Limit max data_size of the kretprobe instances The 'kprobe::data_size' is unsigned, thus it can not be negative. But if user sets it enough big number (e.g. (size_t)-8), the result of 'data_size + sizeof(struct kretprobe_instance)' becomes smaller than sizeof(struct kretprobe_instance) or zero. In result, the kretprobe_instance are allocated without enough memory, and kretprobe accesses outside of allocated memory. To avoid this issue, introduce a max limitation of the kretprobe::data_size. 4KB per instance should be OK. Link: https://lkml.kernel.org/r/163836995040.432120.10322772773821182925.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: f47cd9b553aa ("kprobes: kretprobe user entry-handler") Reported-by: zhangyue Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 2 ++ kernel/kprobes.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e974caf39d3e..8c8f7a4d93af 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -153,6 +153,8 @@ struct kretprobe { struct kretprobe_holder *rph; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { union { struct freelist_node freelist; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e9db0c810554..21eccc961bba 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2086,6 +2086,9 @@ int register_kretprobe(struct kretprobe *rp) } } + if (rp->data_size > KRETPROBE_MAX_DATA_SIZE) + return -E2BIG; + rp->kp.pre_handler = pre_handler_kretprobe; rp->kp.post_handler = NULL; -- cgit v1.2.3 From 7a10d8c810cfad3e79372d7d1c77899d86cd6662 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2021 09:01:55 -0800 Subject: net: annotate data-races on txq->xmit_lock_owner syzbot found that __dev_queue_xmit() is reading txq->xmit_lock_owner without annotations. No serious issue there, let's document what is happening there. BUG: KCSAN: data-race in __dev_queue_xmit / __dev_queue_xmit write to 0xffff888139d09484 of 4 bytes by interrupt on cpu 0: __netif_tx_unlock include/linux/netdevice.h:4437 [inline] __dev_queue_xmit+0x948/0xf70 net/core/dev.c:4229 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline] macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567 __netdev_start_xmit include/linux/netdevice.h:4987 [inline] netdev_start_xmit include/linux/netdevice.h:5001 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3590 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259 neigh_hh_output include/net/neighbour.h:511 [inline] neigh_output include/net/neighbour.h:525 [inline] ip6_finish_output2+0x995/0xbb0 net/ipv6/ip6_output.c:126 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:450 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421 expire_timers+0x116/0x240 kernel/time/timer.c:1466 __run_timers+0x368/0x410 kernel/time/timer.c:1734 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747 __do_softirq+0x158/0x2de kernel/softirq.c:558 __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0x37/0x70 kernel/softirq.c:648 sysvec_apic_timer_interrupt+0x3e/0xb0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 read to 0xffff888139d09484 of 4 bytes by interrupt on cpu 1: __dev_queue_xmit+0x5e3/0xf70 net/core/dev.c:4213 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline] macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567 __netdev_start_xmit include/linux/netdevice.h:4987 [inline] netdev_start_xmit include/linux/netdevice.h:5001 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3590 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259 neigh_resolve_output+0x3db/0x410 net/core/neighbour.c:1523 neigh_output include/net/neighbour.h:527 [inline] ip6_finish_output2+0x9be/0xbb0 net/ipv6/ip6_output.c:126 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:450 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421 expire_timers+0x116/0x240 kernel/time/timer.c:1466 __run_timers+0x368/0x410 kernel/time/timer.c:1734 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747 __do_softirq+0x158/0x2de kernel/softirq.c:558 __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0x37/0x70 kernel/softirq.c:648 sysvec_apic_timer_interrupt+0x8d/0xb0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 kcsan_setup_watchpoint+0x94/0x420 kernel/kcsan/core.c:443 folio_test_anon include/linux/page-flags.h:581 [inline] PageAnon include/linux/page-flags.h:586 [inline] zap_pte_range+0x5ac/0x10e0 mm/memory.c:1347 zap_pmd_range mm/memory.c:1467 [inline] zap_pud_range mm/memory.c:1496 [inline] zap_p4d_range mm/memory.c:1517 [inline] unmap_page_range+0x2dc/0x3d0 mm/memory.c:1538 unmap_single_vma+0x157/0x210 mm/memory.c:1583 unmap_vmas+0xd0/0x180 mm/memory.c:1615 exit_mmap+0x23d/0x470 mm/mmap.c:3170 __mmput+0x27/0x1b0 kernel/fork.c:1113 mmput+0x3d/0x50 kernel/fork.c:1134 exit_mm+0xdb/0x170 kernel/exit.c:507 do_exit+0x608/0x17a0 kernel/exit.c:819 do_group_exit+0xce/0x180 kernel/exit.c:929 get_signal+0xfc3/0x1550 kernel/signal.c:2852 arch_do_signal_or_restart+0x8c/0x2e0 arch/x86/kernel/signal.c:868 handle_signal_work kernel/entry/common.c:148 [inline] exit_to_user_mode_loop kernel/entry/common.c:172 [inline] exit_to_user_mode_prepare+0x113/0x190 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:300 do_syscall_64+0x50/0xd0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000000 -> 0xffffffff Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 28712 Comm: syz-executor.0 Tainted: G W 5.16.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211130170155.2331929-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 19 +++++++++++++------ net/core/dev.c | 5 ++++- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec42495a43a..be5cb3360b94 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4404,7 +4404,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) @@ -4421,26 +4422,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + + if (likely(ok)) { + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } diff --git a/net/core/dev.c b/net/core/dev.c index 15ac064b5562..2a352e668d10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4210,7 +4210,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (txq->xmit_lock_owner != cpu) { + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + if (READ_ONCE(txq->xmit_lock_owner) != cpu) { if (dev_xmit_recursion()) goto recursion_alert; -- cgit v1.2.3 From e7f2be115f0746b969c0df14c0d182f65f005ca5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Oct 2021 16:10:55 +0200 Subject: sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime than the actual time. task_cputime_adjusted() snapshots utime and stime and then adjust their sum to match the scheduler maintained cputime.sum_exec_runtime. Unfortunately in nohz_full, sum_exec_runtime is only updated once per second in the worst case, causing a discrepancy against utime and stime that can be updated anytime by the reader using vtime. To fix this situation, perform an update of cputime.sum_exec_runtime when the cputime snapshot reports the task as actually running while the tick is disabled. The related overhead is then contained within the relevant situations. Reported-by: Hasegawa Hitomi Signed-off-by: Frederic Weisbecker Signed-off-by: Hasegawa Hitomi Signed-off-by: Thomas Gleixner Tested-by: Masayoshi Mizuma Acked-by: Phil Auld Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org --- include/linux/sched/cputime.h | 5 +++-- kernel/sched/cputime.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6c9f19a33865..ce3c58286062 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -18,15 +18,16 @@ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, +extern bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else -static inline void task_cputime(struct task_struct *t, +static inline bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; + return false; } static inline u64 task_gtime(struct task_struct *t) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 872e481d5098..9392aea1804e 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -615,7 +615,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) .sum_exec_runtime = p->se.sum_exec_runtime, }; - task_cputime(p, &cputime.utime, &cputime.stime); + if (task_cputime(p, &cputime.utime, &cputime.stime)) + cputime.sum_exec_runtime = task_sched_runtime(p); cputime_adjust(&cputime, &p->prev_cputime, ut, st); } EXPORT_SYMBOL_GPL(task_cputime_adjusted); @@ -828,19 +829,21 @@ u64 task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. */ -void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) +bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { struct vtime *vtime = &t->vtime; unsigned int seq; u64 delta; + int ret; if (!vtime_accounting_enabled()) { *utime = t->utime; *stime = t->stime; - return; + return false; } do { + ret = false; seq = read_seqcount_begin(&vtime->seqcount); *utime = t->utime; @@ -850,6 +853,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) if (vtime->state < VTIME_SYS) continue; + ret = true; delta = vtime_delta(vtime); /* @@ -861,6 +865,8 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) else *utime += vtime->utime + delta; } while (read_seqcount_retry(&vtime->seqcount, seq)); + + return ret; } static int vtime_state_fetch(struct vtime *vtime, int cpu) -- cgit v1.2.3