diff options
| author | Tvrtko Ursulin <tursulin@ursulin.net> | 2024-05-16 07:33:01 +0000 |
|---|---|---|
| committer | Tvrtko Ursulin <tursulin@ursulin.net> | 2024-05-16 07:33:01 +0000 |
| commit | 60a2f25de7b8b785baee2932db932ae9a5b8c86d (patch) | |
| tree | c4ea099c03d015c8761c1682e9b3714dc79c4caf /include/linux | |
| parent | fbad43eccae5cb14594195c20113369aabaa22b5 (diff) | |
| parent | 431c590c3ab0469dfedad3a832fe73556396ee52 (diff) | |
Merge drm/drm-next into drm-intel-gt-next
Some display refactoring patches are needed in order to allow conflict-
less merging.
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Diffstat (limited to 'include/linux')
713 files changed, 20675 insertions, 9299 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index afd94c9b8b8a..34829f2c517a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,6 +15,7 @@ #include <linux/mod_devicetable.h> #include <linux/property.h> #include <linux/uuid.h> +#include <linux/node.h> struct irq_domain; struct irq_domain_ops; @@ -30,6 +31,7 @@ struct irq_domain_ops; #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/fw_table.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> @@ -37,6 +39,16 @@ struct irq_domain_ops; #include <acpi/acpi_io.h> #include <asm/acpi.h> +#ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; @@ -119,21 +131,8 @@ enum acpi_address_range_id { /* Table Handlers */ -union acpi_subtable_headers { - struct acpi_subtable_header common; - struct acpi_hmat_structure hmat; - struct acpi_prmt_module_header prmt; - struct acpi_cedt_header cedt; -}; - typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, - const unsigned long end); - -typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, - void *arg, const unsigned long end); - /* Debugger support */ struct acpi_debugger_ops { @@ -207,14 +206,6 @@ static inline int acpi_debugger_notify_command_complete(void) (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) -struct acpi_subtable_proc { - int id; - acpi_tbl_entry_handler handler; - acpi_tbl_entry_handler_arg handler_arg; - void *arg; - int count; -}; - void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); @@ -229,16 +220,6 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); -#ifdef CONFIG_ACPI_TABLE_LIB -#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) -#define __init_or_acpilib -#define __initdata_or_acpilib -#else -#define EXPORT_SYMBOL_ACPI_LIB(x) -#define __init_or_acpilib __init -#define __initdata_or_acpilib __initdata -#endif - int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, @@ -256,10 +237,15 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); +static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) +{ + return gicc->flags & ACPI_MADT_ENABLED; +} + /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void @@ -439,6 +425,23 @@ extern int acpi_blacklisted(void); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); +#ifdef CONFIG_ACPI_THERMAL_LIB +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); +#endif + +#ifdef CONFIG_ACPI_HMAT +int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord); +#else +static inline int acpi_get_genport_coordinates(u32 uid, + struct access_coordinate *coord) +{ + return -EOPNOTSUPP; +} +#endif + #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); @@ -771,6 +774,10 @@ const char *acpi_get_subsystem_id(acpi_handle handle); #define ACPI_HANDLE(dev) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) +/* Get rid of the -Wunused-variable for adev */ +#define acpi_dev_uid_match(adev, uid2) (adev && false) +#define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false) + #include <acpi/acpi_numa.h> struct fwnode_handle; @@ -787,12 +794,6 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) struct acpi_device; -static inline bool -acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) -{ - return false; -} - static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) { return -ENODEV; @@ -1114,15 +1115,8 @@ static inline int acpi_get_lps0_constraint(struct device *dev) return ACPI_STATE_UNKNOWN; } #endif /* CONFIG_SUSPEND && CONFIG_X86 */ -#ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else -static inline void arch_reserve_mem_area(acpi_physical_address addr, - size_t size) -{ -} -#endif /* CONFIG_X86 */ -#else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif @@ -1176,6 +1170,7 @@ static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI +char *acpi_handle_path(acpi_handle handle); __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); @@ -1548,4 +1543,30 @@ static inline void acpi_device_notify(struct device *dev) { } static inline void acpi_device_notify_remove(struct device *dev) { } #endif +static inline void acpi_use_parent_companion(struct device *dev) +{ + ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); +} + +#ifdef CONFIG_ACPI_HMAT +int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, + enum access_coordinate_class access); +#else +static inline int hmat_update_target_coordinates(int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_ACPI_NUMA +bool acpi_node_backed_by_real_pxm(int nid); +#else +static inline bool acpi_node_backed_by_real_pxm(int nid) +{ + return false; +} +#endif + #endif /*_LINUX_ACPI_H*/ diff --git a/include/linux/acpi_amd_wbrf.h b/include/linux/acpi_amd_wbrf.h new file mode 100644 index 000000000000..898f31d536d4 --- /dev/null +++ b/include/linux/acpi_amd_wbrf.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Wifi Band Exclusion Interface (AMD ACPI Implementation) + * Copyright (C) 2023 Advanced Micro Devices + */ + +#ifndef _ACPI_AMD_WBRF_H +#define _ACPI_AMD_WBRF_H + +#include <linux/device.h> +#include <linux/notifier.h> + +/* The maximum number of frequency band ranges */ +#define MAX_NUM_OF_WBRF_RANGES 11 + +/* Record actions */ +#define WBRF_RECORD_ADD 0x0 +#define WBRF_RECORD_REMOVE 0x1 + +/** + * struct freq_band_range - Wifi frequency band range definition + * @start: start frequency point (in Hz) + * @end: end frequency point (in Hz) + */ +struct freq_band_range { + u64 start; + u64 end; +}; + +/** + * struct wbrf_ranges_in_out - wbrf ranges info + * @num_of_ranges: total number of band ranges in this struct + * @band_list: array of Wifi band ranges + */ +struct wbrf_ranges_in_out { + u64 num_of_ranges; + struct freq_band_range band_list[MAX_NUM_OF_WBRF_RANGES]; +}; + +/** + * enum wbrf_notifier_actions - wbrf notifier actions index + * @WBRF_CHANGED: there was some frequency band updates. The consumers + * should retrieve the latest active frequency bands. + */ +enum wbrf_notifier_actions { + WBRF_CHANGED, +}; + +#if IS_ENABLED(CONFIG_AMD_WBRF) +bool acpi_amd_wbrf_supported_producer(struct device *dev); +int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in); +bool acpi_amd_wbrf_supported_consumer(struct device *dev); +int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out); +int amd_wbrf_register_notifier(struct notifier_block *nb); +int amd_wbrf_unregister_notifier(struct notifier_block *nb); +#else +static inline +bool acpi_amd_wbrf_supported_consumer(struct device *dev) +{ + return false; +} + +static inline +int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in) +{ + return -ENODEV; +} + +static inline +bool acpi_amd_wbrf_supported_producer(struct device *dev) +{ + return false; +} +static inline +int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out) +{ + return -ENODEV; +} +static inline +int amd_wbrf_register_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} +static inline +int amd_wbrf_unregister_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} +#endif /* CONFIG_AMD_WBRF */ + +#endif /* _ACPI_AMD_WBRF_H */ diff --git a/include/linux/aer.h b/include/linux/aer.h index 29cc10220952..4b97f38f3fcf 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -18,11 +18,8 @@ struct pci_dev; -struct aer_header_log_regs { - unsigned int dw0; - unsigned int dw1; - unsigned int dw2; - unsigned int dw3; +struct pcie_tlp_log { + u32 dw[4]; }; struct aer_capability_regs { @@ -33,13 +30,15 @@ struct aer_capability_regs { u32 cor_status; u32 cor_mask; u32 cap_control; - struct aer_header_log_regs header_log; + struct pcie_tlp_log header_log; u32 root_command; u32 root_status; u16 cor_err_source; u16 uncor_err_source; }; +int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log); + #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); @@ -51,7 +50,7 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif -void cper_print_aer(struct pci_dev *dev, int aer_severity, +void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h deleted file mode 100644 index 421b0fa90d6a..000000000000 --- a/include/linux/amba/clcd-regs.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * David A Rusling - * - * Copyright (C) 2001 ARM Limited - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -#ifndef AMBA_CLCD_REGS_H -#define AMBA_CLCD_REGS_H - -/* - * CLCD Controller Internal Register addresses - */ -#define CLCD_TIM0 0x00000000 -#define CLCD_TIM1 0x00000004 -#define CLCD_TIM2 0x00000008 -#define CLCD_TIM3 0x0000000c -#define CLCD_UBAS 0x00000010 -#define CLCD_LBAS 0x00000014 - -#define CLCD_PL110_IENB 0x00000018 -#define CLCD_PL110_CNTL 0x0000001c -#define CLCD_PL110_STAT 0x00000020 -#define CLCD_PL110_INTR 0x00000024 -#define CLCD_PL110_UCUR 0x00000028 -#define CLCD_PL110_LCUR 0x0000002C - -#define CLCD_PL111_CNTL 0x00000018 -#define CLCD_PL111_IENB 0x0000001c -#define CLCD_PL111_RIS 0x00000020 -#define CLCD_PL111_MIS 0x00000024 -#define CLCD_PL111_ICR 0x00000028 -#define CLCD_PL111_UCUR 0x0000002c -#define CLCD_PL111_LCUR 0x00000030 - -#define CLCD_PALL 0x00000200 -#define CLCD_PALETTE 0x00000200 - -#define TIM2_PCD_LO_MASK GENMASK(4, 0) -#define TIM2_PCD_LO_BITS 5 -#define TIM2_CLKSEL (1 << 5) -#define TIM2_ACB_MASK GENMASK(10, 6) -#define TIM2_IVS (1 << 11) -#define TIM2_IHS (1 << 12) -#define TIM2_IPC (1 << 13) -#define TIM2_IOE (1 << 14) -#define TIM2_BCD (1 << 26) -#define TIM2_PCD_HI_MASK GENMASK(31, 27) -#define TIM2_PCD_HI_BITS 5 -#define TIM2_PCD_HI_SHIFT 27 - -#define CNTL_LCDEN (1 << 0) -#define CNTL_LCDBPP1 (0 << 1) -#define CNTL_LCDBPP2 (1 << 1) -#define CNTL_LCDBPP4 (2 << 1) -#define CNTL_LCDBPP8 (3 << 1) -#define CNTL_LCDBPP16 (4 << 1) -#define CNTL_LCDBPP16_565 (6 << 1) -#define CNTL_LCDBPP16_444 (7 << 1) -#define CNTL_LCDBPP24 (5 << 1) -#define CNTL_LCDBW (1 << 4) -#define CNTL_LCDTFT (1 << 5) -#define CNTL_LCDMONO8 (1 << 6) -#define CNTL_LCDDUAL (1 << 7) -#define CNTL_BGR (1 << 8) -#define CNTL_BEBO (1 << 9) -#define CNTL_BEPO (1 << 10) -#define CNTL_LCDPWR (1 << 11) -#define CNTL_LCDVCOMP(x) ((x) << 12) -#define CNTL_LDMAFIFOTIME (1 << 15) -#define CNTL_WATERMARK (1 << 16) - -/* ST Microelectronics variant bits */ -#define CNTL_ST_1XBPP_444 0x0 -#define CNTL_ST_1XBPP_5551 (1 << 17) -#define CNTL_ST_1XBPP_565 (1 << 18) -#define CNTL_ST_CDWID_12 0x0 -#define CNTL_ST_CDWID_16 (1 << 19) -#define CNTL_ST_CDWID_18 (1 << 20) -#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) -#define CNTL_ST_CEAEN (1 << 21) -#define CNTL_ST_LCDBPP24_PACKED (6 << 1) - -#endif /* AMBA_CLCD_REGS_H */ diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h deleted file mode 100644 index b6e0cbeaf533..000000000000 --- a/include/linux/amba/clcd.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * linux/include/asm-arm/hardware/amba_clcd.h -- Integrator LCD panel. - * - * David A Rusling - * - * Copyright (C) 2001 ARM Limited - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ -#include <linux/fb.h> -#include <linux/amba/clcd-regs.h> - -enum { - /* individual formats */ - CLCD_CAP_RGB444 = (1 << 0), - CLCD_CAP_RGB5551 = (1 << 1), - CLCD_CAP_RGB565 = (1 << 2), - CLCD_CAP_RGB888 = (1 << 3), - CLCD_CAP_BGR444 = (1 << 4), - CLCD_CAP_BGR5551 = (1 << 5), - CLCD_CAP_BGR565 = (1 << 6), - CLCD_CAP_BGR888 = (1 << 7), - - /* connection layouts */ - CLCD_CAP_444 = CLCD_CAP_RGB444 | CLCD_CAP_BGR444, - CLCD_CAP_5551 = CLCD_CAP_RGB5551 | CLCD_CAP_BGR5551, - CLCD_CAP_565 = CLCD_CAP_RGB565 | CLCD_CAP_BGR565, - CLCD_CAP_888 = CLCD_CAP_RGB888 | CLCD_CAP_BGR888, - - /* red/blue ordering */ - CLCD_CAP_RGB = CLCD_CAP_RGB444 | CLCD_CAP_RGB5551 | - CLCD_CAP_RGB565 | CLCD_CAP_RGB888, - CLCD_CAP_BGR = CLCD_CAP_BGR444 | CLCD_CAP_BGR5551 | - CLCD_CAP_BGR565 | CLCD_CAP_BGR888, - - CLCD_CAP_ALL = CLCD_CAP_BGR | CLCD_CAP_RGB, -}; - -struct backlight_device; - -struct clcd_panel { - struct fb_videomode mode; - signed short width; /* width in mm */ - signed short height; /* height in mm */ - u32 tim2; - u32 tim3; - u32 cntl; - u32 caps; - unsigned int bpp:8, - fixedtimings:1, - grayscale:1; - unsigned int connector; - struct backlight_device *backlight; - /* - * If the B/R lines are switched between the CLCD - * and the panel we need to know this and not try to - * compensate with the BGR bit in the control register. - */ - bool bgr_connection; -}; - -struct clcd_regs { - u32 tim0; - u32 tim1; - u32 tim2; - u32 tim3; - u32 cntl; - unsigned long pixclock; -}; - -struct clcd_fb; - -/* - * the board-type specific routines - */ -struct clcd_board { - const char *name; - - /* - * Optional. Hardware capability flags. - */ - u32 caps; - - /* - * Optional. Check whether the var structure is acceptable - * for this display. - */ - int (*check)(struct clcd_fb *fb, struct fb_var_screeninfo *var); - - /* - * Compulsory. Decode fb->fb.var into regs->*. In the case of - * fixed timing, set regs->* to the register values required. - */ - void (*decode)(struct clcd_fb *fb, struct clcd_regs *regs); - - /* - * Optional. Disable any extra display hardware. - */ - void (*disable)(struct clcd_fb *); - - /* - * Optional. Enable any extra display hardware. - */ - void (*enable)(struct clcd_fb *); - - /* - * Setup platform specific parts of CLCD driver - */ - int (*setup)(struct clcd_fb *); - - /* - * mmap the framebuffer memory - */ - int (*mmap)(struct clcd_fb *, struct vm_area_struct *); - - /* - * Remove platform specific parts of CLCD driver - */ - void (*remove)(struct clcd_fb *); -}; - -struct amba_device; -struct clk; - -/* this data structure describes each frame buffer device we find */ -struct clcd_fb { - struct fb_info fb; - struct amba_device *dev; - struct clk *clk; - struct clcd_panel *panel; - struct clcd_board *board; - void *board_data; - void __iomem *regs; - u16 off_ienb; - u16 off_cntl; - u32 clcd_cntl; - u32 cmap[16]; - bool clk_enabled; -}; - -static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) -{ - struct fb_var_screeninfo *var = &fb->fb.var; - u32 val, cpl; - - /* - * Program the CLCD controller registers and start the CLCD - */ - val = ((var->xres / 16) - 1) << 2; - val |= (var->hsync_len - 1) << 8; - val |= (var->right_margin - 1) << 16; - val |= (var->left_margin - 1) << 24; - regs->tim0 = val; - - val = var->yres; - if (fb->panel->cntl & CNTL_LCDDUAL) - val /= 2; - val -= 1; - val |= (var->vsync_len - 1) << 10; - val |= var->lower_margin << 16; - val |= var->upper_margin << 24; - regs->tim1 = val; - - val = fb->panel->tim2; - val |= var->sync & FB_SYNC_HOR_HIGH_ACT ? 0 : TIM2_IHS; - val |= var->sync & FB_SYNC_VERT_HIGH_ACT ? 0 : TIM2_IVS; - - cpl = var->xres_virtual; - if (fb->panel->cntl & CNTL_LCDTFT) /* TFT */ - /* / 1 */; - else if (!var->grayscale) /* STN color */ - cpl = cpl * 8 / 3; - else if (fb->panel->cntl & CNTL_LCDMONO8) /* STN monochrome, 8bit */ - cpl /= 8; - else /* STN monochrome, 4bit */ - cpl /= 4; - - regs->tim2 = val | ((cpl - 1) << 16); - - regs->tim3 = fb->panel->tim3; - - val = fb->panel->cntl; - if (var->grayscale) - val |= CNTL_LCDBW; - - if (fb->panel->caps && fb->board->caps && var->bits_per_pixel >= 16) { - /* - * if board and panel supply capabilities, we can support - * changing BGR/RGB depending on supplied parameters. Here - * we switch to what the framebuffer is providing if need - * be, so if the framebuffer is BGR but the display connection - * is RGB (first case) we switch it around. Vice versa mutatis - * mutandis if the framebuffer is RGB but the display connection - * is BGR, we flip it around. - */ - if (var->red.offset == 0) - val &= ~CNTL_BGR; - else - val |= CNTL_BGR; - if (fb->panel->bgr_connection) - val ^= CNTL_BGR; - } - - switch (var->bits_per_pixel) { - case 1: - val |= CNTL_LCDBPP1; - break; - case 2: - val |= CNTL_LCDBPP2; - break; - case 4: - val |= CNTL_LCDBPP4; - break; - case 8: - val |= CNTL_LCDBPP8; - break; - case 16: - /* - * PL110 cannot choose between 5551 and 565 modes in its - * control register. It is possible to use 565 with - * custom external wiring. - */ - if (amba_part(fb->dev) == 0x110 || - var->green.length == 5) - val |= CNTL_LCDBPP16; - else if (var->green.length == 6) - val |= CNTL_LCDBPP16_565; - else - val |= CNTL_LCDBPP16_444; - break; - case 32: - val |= CNTL_LCDBPP24; - break; - } - - regs->cntl = val; - regs->pixclock = var->pixclock; -} - -static inline int clcdfb_check(struct clcd_fb *fb, struct fb_var_screeninfo *var) -{ - var->xres_virtual = var->xres = (var->xres + 15) & ~15; - var->yres_virtual = var->yres = (var->yres + 1) & ~1; - -#define CHECK(e,l,h) (var->e < l || var->e > h) - if (CHECK(right_margin, (5+1), 256) || /* back porch */ - CHECK(left_margin, (5+1), 256) || /* front porch */ - CHECK(hsync_len, (5+1), 256) || - var->xres > 4096 || - var->lower_margin > 255 || /* back porch */ - var->upper_margin > 255 || /* front porch */ - var->vsync_len > 32 || - var->yres > 1024) - return -EINVAL; -#undef CHECK - - /* single panel mode: PCD = max(PCD, 1) */ - /* dual panel mode: PCD = max(PCD, 5) */ - - /* - * You can't change the grayscale setting, and - * we can only do non-interlaced video. - */ - if (var->grayscale != fb->fb.var.grayscale || - (var->vmode & FB_VMODE_MASK) != FB_VMODE_NONINTERLACED) - return -EINVAL; - -#define CHECK(e) (var->e != fb->fb.var.e) - if (fb->panel->fixedtimings && - (CHECK(xres) || - CHECK(yres) || - CHECK(bits_per_pixel) || - CHECK(pixclock) || - CHECK(left_margin) || - CHECK(right_margin) || - CHECK(upper_margin) || - CHECK(lower_margin) || - CHECK(hsync_len) || - CHECK(vsync_len) || - CHECK(sync))) - return -EINVAL; -#undef CHECK - - var->nonstd = 0; - var->accel_flags = 0; - - return 0; -} diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index 9bf58aac0df2..d7b07d0311e1 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -16,6 +16,7 @@ #ifndef _SSP_PL022_H #define _SSP_PL022_H +#include <linux/dmaengine.h> #include <linux/types.h> /** @@ -224,6 +225,7 @@ struct dma_chan; * struct pl022_ssp_master - device.platform_data for SPI controller devices. * @bus_id: identifier for this bus * @enable_dma: if true enables DMA driven transfers. + * @dma_filter: callback filter for dma_request_channel. * @dma_rx_param: parameter to locate an RX DMA channel. * @dma_tx_param: parameter to locate a TX DMA channel. * @autosuspend_delay: delay in ms following transfer completion before the @@ -235,7 +237,7 @@ struct dma_chan; struct pl022_ssp_controller { u16 bus_id; u8 enable_dma:1; - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; void *dma_rx_param; void *dma_tx_param; int autosuspend_delay; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index a1307b58cc2c..9120de05ead0 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -10,6 +10,11 @@ #ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H #define ASM_ARM_HARDWARE_SERIAL_AMBA_H +#ifndef __ASSEMBLY__ +#include <linux/bitfield.h> +#include <linux/bits.h> +#endif + #include <linux/types.h> /* ------------------------------------------------------------------------------- @@ -70,141 +75,145 @@ #define ZX_UART011_ICR 0x4c #define ZX_UART011_DMACR 0x50 -#define UART011_DR_OE (1 << 11) -#define UART011_DR_BE (1 << 10) -#define UART011_DR_PE (1 << 9) -#define UART011_DR_FE (1 << 8) +#define UART011_DR_OE BIT(11) +#define UART011_DR_BE BIT(10) +#define UART011_DR_PE BIT(9) +#define UART011_DR_FE BIT(8) -#define UART01x_RSR_OE 0x08 -#define UART01x_RSR_BE 0x04 -#define UART01x_RSR_PE 0x02 -#define UART01x_RSR_FE 0x01 +#define UART01x_RSR_OE BIT(3) +#define UART01x_RSR_BE BIT(2) +#define UART01x_RSR_PE BIT(1) +#define UART01x_RSR_FE BIT(0) -#define UART011_FR_RI 0x100 -#define UART011_FR_TXFE 0x080 -#define UART011_FR_RXFF 0x040 -#define UART01x_FR_TXFF 0x020 -#define UART01x_FR_RXFE 0x010 -#define UART01x_FR_BUSY 0x008 -#define UART01x_FR_DCD 0x004 -#define UART01x_FR_DSR 0x002 -#define UART01x_FR_CTS 0x001 +#define UART011_FR_RI BIT(8) +#define UART011_FR_TXFE BIT(7) +#define UART011_FR_RXFF BIT(6) +#define UART01x_FR_TXFF (1 << 5) /* used in ASM */ +#define UART01x_FR_RXFE BIT(4) +#define UART01x_FR_BUSY (1 << 3) /* used in ASM */ +#define UART01x_FR_DCD BIT(2) +#define UART01x_FR_DSR BIT(1) +#define UART01x_FR_CTS BIT(0) #define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) /* * Some bits of Flag Register on ZTE device have different position from * standard ones. */ -#define ZX_UART01x_FR_BUSY 0x100 -#define ZX_UART01x_FR_DSR 0x008 -#define ZX_UART01x_FR_CTS 0x002 -#define ZX_UART011_FR_RI 0x001 +#define ZX_UART01x_FR_BUSY BIT(8) +#define ZX_UART01x_FR_DSR BIT(3) +#define ZX_UART01x_FR_CTS BIT(1) +#define ZX_UART011_FR_RI BIT(0) + +#define UART011_CR_CTSEN BIT(15) /* CTS hardware flow control */ +#define UART011_CR_RTSEN BIT(14) /* RTS hardware flow control */ +#define UART011_CR_OUT2 BIT(13) /* OUT2 */ +#define UART011_CR_OUT1 BIT(12) /* OUT1 */ +#define UART011_CR_RTS BIT(11) /* RTS */ +#define UART011_CR_DTR BIT(10) /* DTR */ +#define UART011_CR_RXE BIT(9) /* receive enable */ +#define UART011_CR_TXE BIT(8) /* transmit enable */ +#define UART011_CR_LBE BIT(7) /* loopback enable */ +#define UART010_CR_RTIE BIT(6) +#define UART010_CR_TIE BIT(5) +#define UART010_CR_RIE BIT(4) +#define UART010_CR_MSIE BIT(3) +#define ST_UART011_CR_OVSFACT BIT(3) /* Oversampling factor */ +#define UART01x_CR_IIRLP BIT(2) /* SIR low power mode */ +#define UART01x_CR_SIREN BIT(1) /* SIR enable */ +#define UART01x_CR_UARTEN BIT(0) /* UART enable */ -#define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ -#define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */ -#define UART011_CR_OUT2 0x2000 /* OUT2 */ -#define UART011_CR_OUT1 0x1000 /* OUT1 */ -#define UART011_CR_RTS 0x0800 /* RTS */ -#define UART011_CR_DTR 0x0400 /* DTR */ -#define UART011_CR_RXE 0x0200 /* receive enable */ -#define UART011_CR_TXE 0x0100 /* transmit enable */ -#define UART011_CR_LBE 0x0080 /* loopback enable */ -#define UART010_CR_RTIE 0x0040 -#define UART010_CR_TIE 0x0020 -#define UART010_CR_RIE 0x0010 -#define UART010_CR_MSIE 0x0008 -#define ST_UART011_CR_OVSFACT 0x0008 /* Oversampling factor */ -#define UART01x_CR_IIRLP 0x0004 /* SIR low power mode */ -#define UART01x_CR_SIREN 0x0002 /* SIR enable */ -#define UART01x_CR_UARTEN 0x0001 /* UART enable */ - -#define UART011_LCRH_SPS 0x80 +#define UART011_LCRH_SPS BIT(7) #define UART01x_LCRH_WLEN_8 0x60 #define UART01x_LCRH_WLEN_7 0x40 #define UART01x_LCRH_WLEN_6 0x20 #define UART01x_LCRH_WLEN_5 0x00 -#define UART01x_LCRH_FEN 0x10 -#define UART01x_LCRH_STP2 0x08 -#define UART01x_LCRH_EPS 0x04 -#define UART01x_LCRH_PEN 0x02 -#define UART01x_LCRH_BRK 0x01 +#define UART01x_LCRH_FEN BIT(4) +#define UART01x_LCRH_STP2 BIT(3) +#define UART01x_LCRH_EPS BIT(2) +#define UART01x_LCRH_PEN BIT(1) +#define UART01x_LCRH_BRK BIT(0) -#define ST_UART011_DMAWM_RX_1 (0 << 3) -#define ST_UART011_DMAWM_RX_2 (1 << 3) -#define ST_UART011_DMAWM_RX_4 (2 << 3) -#define ST_UART011_DMAWM_RX_8 (3 << 3) -#define ST_UART011_DMAWM_RX_16 (4 << 3) -#define ST_UART011_DMAWM_RX_32 (5 << 3) -#define ST_UART011_DMAWM_RX_48 (6 << 3) -#define ST_UART011_DMAWM_TX_1 0 -#define ST_UART011_DMAWM_TX_2 1 -#define ST_UART011_DMAWM_TX_4 2 -#define ST_UART011_DMAWM_TX_8 3 -#define ST_UART011_DMAWM_TX_16 4 -#define ST_UART011_DMAWM_TX_32 5 -#define ST_UART011_DMAWM_TX_48 6 +#define ST_UART011_DMAWM_RX GENMASK(5, 3) +#define ST_UART011_DMAWM_RX_1 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 0) +#define ST_UART011_DMAWM_RX_2 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 1) +#define ST_UART011_DMAWM_RX_4 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 2) +#define ST_UART011_DMAWM_RX_8 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 3) +#define ST_UART011_DMAWM_RX_16 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 4) +#define ST_UART011_DMAWM_RX_32 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 5) +#define ST_UART011_DMAWM_RX_48 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 6) +#define ST_UART011_DMAWM_TX GENMASK(2, 0) +#define ST_UART011_DMAWM_TX_1 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 0) +#define ST_UART011_DMAWM_TX_2 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 1) +#define ST_UART011_DMAWM_TX_4 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 2) +#define ST_UART011_DMAWM_TX_8 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 3) +#define ST_UART011_DMAWM_TX_16 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 4) +#define ST_UART011_DMAWM_TX_32 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 5) +#define ST_UART011_DMAWM_TX_48 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 6) -#define UART010_IIR_RTIS 0x08 -#define UART010_IIR_TIS 0x04 -#define UART010_IIR_RIS 0x02 -#define UART010_IIR_MIS 0x01 +#define UART010_IIR_RTIS BIT(3) +#define UART010_IIR_TIS BIT(2) +#define UART010_IIR_RIS BIT(1) +#define UART010_IIR_MIS BIT(0) -#define UART011_IFLS_RX1_8 (0 << 3) -#define UART011_IFLS_RX2_8 (1 << 3) -#define UART011_IFLS_RX4_8 (2 << 3) -#define UART011_IFLS_RX6_8 (3 << 3) -#define UART011_IFLS_RX7_8 (4 << 3) -#define UART011_IFLS_TX1_8 (0 << 0) -#define UART011_IFLS_TX2_8 (1 << 0) -#define UART011_IFLS_TX4_8 (2 << 0) -#define UART011_IFLS_TX6_8 (3 << 0) -#define UART011_IFLS_TX7_8 (4 << 0) +#define UART011_IFLS_RXIFLSEL GENMASK(5, 3) +#define UART011_IFLS_RX1_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 0) +#define UART011_IFLS_RX2_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 1) +#define UART011_IFLS_RX4_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 2) +#define UART011_IFLS_RX6_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 3) +#define UART011_IFLS_RX7_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 4) +#define UART011_IFLS_TXIFLSEL GENMASK(2, 0) +#define UART011_IFLS_TX1_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 0) +#define UART011_IFLS_TX2_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 1) +#define UART011_IFLS_TX4_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 2) +#define UART011_IFLS_TX6_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 3) +#define UART011_IFLS_TX7_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 4) /* special values for ST vendor with deeper fifo */ -#define UART011_IFLS_RX_HALF (5 << 3) -#define UART011_IFLS_TX_HALF (5 << 0) +#define UART011_IFLS_RX_HALF FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 5) +#define UART011_IFLS_TX_HALF FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 5) -#define UART011_OEIM (1 << 10) /* overrun error interrupt mask */ -#define UART011_BEIM (1 << 9) /* break error interrupt mask */ -#define UART011_PEIM (1 << 8) /* parity error interrupt mask */ -#define UART011_FEIM (1 << 7) /* framing error interrupt mask */ -#define UART011_RTIM (1 << 6) /* receive timeout interrupt mask */ -#define UART011_TXIM (1 << 5) /* transmit interrupt mask */ -#define UART011_RXIM (1 << 4) /* receive interrupt mask */ -#define UART011_DSRMIM (1 << 3) /* DSR interrupt mask */ -#define UART011_DCDMIM (1 << 2) /* DCD interrupt mask */ -#define UART011_CTSMIM (1 << 1) /* CTS interrupt mask */ -#define UART011_RIMIM (1 << 0) /* RI interrupt mask */ +#define UART011_OEIM BIT(10) /* overrun error interrupt mask */ +#define UART011_BEIM BIT(9) /* break error interrupt mask */ +#define UART011_PEIM BIT(8) /* parity error interrupt mask */ +#define UART011_FEIM BIT(7) /* framing error interrupt mask */ +#define UART011_RTIM BIT(6) /* receive timeout interrupt mask */ +#define UART011_TXIM BIT(5) /* transmit interrupt mask */ +#define UART011_RXIM BIT(4) /* receive interrupt mask */ +#define UART011_DSRMIM BIT(3) /* DSR interrupt mask */ +#define UART011_DCDMIM BIT(2) /* DCD interrupt mask */ +#define UART011_CTSMIM BIT(1) /* CTS interrupt mask */ +#define UART011_RIMIM BIT(0) /* RI interrupt mask */ -#define UART011_OEIS (1 << 10) /* overrun error interrupt status */ -#define UART011_BEIS (1 << 9) /* break error interrupt status */ -#define UART011_PEIS (1 << 8) /* parity error interrupt status */ -#define UART011_FEIS (1 << 7) /* framing error interrupt status */ -#define UART011_RTIS (1 << 6) /* receive timeout interrupt status */ -#define UART011_TXIS (1 << 5) /* transmit interrupt status */ -#define UART011_RXIS (1 << 4) /* receive interrupt status */ -#define UART011_DSRMIS (1 << 3) /* DSR interrupt status */ -#define UART011_DCDMIS (1 << 2) /* DCD interrupt status */ -#define UART011_CTSMIS (1 << 1) /* CTS interrupt status */ -#define UART011_RIMIS (1 << 0) /* RI interrupt status */ +#define UART011_OEIS BIT(10) /* overrun error interrupt status */ +#define UART011_BEIS BIT(9) /* break error interrupt status */ +#define UART011_PEIS BIT(8) /* parity error interrupt status */ +#define UART011_FEIS BIT(7) /* framing error interrupt status */ +#define UART011_RTIS BIT(6) /* receive timeout interrupt status */ +#define UART011_TXIS BIT(5) /* transmit interrupt status */ +#define UART011_RXIS BIT(4) /* receive interrupt status */ +#define UART011_DSRMIS BIT(3) /* DSR interrupt status */ +#define UART011_DCDMIS BIT(2) /* DCD interrupt status */ +#define UART011_CTSMIS BIT(1) /* CTS interrupt status */ +#define UART011_RIMIS BIT(0) /* RI interrupt status */ -#define UART011_OEIC (1 << 10) /* overrun error interrupt clear */ -#define UART011_BEIC (1 << 9) /* break error interrupt clear */ -#define UART011_PEIC (1 << 8) /* parity error interrupt clear */ -#define UART011_FEIC (1 << 7) /* framing error interrupt clear */ -#define UART011_RTIC (1 << 6) /* receive timeout interrupt clear */ -#define UART011_TXIC (1 << 5) /* transmit interrupt clear */ -#define UART011_RXIC (1 << 4) /* receive interrupt clear */ -#define UART011_DSRMIC (1 << 3) /* DSR interrupt clear */ -#define UART011_DCDMIC (1 << 2) /* DCD interrupt clear */ -#define UART011_CTSMIC (1 << 1) /* CTS interrupt clear */ -#define UART011_RIMIC (1 << 0) /* RI interrupt clear */ +#define UART011_OEIC BIT(10) /* overrun error interrupt clear */ +#define UART011_BEIC BIT(9) /* break error interrupt clear */ +#define UART011_PEIC BIT(8) /* parity error interrupt clear */ +#define UART011_FEIC BIT(7) /* framing error interrupt clear */ +#define UART011_RTIC BIT(6) /* receive timeout interrupt clear */ +#define UART011_TXIC BIT(5) /* transmit interrupt clear */ +#define UART011_RXIC BIT(4) /* receive interrupt clear */ +#define UART011_DSRMIC BIT(3) /* DSR interrupt clear */ +#define UART011_DCDMIC BIT(2) /* DCD interrupt clear */ +#define UART011_CTSMIC BIT(1) /* CTS interrupt clear */ +#define UART011_RIMIC BIT(0) /* RI interrupt clear */ -#define UART011_DMAONERR (1 << 2) /* disable dma on error */ -#define UART011_TXDMAE (1 << 1) /* enable transmit dma */ -#define UART011_RXDMAE (1 << 0) /* enable receive dma */ +#define UART011_DMAONERR BIT(2) /* disable dma on error */ +#define UART011_TXDMAE BIT(1) /* enable transmit dma */ +#define UART011_RXDMAE BIT(0) /* enable receive dma */ -#define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) -#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) +#define UART01x_RSR_ANY (UART01x_RSR_OE | UART01x_RSR_BE | UART01x_RSR_PE | UART01x_RSR_FE) +#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD | UART01x_FR_DSR | UART01x_FR_CTS) #ifndef __ASSEMBLY__ struct amba_device; /* in uncompress this is included but amba/bus.h is not */ @@ -220,8 +229,8 @@ struct amba_pl011_data { bool dma_rx_poll_enable; unsigned int dma_rx_poll_rate; unsigned int dma_rx_poll_timeout; - void (*init) (void); - void (*exit) (void); + void (*init)(void); + void (*exit)(void); }; #endif diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99a5201d9e62..2b90c48a6a87 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,126 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. - */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); - -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } @@ -205,8 +85,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); struct amd_iommu *get_amd_iommu(unsigned int idx); -#ifdef CONFIG_AMD_MEM_ENCRYPT -int amd_iommu_snp_enable(void); +#ifdef CONFIG_KVM_AMD_SEV +int amd_iommu_snp_disable(void); +#else +static inline int amd_iommu_snp_disable(void) { return 0; } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/include/linux/amd-pmf-io.h b/include/linux/amd-pmf-io.h new file mode 100644 index 000000000000..b4f818205216 --- /dev/null +++ b/include/linux/amd-pmf-io.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Platform Management Framework Interface + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Authors: Shyam Sundar S K <Shyam-sundar.S-k@amd.com> + * Basavaraj Natikar <Basavaraj.Natikar@amd.com> + */ + +#ifndef AMD_PMF_IO_H +#define AMD_PMF_IO_H + +#include <linux/types.h> + +/** + * enum sfh_message_type - Query the SFH message type + * @MT_HPD: Message ID to know the Human presence info from MP2 FW + * @MT_ALS: Message ID to know the Ambient light info from MP2 FW + */ +enum sfh_message_type { + MT_HPD, + MT_ALS, +}; + +/** + * enum sfh_hpd_info - Query the Human presence information + * @SFH_NOT_DETECTED: Check the HPD connection information from MP2 FW + * @SFH_USER_PRESENT: Check if the user is present from HPD sensor + * @SFH_USER_AWAY: Check if the user is away from HPD sensor + */ +enum sfh_hpd_info { + SFH_NOT_DETECTED, + SFH_USER_PRESENT, + SFH_USER_AWAY, +}; + +/** + * struct amd_sfh_info - get HPD sensor info from MP2 FW + * @ambient_light: Populates the ambient light information + * @user_present: Populates the user presence information + */ +struct amd_sfh_info { + u32 ambient_light; + u8 user_present; +}; + +int amd_get_sfh_info(struct amd_sfh_info *sfh_info, enum sfh_message_type op); +#endif diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 446394f84606..d21838835abd 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -39,11 +39,16 @@ struct amd_aperf_mperf { * @cppc_req_cached: cached performance request hints * @highest_perf: the maximum performance an individual processor may reach, * assuming ideal conditions + * For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the fixed value as the highest_perf. * @nominal_perf: the maximum sustained performance level of the processor, * assuming ideal operating conditions * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power * savings are achieved * @lowest_perf: the absolute lowest performance level of the processor + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher + * priority. * @max_freq: the frequency that mapped to highest_perf * @min_freq: the frequency that mapped to lowest_perf * @nominal_freq: the frequency that mapped to nominal_perf @@ -52,6 +57,9 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value @@ -70,6 +78,11 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 prefcore_ranking; + u32 min_limit_perf; + u32 max_limit_perf; + u32 min_limit_freq; + u32 max_limit_freq; u32 max_freq; u32 min_freq; @@ -81,6 +94,7 @@ struct amd_cpudata { u64 freq; bool boost_supported; + bool hw_prefcore; /* EPP feature related attributes*/ s16 epp_policy; diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 5deaddbd7927..93a5f16d03f3 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -15,13 +15,13 @@ struct inode; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); -struct file *anon_inode_getfile_secure(const char *name, +struct file *anon_inode_create_getfile(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); -int anon_inode_getfd_secure(const char *name, +int anon_inode_create_getfd(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode); diff --git a/include/linux/apple-mailbox.h b/include/linux/apple-mailbox.h deleted file mode 100644 index 720fbb70294a..000000000000 --- a/include/linux/apple-mailbox.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ -/* - * Apple mailbox message format - * - * Copyright (C) 2021 The Asahi Linux Contributors - */ - -#ifndef _LINUX_APPLE_MAILBOX_H_ -#define _LINUX_APPLE_MAILBOX_H_ - -#include <linux/types.h> - -/* encodes a single 96bit message sent over the single channel */ -struct apple_mbox_msg { - u64 msg0; - u32 msg1; -}; - -#endif diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index a07b510e7dc5..a63d61ca55af 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -27,6 +27,13 @@ static inline unsigned long topology_get_cpu_scale(int cpu) void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); +DECLARE_PER_CPU(unsigned long, capacity_freq_ref); + +static inline unsigned long topology_get_freq_ref(int cpu) +{ + return per_cpu(capacity_freq_ref, cpu); +} + DECLARE_PER_CPU(unsigned long, arch_freq_scale); static inline unsigned long topology_get_freq_scale(int cpu) @@ -92,6 +99,7 @@ void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); +void freq_inv_set_max_ratio(int cpu, u64 max_rate); #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cc060da51bec..c906f666ff5d 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -6,6 +6,7 @@ #ifndef _LINUX_ARM_FFA_H #define _LINUX_ARM_FFA_H +#include <linux/bitfield.h> #include <linux/device.h> #include <linux/module.h> #include <linux/types.h> @@ -20,6 +21,7 @@ #define FFA_ERROR FFA_SMC_32(0x60) #define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_FN64_SUCCESS FFA_SMC_64(0x61) #define FFA_INTERRUPT FFA_SMC_32(0x62) #define FFA_VERSION FFA_SMC_32(0x63) #define FFA_FEATURES FFA_SMC_32(0x64) @@ -54,6 +56,23 @@ #define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) #define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) #define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) +#define FFA_NOTIFICATION_BITMAP_CREATE FFA_SMC_32(0x7D) +#define FFA_NOTIFICATION_BITMAP_DESTROY FFA_SMC_32(0x7E) +#define FFA_NOTIFICATION_BIND FFA_SMC_32(0x7F) +#define FFA_NOTIFICATION_UNBIND FFA_SMC_32(0x80) +#define FFA_NOTIFICATION_SET FFA_SMC_32(0x81) +#define FFA_NOTIFICATION_GET FFA_SMC_32(0x82) +#define FFA_NOTIFICATION_INFO_GET FFA_SMC_32(0x83) +#define FFA_FN64_NOTIFICATION_INFO_GET FFA_SMC_64(0x83) +#define FFA_RX_ACQUIRE FFA_SMC_32(0x84) +#define FFA_SPM_ID_GET FFA_SMC_32(0x85) +#define FFA_MSG_SEND2 FFA_SMC_32(0x86) +#define FFA_SECONDARY_EP_REGISTER FFA_SMC_32(0x87) +#define FFA_FN64_SECONDARY_EP_REGISTER FFA_SMC_64(0x87) +#define FFA_MEM_PERM_GET FFA_SMC_32(0x88) +#define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) +#define FFA_MEM_PERM_SET FFA_SMC_32(0x89) +#define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. @@ -76,6 +95,7 @@ #define FFA_RET_DENIED (-6) #define FFA_RET_RETRY (-7) #define FFA_RET_ABORTED (-8) +#define FFA_RET_NO_DATA (-9) /* FFA version encoding */ #define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) @@ -86,6 +106,7 @@ (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) +#define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) /** * FF-A specification mentions explicitly about '4K pages'. This should @@ -188,6 +209,8 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } #define module_ffa_driver(__ffa_driver) \ module_driver(__ffa_driver, ffa_register, ffa_unregister) +extern const struct bus_type ffa_bus_type; + /* FFA transport related */ struct ffa_partition_info { u16 id; @@ -278,8 +301,8 @@ struct ffa_mem_region { #define FFA_MEM_NON_SHAREABLE (0) #define FFA_MEM_OUTER_SHAREABLE (2) #define FFA_MEM_INNER_SHAREABLE (3) - u8 attributes; - u8 reserved_0; + /* Memory region attributes, upper byte MBZ pre v1.1 */ + u16 attributes; /* * Clear memory region contents after unmapping it from the sender and * before mapping it for any receiver. @@ -317,27 +340,41 @@ struct ffa_mem_region { * memory region. */ u64 tag; - u32 reserved_1; + /* Size of each endpoint memory access descriptor, MBZ pre v1.1 */ + u32 ep_mem_size; /* * The number of `ffa_mem_region_attributes` entries included in this * transaction. */ u32 ep_count; /* - * An array of endpoint memory access descriptors. - * Each one specifies a memory region offset, an endpoint and the - * attributes with which this memory region should be mapped in that - * endpoint's page table. + * 16-byte aligned offset from the base address of this descriptor + * to the first element of the endpoint memory access descriptor array + * Valid only from v1.1 */ - struct ffa_mem_region_attributes ep_mem_access[]; + u32 ep_mem_offset; + /* MBZ, valid only from v1.1 */ + u32 reserved[3]; }; -#define COMPOSITE_OFFSET(x) \ - (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) -#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \ - (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y)) + +static inline u32 +ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) +{ + u32 offset = count * sizeof(struct ffa_mem_region_attributes); + /* + * Earlier to v1.1, the endpoint memory descriptor array started at + * offset 32(i.e. offset of ep_mem_offset in the current structure) + */ + if (ffa_version <= FFA_VERSION_1_0) + offset += offsetof(struct ffa_mem_region, ep_mem_offset); + else + offset += sizeof(struct ffa_mem_region); + + return offset; +} struct ffa_mem_ops_args { bool use_txbuf; @@ -367,10 +404,30 @@ struct ffa_mem_ops { int (*memory_lend)(struct ffa_mem_ops_args *args); }; +struct ffa_cpu_ops { + int (*run)(struct ffa_device *dev, u16 vcpu); +}; + +typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); + +struct ffa_notifier_ops { + int (*sched_recv_cb_register)(struct ffa_device *dev, + ffa_sched_recv_cb cb, void *cb_data); + int (*sched_recv_cb_unregister)(struct ffa_device *dev); + int (*notify_request)(struct ffa_device *dev, bool per_vcpu, + ffa_notifier_cb cb, void *cb_data, int notify_id); + int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, + u16 vcpu); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; + const struct ffa_cpu_ops *cpu_ops; + const struct ffa_notifier_ops *notifier_ops; }; #endif /* _LINUX_ARM_FFA_H */ diff --git a/include/linux/array_size.h b/include/linux/array_size.h new file mode 100644 index 000000000000..06d7d83196ca --- /dev/null +++ b/include/linux/array_size.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ARRAY_SIZE_H +#define _LINUX_ARRAY_SIZE_H + +#include <linux/compiler.h> + +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#endif /* _LINUX_ARRAY_SIZE_H */ diff --git a/include/linux/async.h b/include/linux/async.h index cce4ad31e8fc..19b778d08600 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously @@ -118,4 +120,5 @@ extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); +extern void async_init(void); #endif diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index b83ef19da13d..956bcba5dbf2 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_sync_cmpxchg arch_sync_cmpxchg +#ifdef arch_sync_try_cmpxchg +#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg +#else +#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif + /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t @@ -1992,6 +2005,7 @@ raw_atomic_xchg_relaxed(atomic_t *v, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere. * @@ -2020,6 +2034,7 @@ raw_atomic_cmpxchg(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere. * @@ -2048,6 +2063,7 @@ raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere. * @@ -2075,6 +2091,7 @@ raw_atomic_cmpxchg_release(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere. * @@ -2099,7 +2116,8 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere. * @@ -2132,7 +2150,8 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere. * @@ -2165,7 +2184,8 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere. * @@ -2197,7 +2217,8 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere. * @@ -2390,6 +2411,7 @@ raw_atomic_add_negative_relaxed(int i, atomic_t *v) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere. * @@ -2419,6 +2441,7 @@ raw_atomic_fetch_add_unless(atomic_t *v, int a, int u) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_add_unless() elsewhere. * @@ -2439,6 +2462,7 @@ raw_atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere. * @@ -2459,6 +2483,7 @@ raw_atomic_inc_not_zero(atomic_t *v) * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere. * @@ -2486,6 +2511,7 @@ raw_atomic_inc_unless_negative(atomic_t *v) * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere. * @@ -2513,6 +2539,7 @@ raw_atomic_dec_unless_positive(atomic_t *v) * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere. * @@ -4104,6 +4131,7 @@ raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere. * @@ -4132,6 +4160,7 @@ raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere. * @@ -4160,6 +4189,7 @@ raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere. * @@ -4187,6 +4217,7 @@ raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere. * @@ -4211,7 +4242,8 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere. * @@ -4244,7 +4276,8 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere. * @@ -4277,7 +4310,8 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere. * @@ -4309,7 +4343,8 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere. * @@ -4502,6 +4537,7 @@ raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere. * @@ -4531,6 +4567,7 @@ raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere. * @@ -4551,6 +4588,7 @@ raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere. * @@ -4571,6 +4609,7 @@ raw_atomic64_inc_not_zero(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere. * @@ -4598,6 +4637,7 @@ raw_atomic64_inc_unless_negative(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere. * @@ -4625,6 +4665,7 @@ raw_atomic64_dec_unless_positive(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere. * @@ -4649,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 2fdd6702823fa842f9cea57a002e6e4476ae780c +// 14850c0b0db20c62fdc78ccd1d42b98b88d76331 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index d401b406ef7c..debd487fe971 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -1182,6 +1182,7 @@ atomic_xchg_relaxed(atomic_t *v, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there. * @@ -1202,6 +1203,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there. * @@ -1221,6 +1223,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there. * @@ -1241,6 +1244,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there. * @@ -1260,7 +1264,8 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there. * @@ -1282,7 +1287,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there. * @@ -1303,7 +1309,8 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there. * @@ -1325,7 +1332,8 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there. * @@ -1475,6 +1483,7 @@ atomic_add_negative_relaxed(int i, atomic_t *v) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there. * @@ -1495,6 +1504,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_add_unless() there. * @@ -1513,6 +1523,7 @@ atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there. * @@ -1531,6 +1542,7 @@ atomic_inc_not_zero(atomic_t *v) * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there. * @@ -1549,6 +1561,7 @@ atomic_inc_unless_negative(atomic_t *v) * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there. * @@ -1567,6 +1580,7 @@ atomic_dec_unless_positive(atomic_t *v) * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there. * @@ -2746,6 +2760,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there. * @@ -2766,6 +2781,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there. * @@ -2785,6 +2801,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there. * @@ -2805,6 +2822,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there. * @@ -2824,7 +2842,8 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there. * @@ -2846,7 +2865,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there. * @@ -2867,7 +2887,8 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there. * @@ -2889,7 +2910,8 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there. * @@ -3039,6 +3061,7 @@ atomic64_add_negative_relaxed(s64 i, atomic64_t *v) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there. * @@ -3059,6 +3082,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_add_unless() there. * @@ -3077,6 +3101,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there. * @@ -3095,6 +3120,7 @@ atomic64_inc_not_zero(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there. * @@ -3113,6 +3139,7 @@ atomic64_inc_unless_negative(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there. * @@ -3131,6 +3158,7 @@ atomic64_dec_unless_positive(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there. * @@ -4310,6 +4338,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there. * @@ -4330,6 +4359,7 @@ atomic_long_cmpxchg(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there. * @@ -4349,6 +4379,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there. * @@ -4369,6 +4400,7 @@ atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there. * @@ -4388,7 +4420,8 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there. * @@ -4410,7 +4443,8 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there. * @@ -4431,7 +4465,8 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there. * @@ -4453,7 +4488,8 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there. * @@ -4603,6 +4639,7 @@ atomic_long_add_negative_relaxed(long i, atomic_long_t *v) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there. * @@ -4623,6 +4660,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there. * @@ -4641,6 +4679,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u) * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there. * @@ -4659,6 +4698,7 @@ atomic_long_inc_not_zero(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there. * @@ -4677,6 +4717,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there. * @@ -4695,6 +4736,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there. * @@ -4998,6 +5040,14 @@ atomic_long_dec_if_positive(atomic_long_t *v) raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define sync_try_cmpxchg(ptr, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ + instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ + raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ +}) + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 1568f875fef72097413caab8339120c065a39aa4 +// ce5b65e0f1f8a276268b667194581d24bed219d4 diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h index c82947170ddc..3ef844b3ab8a 100644 --- a/include/linux/atomic/atomic-long.h +++ b/include/linux/atomic/atomic-long.h @@ -1352,6 +1352,7 @@ raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere. * @@ -1374,6 +1375,7 @@ raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere. * @@ -1396,6 +1398,7 @@ raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere. * @@ -1418,6 +1421,7 @@ raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere. * @@ -1440,7 +1444,8 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere. * @@ -1463,7 +1468,8 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere. * @@ -1486,7 +1492,8 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere. * @@ -1509,7 +1516,8 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere. * @@ -1677,6 +1685,7 @@ raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere. * @@ -1699,6 +1708,7 @@ raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere. * @@ -1719,6 +1729,7 @@ raw_atomic_long_add_unless(atomic_long_t *v, long a, long u) * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere. * @@ -1739,6 +1750,7 @@ raw_atomic_long_inc_not_zero(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere. * @@ -1759,6 +1771,7 @@ raw_atomic_long_inc_unless_negative(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere. * @@ -1779,6 +1792,7 @@ raw_atomic_long_dec_unless_positive(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere. * @@ -1795,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v) } #endif /* _LINUX_ATOMIC_LONG_H */ -// 4ef23f98c73cff96d239896175fd26b10b88899e +// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a diff --git a/include/linux/audit.h b/include/linux/audit.h index 51b1b7054a23..0050ef288ab3 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -36,6 +36,7 @@ struct mqstat; struct audit_watch; struct audit_tree; struct sk_buff; +struct kern_ipc_perm; struct audit_krule { u32 pflags; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index d0807ad43f93..8e177b67e82f 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -4,6 +4,11 @@ #ifndef _VIRTCHNL_H_ #define _VIRTCHNL_H_ +#include <linux/bitops.h> +#include <linux/bits.h> +#include <linux/overflow.h> +#include <uapi/linux/if_ether.h> + /* Description: * This header file describes the Virtual Function (VF) - Physical Function * (PF) communication protocol used by the drivers for all devices starting @@ -114,6 +119,7 @@ enum virtchnl_ops { VIRTCHNL_OP_GET_STATS = 15, VIRTCHNL_OP_RSVD = 16, VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + VIRTCHNL_OP_CONFIG_RSS_HFUNC = 18, /* opcode 19 is reserved */ VIRTCHNL_OP_IWARP = 20, /* advanced opcode */ VIRTCHNL_OP_RDMA = VIRTCHNL_OP_IWARP, @@ -240,6 +246,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -295,7 +302,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. - * PF configures requested queue and returns a status code. + * PF configures requested queue and returns a status code. The + * crc_disable flag disables CRC stripping on the VF. Setting + * the crc_disable flag to 1 will disable CRC stripping for each + * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC + * offload must have been set prior to sending this info or the PF + * will ignore the request. This flag should be set the same for + * all of the queues for a VF. */ /* Rx queue config info */ @@ -307,7 +320,7 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u8 pad0; + u8 crc_disable; u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; @@ -900,6 +913,29 @@ struct virtchnl_rss_hena { VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); +/* Type of RSS algorithm */ +enum virtchnl_rss_algorithm { + VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, + VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1, + VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, + VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3, +}; + +/* VIRTCHNL_OP_CONFIG_RSS_HFUNC + * VF sends this message to configure the RSS hash function. Only supported + * if both PF and VF drivers set the VIRTCHNL_VF_OFFLOAD_RSS_PF bit during + * configuration negotiation. + * The hash function is initialized to VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC + * by the PF. + */ +struct virtchnl_rss_hfunc { + u16 vsi_id; + u16 rss_algorithm; /* enum virtchnl_rss_algorithm */ + u32 reserved; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hfunc); + /* VIRTCHNL_OP_ENABLE_CHANNELS * VIRTCHNL_OP_DISABLE_CHANNELS * VF sends these messages to enable or disable channels based on @@ -1084,14 +1120,6 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_VFACTIVE, }; -/* Type of RSS algorithm */ -enum virtchnl_rss_algorithm { - VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, - VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1, - VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, - VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3, -}; - #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 #define PROTO_HDR_SHIFT 5 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) @@ -1531,6 +1559,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, vrl->lut_entries); } break; + case VIRTCHNL_OP_CONFIG_RSS_HFUNC: + valid_len = sizeof(struct virtchnl_rss_hfunc); + break; case VIRTCHNL_OP_GET_RSS_HENA_CAPS: break; case VIRTCHNL_OP_SET_RSS_HENA: diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ae12696ec492..2ad261082bba 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -141,8 +141,6 @@ struct bdi_writeback { struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ - unsigned long dirty_sleep; /* last wait */ - struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK @@ -179,6 +177,11 @@ struct backing_dev_info { * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; + /* + * Jiffies when last process was dirty throttled on this bdi. Used by + * blk-wbt. + */ + unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 1a97277f99b1..8e7af9a03b41 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); -void wb_wakeup_delayed(struct bdi_writeback *wb); void wb_wait_for_completion(struct wb_completion *done); diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h new file mode 100644 index 000000000000..3f1fe1774f1b --- /dev/null +++ b/include/linux/backing-file.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Common helpers for stackable filesystems and backing files. + * + * Copyright (C) 2023 CTERA Networks. + */ + +#ifndef _LINUX_BACKING_FILE_H +#define _LINUX_BACKING_FILE_H + +#include <linux/file.h> +#include <linux/uio.h> +#include <linux/fs.h> + +struct backing_file_ctx { + const struct cred *cred; + struct file *user_file; + void (*accessed)(struct file *); + void (*end_write)(struct file *); +}; + +struct file *backing_file_open(const struct path *user_path, int flags, + const struct path *real_path, + const struct cred *cred); +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx); +int backing_file_mmap(struct file *file, struct vm_area_struct *vma, + struct backing_file_ctx *ctx); + +#endif /* _LINUX_BACKING_FILE_H */ diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 2426276b9bd3..670f2dae692f 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -15,6 +15,7 @@ #define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) #define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) #define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_END(x) (BB_OFFSET(x) + BB_LEN(x)) #define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) /* Bad block numbers are stored sorted in a single page. @@ -41,6 +42,12 @@ struct badblocks { sector_t size; /* in sectors */ }; +struct badblocks_context { + sector_t start; + sector_t len; + int ack; +}; + int badblocks_check(struct badblocks *bb, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors); int badblocks_set(struct badblocks *bb, sector_t s, int sectors, @@ -63,4 +70,27 @@ static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb) } badblocks_exit(bb); } + +static inline int badblocks_full(struct badblocks *bb) +{ + return (bb->count >= MAX_BADBLOCKS); +} + +static inline int badblocks_empty(struct badblocks *bb) +{ + return (bb->count == 0); +} + +static inline void set_changed(struct badblocks *bb) +{ + if (bb->changed != 1) + bb->changed = 1; +} + +static inline void clear_changed(struct badblocks *bb) +{ + if (bb->changed != 0) + bb->changed = 0; +} + #endif diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8d51f69f9f5e..70f97f685bff 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,6 +90,16 @@ struct linux_binfmt { #endif } __randomize_layout; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc { + struct list_head entries; + rwlock_t entries_lock; + bool enabled; +} __randomize_layout; + +extern struct binfmt_misc init_binfmt_misc; +#endif + extern void __register_binfmt(struct linux_binfmt *fmt, int insert); /* Registration of default binfmt handlers */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 41d417ee1349..875d792bffff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -286,6 +286,11 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, { struct bio_vec *bvec = bio_first_bvec_all(bio) + i; + if (unlikely(i >= bio->bi_vcnt)) { + fi->folio = NULL; + return; + } + fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); @@ -303,10 +308,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); fi->_next = folio_next(fi->folio); - } else if (fi->_i + 1 < bio->bi_vcnt) { - bio_first_folio(fi, bio, fi->_i + 1); } else { - fi->folio = NULL; + bio_first_folio(fi, bio, fi->_i + 1); } } @@ -324,6 +327,8 @@ enum bip_flags { BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ + BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */ + BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */ }; /* @@ -718,6 +723,7 @@ static inline bool bioset_initialized(struct bio_set *bs) for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); @@ -789,6 +795,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, return 0; } +static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, + ssize_t len, u32 seed) +{ + return -EINVAL; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ /* diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index ebfa12f69501..63928f173223 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -66,7 +66,8 @@ _pfx "mask is not constant"); \ BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ - ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + ~((_mask) >> __bf_shf(_mask)) & \ + (0 + (_val)) : 0, \ _pfx "value too large for the field"); \ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ __bf_cast_unsigned(_reg, ~0ull), \ diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h new file mode 100644 index 000000000000..17caeca94cab --- /dev/null +++ b/include/linux/bitmap-str.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITMAP_STR_H +#define __LINUX_BITMAP_STR_H + +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); +extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); + +#endif /* __LINUX_BITMAP_STR_H */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 03644237e1ef..aa4096126553 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,10 +6,13 @@ #include <linux/align.h> #include <linux/bitops.h> +#include <linux/cleanup.h> +#include <linux/errno.h> #include <linux/find.h> #include <linux/limits.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/bitmap-str.h> struct device; @@ -52,6 +55,7 @@ struct device; * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap + * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -60,6 +64,8 @@ struct device; * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) + * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src) + * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap @@ -125,6 +131,8 @@ unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); +DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T)) + /* Managed variants of the above. */ unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags); @@ -167,6 +175,8 @@ bool __bitmap_subset(const unsigned long *bitmap1, unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); unsigned int __bitmap_weight_and(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -200,14 +210,6 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -int bitmap_parse(const char *buf, unsigned int buflen, - unsigned long *dst, int nbits); -int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); -int bitmap_parselist(const char *buf, unsigned long *maskp, - int nmaskbits); -int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); int bitmap_bitremap(int oldbit, @@ -216,23 +218,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); -int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); -void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); -int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); - -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); -#else -#define bitmap_copy_le bitmap_copy -#endif -int bitmap_print_to_pagebuf(bool list, char *buf, - const unsigned long *maskp, int nmaskbits); - -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -448,6 +433,15 @@ unsigned long bitmap_weight_and(const unsigned long *src1, return __bitmap_weight_and(src1, src2, nbits); } +static __always_inline +unsigned long bitmap_weight_andnot(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight_andnot(src1, src2, nbits); +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { @@ -510,6 +504,107 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } +/** + * bitmap_scatter - Scatter a bitmap according to the given mask + * @dst: scattered bitmap + * @src: gathered bitmap + * @mask: mask representing bits to assign to in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Scatters bitmap with sequential bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302. + * + * Or in binary form + * @src @mask @dst + * 0000000001011010 0001001100010011 0000001100000010 + * + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12) + * + * A more 'visual' description of the operation:: + * + * src: 0000000001011010 + * |||||| + * +------+||||| + * | +----+|||| + * | |+----+||| + * | || +-+|| + * | || | || + * mask: ...v..vv...v..vv + * ...0..11...0..10 + * dst: 0000001100000010 + * + * A relationship exists between bitmap_scatter() and bitmap_gather(). + * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. + * See bitmap_scatter() for details related to this relationship. + */ +static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(bit, dst, test_bit(n++, src)); +} + +/** + * bitmap_gather - Gather a bitmap according to given mask + * @dst: gathered bitmap + * @src: scattered bitmap + * @mask: mask representing bits to extract from in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Gathers bitmap with sparse bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a. + * + * Or in binary form + * @src @mask @dst + * 0000001100000010 0001001100010011 0000000000011010 + * + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5) + * + * A more 'visual' description of the operation:: + * + * mask: ...v..vv...v..vv + * src: 0000001100000010 + * ^ ^^ ^ 0 + * | || | 10 + * | || > 010 + * | |+--> 1010 + * | +--> 11010 + * +----> 011010 + * dst: 0000000000011010 + * + * A relationship exists between bitmap_gather() and bitmap_scatter(). See + * bitmap_scatter() for the bitmap scatter detailed operations. + * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n). + * The operation bitmap_gather(result, scattered, mask, n) leads to a result + * equal or equivalent to src. + * + * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather() + * are not bijective. + * The result and src values are equivalent in that sense that a call to + * bitmap_scatter(res, src, mask, n) and a call to + * bitmap_scatter(res, result, mask, n) will lead to the same res value. + */ +static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(n++, dst, test_bit(bit, src)); +} + static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) @@ -519,6 +614,66 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, } /** + * bitmap_release_region - release allocated bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to release + * @order: region size (log base 2 of number of bits) to release + * + * This is the complement to __bitmap_find_free_region() and releases + * the found region (by clearing it in the bitmap). + */ +static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +{ + bitmap_clear(bitmap, pos, BIT(order)); +} + +/** + * bitmap_allocate_region - allocate bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to allocate + * @order: region size (log base 2 of number of bits) to allocate + * + * Allocate (set bits in) a specified region of a bitmap. + * + * Returns: 0 on success, or %-EBUSY if specified region wasn't + * free (not all bits were zero). + */ +static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +{ + unsigned int len = BIT(order); + + if (find_next_bit(bitmap, pos + len, pos) < pos + len) + return -EBUSY; + bitmap_set(bitmap, pos, len); + return 0; +} + +/** + * bitmap_find_free_region - find a contiguous aligned mem region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @bits: number of bits in the bitmap + * @order: region size (log base 2 of number of bits) to find + * + * Find a region of free (zero) bits in a @bitmap of @bits bits and + * allocate them (set them to one). Only consider regions of length + * a power (@order) of two, aligned to that power of two, which + * makes the search algorithm much faster. + * + * Returns: the bit offset in bitmap of the allocated region, + * or -errno on failure. + */ +static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +{ + unsigned int pos, end; /* scans bitmap by regions of size order */ + + for (pos = 0; (end = pos + BIT(order)) <= bits; pos = end) { + if (!bitmap_allocate_region(bitmap, pos, order)) + return pos; + } + return -ENOMEM; +} + +/** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe..0eb24d21aac2 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <vdso/bits.h> +#include <uapi/linux/bits.h> #include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) @@ -30,15 +31,8 @@ #define GENMASK_INPUT_CHECK(h, l) 0 #endif -#define __GENMASK(h, l) \ - (((~UL(0)) - (UL(1) << (l)) + 1) & \ - (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) - -#define __GENMASK_ULL(h, l) \ - (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ - (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 378b2459efe2..e253e7bd0d17 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -20,6 +20,7 @@ struct blk_integrity_iter { unsigned int data_size; unsigned short interval; unsigned char tuple_size; + unsigned char pi_offset; const char *disk_name; }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 958ed7e89b30..d3d8fd8e229b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,6 +8,7 @@ #include <linux/scatterlist.h> #include <linux/prefetch.h> #include <linux/srcu.h> +#include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; @@ -32,8 +33,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* use hctx->sched_tags */ @@ -137,6 +136,7 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif + enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; @@ -393,9 +393,6 @@ struct blk_mq_hw_ctx { */ struct blk_mq_tags *sched_tags; - /** @run: Number of dispatched requests. */ - unsigned long run; - /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ @@ -687,17 +684,19 @@ enum { #define BLK_MQ_NO_HCTX_IDX (-1U) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -#define blk_mq_alloc_disk(set, queuedata) \ +#define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ - __blk_mq_alloc_disk(set, queuedata, &__key); \ + __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); @@ -832,6 +831,12 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { + /* + * passthrough io doesn't use iostat accounting, cgroup stats + * and io scheduler functionalities. + */ + if (blk_rq_is_passthrough(rq)) + return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index 2580e05a8ab6..004b38a538ff 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -15,7 +15,6 @@ extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); extern void blk_post_runtime_resume(struct request_queue *q); -extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, struct device *dev) {} diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d5c5e59ddbd2..cb1526ec44b5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -10,6 +10,7 @@ #include <linux/bvec.h> #include <linux/device.h> #include <linux/ktime.h> +#include <linux/rw_hint.h> struct bio_set; struct bio; @@ -49,26 +50,26 @@ struct block_device { bool bd_write_holder; bool bd_has_submit_bio; dev_t bd_dev; + struct inode *bd_inode; /* will die */ + atomic_t bd_openers; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ - struct inode * bd_inode; /* will die */ void * bd_claiming; void * bd_holder; const struct blk_holder_ops *bd_holder_ops; struct mutex bd_holder_lock; - /* The counter of freeze processes */ - int bd_fsfreeze_count; int bd_holders; struct kobject *bd_holder_dir; - /* Mutex for freeze */ - struct mutex bd_fsfreeze_mutex; - struct super_block *bd_fsfreeze_sb; + atomic_t bd_fsfreeze_count; /* number of freeze requests */ + struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */ struct partition_meta_info *bd_meta_info; #ifdef CONFIG_FAIL_MAKE_REQUEST bool bd_make_it_fail; #endif + bool bd_ro_warned; + int bd_writers; /* * keep this out-of-line as it's both big and not needed in the fast * path @@ -206,52 +207,10 @@ static inline bool blk_path_error(blk_status_t error) return true; } -/* - * From most significant bit: - * 1 bit: reserved for other usage, see below - * 12 bits: original size of bio - * 51 bits: issue time of bio - */ -#define BIO_ISSUE_RES_BITS 1 -#define BIO_ISSUE_SIZE_BITS 12 -#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) -#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) -#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) -#define BIO_ISSUE_SIZE_MASK \ - (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) -#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) - -/* Reserved bit for blk-throtl */ -#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) - struct bio_issue { u64 value; }; -static inline u64 __bio_issue_time(u64 time) -{ - return time & BIO_ISSUE_TIME_MASK; -} - -static inline u64 bio_issue_time(struct bio_issue *issue) -{ - return __bio_issue_time(issue->value); -} - -static inline sector_t bio_issue_size(struct bio_issue *issue) -{ - return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); -} - -static inline void bio_issue_init(struct bio_issue *issue, - sector_t size) -{ - size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; - issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | - (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | - ((u64)size << BIO_ISSUE_SIZE_SHIFT)); -} - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; @@ -269,6 +228,7 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; + enum rw_hint bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; @@ -378,6 +338,8 @@ enum req_op { REQ_OP_DISCARD = (__force blk_opf_t)3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = (__force blk_opf_t)5, + /* write data at the current zone write pointer */ + REQ_OP_ZONE_APPEND = (__force blk_opf_t)7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ @@ -386,12 +348,10 @@ enum req_op { REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, - /* write data at the current zone write pointer */ - REQ_OP_ZONE_APPEND = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)13, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eef450f25982..69e7da33ca49 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include <linux/sbitmap.h> #include <linux/uuid.h> #include <linux/xarray.h> +#include <linux/file.h> struct module; struct request_queue; @@ -42,7 +43,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct device_type part_type; -extern struct class block_class; +extern const struct class block_class; /* * Maximum number of blkcg policies allowed to be registered concurrently. @@ -108,6 +109,7 @@ struct blk_integrity { const struct blk_integrity_profile *profile; unsigned char flags; unsigned char tuple_size; + unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; }; @@ -124,6 +126,10 @@ typedef unsigned int __bitwise blk_mode_t; #define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) /* open for "writes" only for ioctls (specialy hack for floppy.c) */ #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) +/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */ +#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5)) +/* return partition scanning errors */ +#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6)) struct gendisk { /* @@ -187,8 +193,6 @@ struct gendisk { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned int max_open_zones; - unsigned int max_active_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -264,18 +268,6 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) } /* - * Zoned block device models (zoned limit). - * - * Note: This needs to be ordered from the least to the most severe - * restrictions for the inheritance in blk_stack_limits() to work. - */ -enum blk_zoned_model { - BLK_ZONED_NONE = 0, /* Regular block device */ - BLK_ZONED_HA, /* Host-aware zoned block device */ - BLK_ZONED_HM, /* Host-managed zoned block device */ -}; - -/* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages */ @@ -302,6 +294,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; @@ -316,7 +309,9 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - enum blk_zoned_model zoned; + bool zoned; + unsigned int max_open_zones; + unsigned int max_active_zones; /* * Drivers that set dma_alignment to less than 511 must be prepared to @@ -329,24 +324,15 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); +void disk_set_zoned(struct gendisk *disk); -#ifdef CONFIG_BLK_DEV_ZONED #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, - unsigned int nr_zones, report_zones_cb cb, void *data); -unsigned int bdev_nr_zones(struct block_device *bdev); -extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, - gfp_t gfp_mask); + unsigned int nr_zones, report_zones_cb cb, void *data); +int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, + sector_t sectors, sector_t nr_sectors); int blk_revalidate_disk_zones(struct gendisk *disk, - void (*update_driver_data)(struct gendisk *disk)); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return 0; -} -#endif /* CONFIG_BLK_DEV_ZONED */ + void (*update_driver_data)(struct gendisk *disk)); /* * Independent access ranges: struct blk_independent_access_range describes @@ -376,59 +362,51 @@ struct blk_independent_access_ranges { }; struct request_queue { - struct request *last_merge; - struct elevator_queue *elevator; - - struct percpu_ref q_usage_counter; + /* + * The queue owner gets to use this for whatever they like. + * ll_rw_blk doesn't touch it. + */ + void *queuedata; - struct blk_queue_stats *stats; - struct rq_qos *rq_qos; - struct mutex rq_qos_mutex; + struct elevator_queue *elevator; const struct blk_mq_ops *mq_ops; /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; + /* + * various queue flags, see QUEUE_* below + */ + unsigned long queue_flags; + + unsigned int rq_timeout; + unsigned int queue_depth; + refcount_t refs; + /* hw dispatch queues */ - struct xarray hctx_table; unsigned int nr_hw_queues; + struct xarray hctx_table; - /* - * The queue owner gets to use this for whatever they like. - * ll_rw_blk doesn't touch it. - */ - void *queuedata; - - /* - * various queue flags, see QUEUE_* below - */ - unsigned long queue_flags; - /* - * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM requests are processed. - */ - atomic_t pm_only; + struct percpu_ref q_usage_counter; - /* - * ida allocated id for this queue. Used to index queues from - * ioctx. - */ - int id; + struct request *last_merge; spinlock_t queue_lock; - struct gendisk *disk; + int quiesce_depth; - refcount_t refs; + struct gendisk *disk; /* * mq queue kobject */ struct kobject *mq_kobj; + struct queue_limits limits; + #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -439,24 +417,40 @@ struct request_queue { #endif /* - * queue settings + * Number of contexts that have called blk_set_pm_only(). If this + * counter is above zero then only RQF_PM requests are processed. */ - unsigned long nr_requests; /* Max # of requests */ + atomic_t pm_only; + + struct blk_queue_stats *stats; + struct rq_qos *rq_qos; + struct mutex rq_qos_mutex; + + /* + * ida allocated id for this queue. Used to index queues from + * ioctx. + */ + int id; unsigned int dma_pad_mask; + /* + * queue settings + */ + unsigned long nr_requests; /* Max # of requests */ + #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; struct kobject *crypto_kobject; #endif - unsigned int rq_timeout; - struct timer_list timeout; struct work_struct timeout_work; atomic_t nr_active_requests_shared_tags; + unsigned int required_elevator_features; + struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; @@ -467,11 +461,12 @@ struct request_queue { struct mutex blkcg_mutex; #endif - struct queue_limits limits; + int node; - unsigned int required_elevator_features; + spinlock_t requeue_lock; + struct list_head requeue_list; + struct delayed_work requeue_work; - int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -481,12 +476,9 @@ struct request_queue { struct blk_flush_queue *fq; struct list_head flush_list; - struct list_head requeue_list; - spinlock_t requeue_lock; - struct delayed_work requeue_work; - struct mutex sysfs_lock; struct mutex sysfs_dir_lock; + struct mutex limits_lock; /* * for reusing dead hctx instance in case of updating @@ -509,8 +501,6 @@ struct request_queue { */ struct mutex mq_freeze_lock; - int quiesce_depth; - struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; @@ -538,7 +528,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */ +#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -623,26 +613,14 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) } #endif -static inline enum blk_zoned_model -blk_queue_zoned_model(struct request_queue *q) -{ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) - return q->limits.zoned; - return BLK_ZONED_NONE; -} - static inline bool blk_queue_is_zoned(struct request_queue *q) { - switch (blk_queue_zoned_model(q)) { - case BLK_ZONED_HA: - case BLK_ZONED_HM: - return true; - default: - return false; - } + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; } #ifdef CONFIG_BLK_DEV_ZONED +unsigned int bdev_nr_zones(struct block_device *bdev); + static inline unsigned int disk_nr_zones(struct gendisk *disk) { return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; @@ -667,26 +645,31 @@ static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->max_open_zones = max_open_zones; + disk->queue->limits.max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->max_active_zones = max_active_zones; + disk->queue->limits.max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->max_open_zones; + return bdev->bd_disk->queue->limits.max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->max_active_zones; + return bdev->bd_disk->queue->limits.max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return 0; +} + static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; @@ -786,22 +769,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass); /** * blk_alloc_disk - allocate a gendisk structure + * @lim: queue limits to be used for this disk. * @node_id: numa node to allocate on * * Allocate and pre-initialize a gendisk structure for use with BIO based * drivers. * + * Returns an ERR_PTR on error, else the allocated disk. + * * Context: can sleep */ -#define blk_alloc_disk(node_id) \ +#define blk_alloc_disk(lim, node_id) \ ({ \ static struct lock_class_key __key; \ \ - __blk_alloc_disk(node_id, &__key); \ + __blk_alloc_disk(lim, node_id, &__key); \ }) int __register_blkdev(unsigned int major, const char *name, @@ -884,6 +871,29 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } +/** + * queue_limits_start_update - start an atomic update of queue limits + * @q: queue to update + * + * This functions starts an atomic update of the queue limits. It takes a lock + * to prevent other updates and returns a snapshot of the current limits that + * the caller can modify. The caller must call queue_limits_commit_update() + * to finish the update. + * + * Context: process context. The caller must have frozen the queue or ensured + * that there is outstanding I/O by other means. + */ +static inline struct queue_limits +queue_limits_start_update(struct request_queue *q) + __acquires(q->limits_lock) +{ + mutex_lock(&q->limits_lock); + return q->limits; +} +int queue_limits_commit_update(struct request_queue *q, + struct queue_limits *lim); +int queue_limits_set(struct request_queue *q, struct queue_limits *lim); + /* * Access functions for manipulating queue properties */ @@ -917,8 +927,8 @@ extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, - sector_t offset); +void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, + sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); @@ -965,6 +975,7 @@ struct blk_plug { /* if ios_left is > 1, we can batch tag/rq allocations */ struct request *cached_rq; + u64 cur_ktime; unsigned short nr_ios; unsigned short rq_count; @@ -995,6 +1006,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) __blk_flush_plug(plug, async); } +/* + * tsk == current here + */ +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (plug) + plug->cur_ktime = 0; + current->flags &= ~PF_BLOCK_TS; +} + int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -1018,6 +1041,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ +} + static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; @@ -1080,7 +1107,14 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -#define BLK_DEF_MAX_SECTORS 2560u +/* + * Default upper limit for the software max_sectors limit used for + * regular file system I/O. This can be increased through sysfs. + * + * Not to be confused with the max_hw_sector limit that is entirely + * controlled by the driver, usually based on hardware limits. + */ +#define BLK_DEF_MAX_SECTORS_CAP 2560u static inline unsigned long queue_segment_boundary(const struct request_queue *q) { @@ -1259,11 +1293,6 @@ static inline bool bdev_nowait(struct block_device *bdev) return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); } -static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) -{ - return blk_queue_zoned_model(bdev_get_queue(bdev)); -} - static inline bool bdev_is_zoned(struct block_device *bdev) { return blk_queue_is_zoned(bdev_get_queue(bdev)); @@ -1468,8 +1497,23 @@ struct blk_holder_ops { * Sync the file system mounted on the block device. */ void (*sync)(struct block_device *bdev); + + /* + * Freeze the file system mounted on the block device. + */ + int (*freeze)(struct block_device *bdev); + + /* + * Thaw the file system mounted on the block device. + */ + int (*thaw)(struct block_device *bdev); }; +/* + * For filesystems using @fs_holder_ops, the @holder argument passed to + * helpers used to open and claim block devices via + * bd_prepare_to_claim() must point to a superblock. + */ extern const struct blk_holder_ops fs_holder_ops; /* @@ -1477,22 +1521,23 @@ extern const struct blk_holder_ops fs_holder_ops; * as stored in sb->s_flags. */ #define sb_open_mode(flags) \ - (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) + (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ + (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) -struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, +struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, +struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, void *holder); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); void blkdev_put_no_open(struct block_device *bdev); struct block_device *I_BDEV(struct inode *inode); +struct block_device *file_bdev(struct file *bdev_file); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); @@ -1530,8 +1575,9 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) } #endif /* CONFIG_BLOCK */ -int freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev); +int bdev_freeze(struct block_device *bdev); +int bdev_thaw(struct block_device *bdev); +void bdev_fput(struct file *bdev_file); struct io_comp_batch { struct request *req_list; diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index ca73940e26df..3f4b4ac527ca 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include <linux/kernel.h> #include <linux/types.h> +bool __init cmdline_has_extra_options(void); #else /* !__KERNEL__ */ /* * NOTE: This is only for tools/bootconfig, because tools/bootconfig will @@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_exit(void); +void __init _xbc_exit(bool early); + +static inline void xbc_exit(void) +{ + _xbc_exit(false); +} /* XBC embedded bootconfig data in kernel */ #ifdef CONFIG_BOOT_CONFIG_EMBED diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index e1a3c9c9754c..cffa38a73618 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -60,7 +60,7 @@ static inline void get_page_bootmem(unsigned long info, struct page *page, static inline void free_bootmem_page(struct page *page) { - kmemleak_free_part(page_to_virt(page), PAGE_SIZE); + kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); free_reserved_page(page); } #endif diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..fb3c3e7181e6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -137,11 +143,12 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, - int *optname, char __user *optval, + int *optname, sockptr_t optval, int *optlen, char **kernel_optval); + int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen, int max_optlen, + int optname, sockptr_t optval, + sockptr_t optlen, int max_optlen, int retval); int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, @@ -189,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ - cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \ + sk_fullsock(sk)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ @@ -230,22 +238,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +264,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +284,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -377,7 +394,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - get_user(__ret, optlen); \ + copy_from_sockptr(&__ret, optlen, sizeof(int)); \ __ret; \ }) @@ -477,24 +494,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49f8b691496c..890e152d553e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -29,6 +29,7 @@ #include <linux/rcupdate_trace.h> #include <linux/static_call.h> #include <linux/memcontrol.h> +#include <linux/cfi.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -36,6 +37,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -51,11 +53,15 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); @@ -106,7 +112,11 @@ struct bpf_map_ops { /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); - void (*map_fd_put_ptr)(void *ptr); + /* If need_defer is true, the implementation should guarantee that + * the to-be-put element is still alive before the bpf program, which + * may manipulate it, exists. + */ + void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer); int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, @@ -130,6 +140,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, @@ -180,14 +193,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -241,10 +255,7 @@ struct bpf_list_node_kern { } __attribute__((aligned(8))); struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -266,13 +277,14 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. - */ - atomic64_t refcnt ____cacheline_aligned; - atomic64_t usercnt; - struct work_struct work; struct mutex freeze_mutex; + atomic64_t refcnt; + atomic64_t usercnt; + /* rcu is used before freeing and work is only used during freeing */ + union { + struct work_struct work; + struct rcu_head rcu; + }; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is @@ -287,6 +299,9 @@ struct bpf_map { } owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ + bool free_after_mult_rcu_gp; + bool free_after_rcu_gp; + atomic64_t sleepable_refcnt; s64 __percpu *elem_count; }; @@ -300,6 +315,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -325,6 +342,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -351,6 +369,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -389,6 +408,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); @@ -509,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); - - +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -692,6 +712,7 @@ enum bpf_arg_type { * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -863,6 +884,7 @@ enum bpf_reg_type { * an explicit null check is required for this struct. */ PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ @@ -903,10 +925,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { @@ -1029,6 +1055,22 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + +/* + * Indicate the trampoline should be suitable to receive indirect calls; + * without this indirectly calling the generated code can result in #UD/#CP, + * depending on the CFI options. + * + * Used by bpf_struct_ops. + * + * Incompatible with FENTRY usage, overloads @func_addr argument. + */ +#define BPF_TRAMP_F_INDIRECT BIT(8) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -1068,10 +1110,17 @@ struct bpf_tramp_run_ctx; * fexit = a set of program to run after original function */ struct bpf_tramp_image; -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call); + void *func_addr); +void *arch_alloc_bpf_trampoline(unsigned int size); +void arch_free_bpf_trampoline(void *image, unsigned int size); +void arch_protect_bpf_trampoline(void *image, unsigned int size); +void arch_unprotect_bpf_trampoline(void *image, unsigned int size); +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr); + u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, @@ -1104,6 +1153,7 @@ enum bpf_tramp_prog_type { struct bpf_tramp_image { void *image; + int size; struct bpf_ksym ksym; struct percpu_ref pcref; void *ip_after_call; @@ -1139,7 +1189,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - struct module *mod; }; struct bpf_attach_target_info { @@ -1173,7 +1222,11 @@ struct bpf_dispatcher { #endif }; -static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( +#ifndef __bpfcall +#define __bpfcall __nocfi +#endif + +static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, bpf_func_t bpf_func) @@ -1211,6 +1264,8 @@ enum bpf_dynptr_type { int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); @@ -1263,7 +1318,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func #define DEFINE_BPF_DISPATCHER(name) \ __BPF_DISPATCHER_SC(name); \ - noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ + noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ @@ -1286,7 +1341,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); @@ -1330,6 +1385,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) struct bpf_func_info_aux { u16 linkage; bool unreliable; + bool called : 1; + bool verified : 1; }; enum bpf_jit_poke_reason { @@ -1358,6 +1415,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; }; @@ -1378,6 +1436,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; @@ -1394,10 +1453,13 @@ struct bpf_prog_aux { bool dev_bound; /* Program is bound to the netdev. */ bool offload_requested; /* Program is bound and offloaded to the netdev. */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; + struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1408,6 +1470,9 @@ struct bpf_prog_aux { struct bpf_kfunc_desc_tab *kfunc_tab; struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; +#ifdef CONFIG_FINEIBT + struct bpf_ksym ksym_prefix; +#endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; @@ -1420,9 +1485,11 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64); #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1473,7 +1540,8 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -1506,12 +1574,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through tasks trace + * RCU GP and then "classic" RCU GP + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); @@ -1547,6 +1629,31 @@ struct bpf_link_primer { u32 id; }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; + + /* BPF token-related delegation options */ + u64 delegate_cmds; + u64 delegate_maps; + u64 delegate_progs; + u64 delegate_attachs; +}; + +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; + u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; +#ifdef CONFIG_SECURITY + void *security; +#endif +}; + struct bpf_struct_ops_value; struct btf_member; @@ -1611,18 +1718,64 @@ struct bpf_struct_ops { void (*unreg)(void *kdata); int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); - const struct btf_type *type; - const struct btf_type *value_type; + void *cfi_stubs; + struct module *owner; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; +}; + +/* Every member of a struct_ops type has an instance even a member is not + * an operator (function pointer). The "info" field will be assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the + * argument information required by the verifier to verify the program. + * + * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the + * corresponding entry for an given argument. + */ +struct bpf_struct_ops_arg_info { + struct bpf_ctx_arg_aux *info; + u32 cnt; +}; + +struct bpf_struct_ops_desc { + struct bpf_struct_ops *st_ops; + + const struct btf_type *type; + const struct btf_type *value_type; u32 type_id; u32 value_id; + + /* Collection of argument information for each member */ + struct bpf_struct_ops_arg_info *arg_info; +}; + +enum bpf_struct_ops_state { + BPF_STRUCT_OPS_STATE_INIT, + BPF_STRUCT_OPS_STATE_INUSE, + BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, +}; + +struct bpf_struct_ops_common_value { + refcount_t refcnt; + enum bpf_struct_ops_state state; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +/* This macro helps developer to register a struct_ops type and generate + * type information correctly. Developers should use this macro to register + * a struct_ops type instead of calling __register_bpf_struct_ops() directly. + */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, @@ -1630,7 +1783,10 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, - void *image, void *image_end); + void *stub_func, + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1663,15 +1819,13 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -1690,6 +1844,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} + +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} #endif @@ -1965,14 +2126,14 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); @@ -2004,6 +2165,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2043,6 +2205,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); @@ -2070,6 +2233,8 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG_KMEM void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); @@ -2137,24 +2302,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -static inline bool bpf_allow_ptr_leaks(void) +bool bpf_token_capable(const struct bpf_token *token, int cap); + +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2171,8 +2338,21 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2402,19 +2582,19 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf_func_model *m); struct bpf_reg_state; -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); -int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); -int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg); +int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key); +int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key, int last_id); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2461,6 +2641,9 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); + +bool dev_check_flush(void); +bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2530,6 +2713,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } @@ -2653,7 +2854,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } @@ -2905,6 +3106,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, @@ -3134,6 +3351,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); @@ -3183,4 +3403,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags) return flags; } +static inline bool bpf_is_subprog(const struct bpf_prog *prog) +{ + return prog->aux->func_idx != 0; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 173ec7f43ed1..dcddb0aef7d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, bool bpf_ma); -struct bpf_local_storage_data * +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); +/* If cacheit_lockit is false, this lookup function is lockless */ +static inline struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, - bool cacheit_lockit); + bool cacheit_lockit) +{ + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + if (cacheit_lockit) + __bpf_local_storage_insert_cache(local_storage, smap, selem); + return SDATA(selem); +} void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index d644bbb298af..aaf004d94322 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -11,6 +11,8 @@ struct bpf_mem_caches; struct bpf_mem_alloc { struct bpf_mem_caches __percpu *caches; struct bpf_mem_cache __percpu *cache; + struct obj_cgroup *objcg; + bool percpu; struct work_struct work; }; @@ -20,8 +22,15 @@ struct bpf_mem_alloc { * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects. * Alloc and free are done with bpf_mem_{alloc,free}() and the size of * the returned object is given by the size argument of bpf_mem_alloc(). + * If percpu equals true, error will be returned in order to avoid + * large memory consumption and the below bpf_mem_alloc_percpu_unit_init() + * should be used to do on-demand per-cpu allocation for each size. */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu); +/* Initialize a non-fix-size percpu memory allocator */ +int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg); +/* The percpu allocation with a specific unit size. */ +int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); /* kmalloc/kfree equivalent: */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fc0d6f32c687..9f2a6b83b49e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) @@ -142,9 +143,13 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter) +BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit) #endif #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) +BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b6e58dab8e27..7cb1b75eee38 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -275,6 +275,11 @@ struct bpf_reference_state { int callback_ref; }; +struct bpf_retval_range { + s32 minval; + s32 maxval; +}; + /* state of the program: * type of all registers and stack info */ @@ -297,23 +302,67 @@ struct bpf_func_state { * void foo(void) { bpf_timer_set_callback(,foo); } */ u32 async_entry_cnt; + struct bpf_retval_range callback_ret_range; bool in_callback_fn; - struct tnum callback_ret_range; bool in_async_callback_fn; + bool in_exception_callback_fn; + /* For callback calling functions that limit number of possible + * callback executions (e.g. bpf_loop) keeps track of current + * simulated iteration number. + * Value in frame N refers to number of times callback with frame + * N+1 was simulated, e.g. for the following call: + * + * bpf_loop(..., fn, ...); | suppose current frame is N + * | fn would be simulated in frame N+1 + * | number of simulations is tracked in frame N + */ + u32 callback_depth; /* The following fields should be last. See copy_func_state() */ int acquired_refs; struct bpf_reference_state *refs; - int allocated_stack; + /* The state of the stack. Each element of the array describes BPF_REG_SIZE + * (i.e. 8) bytes worth of stack memory. + * stack[0] represents bytes [*(r10-8)..*(r10-1)] + * stack[1] represents bytes [*(r10-16)..*(r10-9)] + * ... + * stack[allocated_stack/8 - 1] represents [*(r10-allocated_stack)..*(r10-allocated_stack+7)] + */ struct bpf_stack_state *stack; + /* Size of the current stack, in bytes. The stack state is tracked below, in + * `stack`. allocated_stack is always a multiple of BPF_REG_SIZE. + */ + int allocated_stack; }; -struct bpf_idx_pair { - u32 prev_idx; +#define MAX_CALL_FRAMES 8 + +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +enum { + /* instruction references stack slot through PTR_TO_STACK register; + * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) + * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512, + * 8 bytes per slot, so slot index (spi) is [0, 63]) + */ + INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */ + + INSN_F_SPI_MASK = 0x3f, /* 6 bits */ + INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ + + INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ +}; + +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); + +struct bpf_jmp_history_entry { u32 idx; + /* insn idx can't be bigger than 1 million */ + u32 prev_idx : 22; + /* special flags, e.g., whether insn is doing register stack spill/load */ + u32 flags : 10; }; -#define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { @@ -372,32 +421,49 @@ struct bpf_verifier_state { struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + bool used_as_loop_entry; /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; + /* If this state is a part of states loop this field points to some + * parent of this state such that: + * - it is also a member of the same states loop; + * - DFS states traversal starting from initial state visits loop_entry + * state before this state. + * Used to compute topmost loop entry for state loops. + * State loops might appear because of open coded iterators logic. + * See get_loop_entry() for more information. + */ + struct bpf_verifier_state *loop_entry; /* jmp history recorded from first to last. * backtracking is using it to go from last to first. * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - struct bpf_idx_pair *jmp_history; + struct bpf_jmp_history_entry *jmp_history; u32 jmp_history_cnt; + u32 dfs_depth; + u32 callback_unroll_depth; + u32 may_goto_depth; }; -#define bpf_get_spilled_reg(slot, frame) \ +#define bpf_get_spilled_reg(slot, frame, mask) \ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ - (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ + ((1 << frame->stack[slot].slot_type[BPF_REG_SIZE - 1]) & (mask))) \ ? &frame->stack[slot].spilled_ptr : NULL) /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ -#define bpf_for_each_spilled_reg(iter, frame, reg) \ - for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ +#define bpf_for_each_spilled_reg(iter, frame, reg, mask) \ + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame, mask); \ iter < frame->allocated_stack / BPF_REG_SIZE; \ - iter++, reg = bpf_get_spilled_reg(iter, frame)) + iter++, reg = bpf_get_spilled_reg(iter, frame, mask)) -/* Invoke __expr over regsiters in __vst, setting __state and __reg */ -#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ +#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \ ({ \ struct bpf_verifier_state *___vstate = __vst; \ int ___i, ___j; \ @@ -409,7 +475,7 @@ struct bpf_verifier_state { __reg = &___regs[___j]; \ (void)(__expr); \ } \ - bpf_for_each_spilled_reg(___j, __state, __reg) { \ + bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \ if (!__reg) \ continue; \ (void)(__expr); \ @@ -417,6 +483,10 @@ struct bpf_verifier_state { } \ }) +/* Invoke __expr over regsiters in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, 1 << STACK_SPILL, __expr) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -478,8 +548,10 @@ struct bpf_insn_aux_data { u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool needs_zext; /* alu op needs to clear upper bits */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ + bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ @@ -490,6 +562,10 @@ struct bpf_insn_aux_data { * this instruction, regardless of any heuristics */ bool force_checkpoint; + /* true if instruction is a call to a helper function that + * accepts callback function as a parameter. + */ + bool calls_callback; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -532,15 +608,30 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) #define BPF_MAX_SUBPROGS 256 +struct bpf_subprog_arg_info { + enum bpf_arg_type arg_type; + union { + u32 mem_size; + u32 btf_id; + }; +}; + struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ - bool has_tail_call; - bool tail_call_reachable; - bool has_ld_abs; - bool is_async_cb; + u16 stack_extra; + bool has_tail_call: 1; + bool tail_call_reachable: 1; + bool has_ld_abs: 1; + bool is_cb: 1; + bool is_async_cb: 1; + bool is_exception_cb: 1; + bool args_cached: 1; + + u8 arg_cnt; + struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; }; struct bpf_verifier_env; @@ -575,10 +666,12 @@ struct bpf_verifier_env { u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ const struct bpf_verifier_ops *ops; + struct module *attach_btf_mod; /* The owner module of prog->aux->attach_btf */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ + bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; @@ -587,17 +680,24 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + u32 hidden_subprog_cnt; /* number of hidden subprogs */ + int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; + /* Allow access to uninitialized stack memory. Writes with fixed offset are + * always allowed, so this refers to reads (with fixed or variable offset), + * to writes with variable offset and to indirect (helper) accesses. + */ bool allow_uninit_stack; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool seen_exception; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; - struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; @@ -608,6 +708,7 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ @@ -642,6 +743,16 @@ struct bpf_verifier_env { char tmp_str_buf[TMP_STR_BUF_LEN]; }; +static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) +{ + return &env->prog->aux->func_info_aux[subprog]; +} + +static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env, int subprog) +{ + return &env->subprog_info[subprog]; +} + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, @@ -653,6 +764,10 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual); +__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...); + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; @@ -675,14 +790,6 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); -int check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno); -int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type); -int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno, u32 mem_size); - /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, struct btf *btf, u32 btf_id) @@ -752,4 +859,85 @@ static inline bool bpf_type_has_unsafe_modifiers(u32 type) return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; } +static inline bool type_is_ptr_alloc_obj(u32 type) +{ + return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; +} + +static inline bool type_is_non_owning_ref(u32 type) +{ + return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; +} + +static inline bool type_is_pkt_pointer(enum bpf_reg_type type) +{ + type = base_type(type); + return type == PTR_TO_PACKET || + type == PTR_TO_PACKET_META; +} + +static inline bool type_is_sk_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_XDP_SOCK; +} + +static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) +{ + env->scratched_regs |= 1U << regno; +} + +static inline void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) +{ + env->scratched_stack_slots |= 1ULL << spi; +} + +static inline bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +{ + return (env->scratched_regs >> regno) & 1; +} + +static inline bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) +{ + return (env->scratched_stack_slots >> regno) & 1; +} + +static inline bool verifier_state_scratched(const struct bpf_verifier_env *env) +{ + return env->scratched_regs || env->scratched_stack_slots; +} + +static inline void mark_verifier_state_clean(struct bpf_verifier_env *env) +{ + env->scratched_regs = 0U; + env->scratched_stack_slots = 0ULL; +} + +/* Used for printing the entire verifier state. */ +static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) +{ + env->scratched_regs = ~0U; + env->scratched_stack_slots = ~0ULL; +} + +static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size) +{ +#ifdef __BIG_ENDIAN + off -= spill_size - fill_size; +#endif + + return !(off % BPF_REG_SIZE); +} + +const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); +const char *dynptr_type_str(enum bpf_dynptr_type type); +const char *iter_type_str(const struct btf *btf, u32 btf_id); +const char *iter_state_str(enum bpf_iter_state state); + +void print_verifier_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h deleted file mode 100644 index 736ded4905e0..000000000000 --- a/include/linux/bpfilter.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BPFILTER_H -#define _LINUX_BPFILTER_H - -#include <uapi/linux/bpfilter.h> -#include <linux/usermode_driver.h> -#include <linux/sockptr.h> - -struct sock; -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen); -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen); - -struct bpfilter_umh_ops { - struct umd_info info; - /* since ip_getsockopt() can run in parallel, serialize access to umh */ - struct mutex lock; - int (*sockopt)(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen, bool is_set); - int (*start)(void); -}; -extern struct bpfilter_umh_ops bpfilter_ops; -#endif diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index c55810a43541..1394ba302367 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 +#define PHY_ID_BCM5221 0x004061e0 #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 @@ -331,6 +332,15 @@ #define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94) +/* BCM5221 Registers */ +#define BCM5221_AEGSR 0x1C +#define BCM5221_AEGSR_MDIX_STATUS BIT(13) +#define BCM5221_AEGSR_MDIX_MAN_SWAP BIT(12) +#define BCM5221_AEGSR_MDIX_DIS BIT(11) + +#define BCM5221_SHDW_AM4_EN_CLK_LPM BIT(2) +#define BCM5221_SHDW_AM4_FORCE_LPM BIT(1) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ /*****************************************************************************/ diff --git a/include/linux/btf.h b/include/linux/btf.h index 928113a80a95..f9e56fd12a9f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -74,6 +74,7 @@ #define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ +#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the @@ -83,6 +84,17 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit @@ -125,6 +137,7 @@ struct btf_struct_metas { extern const struct file_operations btf_fops; +const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); @@ -482,8 +495,26 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix); +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no); + struct bpf_verifier_log; +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +struct bpf_struct_ops; +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops); +const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id); +const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); +#else +static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); @@ -500,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); -const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg); +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); @@ -543,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } -static inline const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static inline bool +btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) { - return NULL; + return false; } static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) { diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a9cb10b0e2e9..e24aabfe8ecc 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,9 @@ struct btf_id_set { u32 ids[]; }; +/* This flag implies BTF_SET8 holds kfunc(s) */ +#define BTF_SET8_KFUNCS (1 << 0) + struct btf_id_set8 { u32 cnt; u32 flags; @@ -21,6 +24,7 @@ struct btf_id_set8 { #include <linux/compiler.h> /* for __PASTE */ #include <linux/compiler_attributes.h> /* for __maybe_unused */ +#include <linux/stringify.h> /* * Following macros help to define lists of BTF IDs placed @@ -183,17 +187,18 @@ extern struct btf_id_set name; * .word (1 << 3) | (1 << 1) | (1 << 2) * */ -#define __BTF_SET8_START(name, scope) \ +#define __BTF_SET8_START(name, scope, flags) \ +__BTF_ID_LIST(name, local) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ -".zero 8 \n" \ +".zero 4 \n" \ +".long " __stringify(flags) "\n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_SET8_START(name, local, 0) #define BTF_SET8_END(name) \ asm( \ @@ -202,6 +207,12 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set8 name; +#define BTF_KFUNCS_START(name) \ +__BTF_SET8_START(name, local, BTF_SET8_KFUNCS) + +#define BTF_KFUNCS_END(name) \ +BTF_SET8_END(name) + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; @@ -216,6 +227,8 @@ extern struct btf_id_set8 name; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #define BTF_SET8_END(name) +#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS }; +#define BTF_KFUNCS_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 44e9de51eedf..d78454a4dd1f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -198,16 +198,13 @@ void touch_buffer(struct buffer_head *bh); void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry); + gfp_t gfp); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry); -void create_empty_buffers(struct page *, unsigned long, - unsigned long b_state); -void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, - unsigned long b_state); +struct buffer_head *create_empty_buffers(struct folio *folio, + unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); -void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); @@ -227,8 +224,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp); +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); @@ -254,11 +251,10 @@ void __bh_read_batch(int nr, struct buffer_head *bhs[], * address_spaces. */ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length); -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); +int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, + void *get_block); int __block_write_full_folio(struct inode *inode, struct folio *folio, - get_block_t *get_block, struct writeback_control *wbc, - bh_end_io_t *handler); + get_block_t *get_block, struct writeback_control *wbc); int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, @@ -272,7 +268,6 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); -void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct page **, void **, get_block_t *, loff_t *); @@ -338,17 +333,38 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } -static inline struct buffer_head * -sb_getblk(struct super_block *sb, sector_t block) +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, unsigned size) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); } +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; -static inline struct buffer_head * -sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_MOVABLE | __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + +static inline struct buffer_head *sb_getblk(struct super_block *sb, + sector_t block) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); + return __getblk(sb->s_bdev, block, sb->s_blocksize); +} + +static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb, + sector_t block, gfp_t gfp) +{ + return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp); } static inline struct buffer_head * @@ -385,20 +401,6 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } -static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, 0); -} - -static inline struct buffer_head *__getblk(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); -} - static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) { if (!buffer_uptodate(bh) && trylock_buffer(bh)) { @@ -450,6 +452,28 @@ __bread(struct block_device *bdev, sector_t block, unsigned size) return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } +/** + * get_nth_bh - Get a reference on the n'th buffer after this one. + * @bh: The buffer to start counting from. + * @count: How many buffers to skip. + * + * This is primarily useful for finding the nth buffer in a folio; in + * that case you pass the head buffer and the byte offset in the folio + * divided by the block size. It can be used for other purposes, but + * it will wrap at the end of the folio rather than returning NULL or + * proceeding to the next folio for you. + * + * Return: The requested buffer with an elevated refcount. + */ +static inline __must_check +struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count) +{ + while (count--) + bh = bh->b_this_page; + get_bh(bh); + return bh; +} + bool block_dirty_folio(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_BUFFER_HEAD diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 3b7a0ff4642f..20aa3c2d89f7 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -2,15 +2,16 @@ #ifndef _LINUX_BUILDID_H #define _LINUX_BUILDID_H -#include <linux/mm_types.h> +#include <linux/types.h> #define BUILD_ID_SIZE_MAX 20 +struct vm_area_struct; int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; void init_vmlinux_build_id(void); #else diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 555aae5448ae..bd1e361b351c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -83,7 +83,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/cache.h b/include/linux/cache.h index 9900d20b76c2..0ecb17bb6883 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -85,6 +85,31 @@ #define cache_line_size() L1_CACHE_BYTES #endif +#ifndef __cacheline_group_begin +#define __cacheline_group_begin(GROUP) \ + __u8 __cacheline_group_begin__##GROUP[0] +#endif + +#ifndef __cacheline_group_end +#define __cacheline_group_end(GROUP) \ + __u8 __cacheline_group_end__##GROUP[0] +#endif + +#ifndef CACHELINE_ASSERT_GROUP_MEMBER +#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \ + BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \ + offsetofend(TYPE, __cacheline_group_begin__##GROUP) && \ + offsetofend(TYPE, MEMBER) <= \ + offsetof(TYPE, __cacheline_group_end__##GROUP))) +#endif + +#ifndef CACHELINE_ASSERT_GROUP_SIZE +#define CACHELINE_ASSERT_GROUP_SIZE(TYPE, GROUP, SIZE) \ + BUILD_BUG_ON(offsetof(TYPE, __cacheline_group_end__##GROUP) - \ + offsetofend(TYPE, __cacheline_group_begin__##GROUP) > \ + SIZE) +#endif + /* * Helper to add padding within a struct to ensure data fall into separate * cachelines. diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index a5cfd44fab45..2cb15fe4fe12 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -73,6 +73,7 @@ struct cacheinfo { struct cpu_cacheinfo { struct cacheinfo *info_list; + unsigned int per_cpu_data_slice_size; unsigned int num_levels; unsigned int num_leaves; bool cpu_map_populated; @@ -137,4 +138,10 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level) #define use_arch_cache_info() (false) #endif +#ifndef CONFIG_ARCH_HAS_CPU_CACHE_ALIASING +#define cpu_dcache_is_aliasing() false +#else +#include <asm/cachetype.h> +#endif + #endif /* _LINUX_CACHEINFO_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 982ba245eb41..1b92aed49363 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -195,6 +195,10 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +void can_state_get_by_berr_counter(const struct net_device *dev, + const struct can_berr_counter *bec, + enum can_state *tx_state, + enum can_state *rx_state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index cb0d6cd1c12f..60693a145894 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -90,6 +90,14 @@ enum cc_attr { * Examples include TDX Guest. */ CC_ATTR_HOTPLUG_DISABLED, + + /** + * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. + * + * The host kernel is running with the necessary features + * enabled to run SEV-SNP guests. + */ + CC_ATTR_HOST_SEV_SNP, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM @@ -107,10 +115,14 @@ enum cc_attr { * * FALSE - Specified Confidential Computing attribute is not active */ bool cc_platform_has(enum cc_attr attr); +void cc_platform_set(enum cc_attr attr); +void cc_platform_clear(enum cc_attr attr); #else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ static inline bool cc_platform_has(enum cc_attr attr) { return false; } +static inline void cc_platform_set(enum cc_attr attr) { } +static inline void cc_platform_clear(enum cc_attr attr) { } #endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h index bead71b7bc73..b57118aaa679 100644 --- a/include/linux/cdx/cdx_bus.h +++ b/include/linux/cdx/cdx_bus.h @@ -12,6 +12,7 @@ #include <linux/device.h> #include <linux/list.h> #include <linux/mod_devicetable.h> +#include <linux/msi.h> #define MAX_CDX_DEV_RESOURCES 4 #define CDX_CONTROLLER_ID_SHIFT 4 @@ -21,13 +22,31 @@ struct cdx_controller; enum { + CDX_DEV_MSI_CONF, + CDX_DEV_BUS_MASTER_CONF, CDX_DEV_RESET_CONF, + CDX_DEV_MSI_ENABLE, +}; + +struct cdx_msi_config { + u64 addr; + u32 data; + u16 msi_index; }; struct cdx_device_config { u8 type; + union { + struct cdx_msi_config msi; + bool bus_master_enable; + bool msi_enable; + }; }; +typedef int (*cdx_bus_enable_cb)(struct cdx_controller *cdx, u8 bus_num); + +typedef int (*cdx_bus_disable_cb)(struct cdx_controller *cdx, u8 bus_num); + typedef int (*cdx_scan_cb)(struct cdx_controller *cdx); typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, @@ -35,6 +54,19 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, struct cdx_device_config *dev_config); /** + * CDX_DEVICE - macro used to describe a specific CDX device + * @vend: the 16 bit CDX Vendor ID + * @dev: the 16 bit CDX Device ID + * + * This macro is used to create a struct cdx_device_id that matches a + * specific device. The subvendor and subdevice fields will be set to + * CDX_ANY_ID. + */ +#define CDX_DEVICE(vend, dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = CDX_ANY_ID, .subdevice = CDX_ANY_ID + +/** * CDX_DEVICE_DRIVER_OVERRIDE - macro used to describe a CDX device with * override_only flags. * @vend: the 16 bit CDX Vendor ID @@ -42,18 +74,24 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, * @driver_override: the 32 bit CDX Device override_only * * This macro is used to create a struct cdx_device_id that matches only a - * driver_override device. + * driver_override device. The subvendor and subdevice fields will be set to + * CDX_ANY_ID. */ #define CDX_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \ - .vendor = (vend), .device = (dev), .override_only = (driver_override) + .vendor = (vend), .device = (dev), .subvendor = CDX_ANY_ID,\ + .subdevice = CDX_ANY_ID, .override_only = (driver_override) /** * struct cdx_ops - Callbacks supported by CDX controller. + * @bus_enable: enable bus on the controller + * @bus_disable: disable bus on the controller * @scan: scan the devices on the controller * @dev_configure: configuration like reset, master_enable, * msi_config etc for a CDX device */ struct cdx_ops { + cdx_bus_enable_cb bus_enable; + cdx_bus_disable_cb bus_disable; cdx_scan_cb scan; cdx_dev_configure_cb dev_configure; }; @@ -62,13 +100,17 @@ struct cdx_ops { * struct cdx_controller: CDX controller object * @dev: Linux device associated with the CDX controller. * @priv: private data + * @msi_domain: MSI domain * @id: Controller ID + * @controller_registered: controller registered with bus * @ops: CDX controller ops */ struct cdx_controller { struct device *dev; void *priv; + struct irq_domain *msi_domain; u32 id; + bool controller_registered; struct cdx_ops *ops; }; @@ -78,36 +120,68 @@ struct cdx_controller { * @cdx: CDX controller associated with the device * @vendor: Vendor ID for CDX device * @device: Device ID for CDX device + * @subsystem_vendor: Subsystem Vendor ID for CDX device + * @subsystem_device: Subsystem Device ID for CDX device + * @class: Class for the CDX device + * @revision: Revision of the CDX device * @bus_num: Bus number for this CDX device * @dev_num: Device number for this device * @res: array of MMIO region entries * @res_attr: resource binary attribute + * @debugfs_dir: debugfs directory for this device * @res_count: number of valid MMIO regions * @dma_mask: Default DMA mask * @flags: CDX device flags * @req_id: Requestor ID associated with CDX device + * @is_bus: Is this bus device + * @enabled: is this bus enabled + * @msi_dev_id: MSI Device ID associated with CDX device + * @num_msi: Number of MSI's supported by the device * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it. + * @irqchip_lock: lock to synchronize irq/msi configuration + * @msi_write_pending: MSI write pending for this device */ struct cdx_device { struct device dev; struct cdx_controller *cdx; u16 vendor; u16 device; + u16 subsystem_vendor; + u16 subsystem_device; + u32 class; + u8 revision; u8 bus_num; u8 dev_num; struct resource res[MAX_CDX_DEV_RESOURCES]; + struct bin_attribute *res_attr[MAX_CDX_DEV_RESOURCES]; + struct dentry *debugfs_dir; u8 res_count; u64 dma_mask; u16 flags; u32 req_id; + bool is_bus; + bool enabled; + u32 msi_dev_id; + u32 num_msi; const char *driver_override; + struct mutex irqchip_lock; + bool msi_write_pending; }; #define to_cdx_device(_dev) \ container_of(_dev, struct cdx_device, dev) +#define cdx_resource_start(dev, num) ((dev)->res[(num)].start) +#define cdx_resource_end(dev, num) ((dev)->res[(num)].end) +#define cdx_resource_flags(dev, num) ((dev)->res[(num)].flags) +#define cdx_resource_len(dev, num) \ + ((cdx_resource_start((dev), (num)) == 0 && \ + cdx_resource_end((dev), (num)) == \ + cdx_resource_start((dev), (num))) ? 0 : \ + (cdx_resource_end((dev), (num)) - \ + cdx_resource_start((dev), (num)) + 1)) /** * struct cdx_driver - CDX device driver * @driver: Generic device driver @@ -170,4 +244,48 @@ extern struct bus_type cdx_bus_type; */ int cdx_dev_reset(struct device *dev); +/** + * cdx_set_master - enables bus-mastering for CDX device + * @cdx_dev: the CDX device to enable + * + * Return: 0 for success, -errno on failure + */ +int cdx_set_master(struct cdx_device *cdx_dev); + +/** + * cdx_clear_master - disables bus-mastering for CDX device + * @cdx_dev: the CDX device to disable + * + * Return: 0 for success, -errno on failure + */ +int cdx_clear_master(struct cdx_device *cdx_dev); + +#ifdef CONFIG_GENERIC_MSI_IRQ +/** + * cdx_enable_msi - Enable MSI for the CDX device. + * @cdx_dev: device pointer + * + * Return: 0 for success, -errno on failure + */ +int cdx_enable_msi(struct cdx_device *cdx_dev); + +/** + * cdx_disable_msi - Disable MSI for the CDX device. + * @cdx_dev: device pointer + */ +void cdx_disable_msi(struct cdx_device *cdx_dev); + +#else /* CONFIG_GENERIC_MSI_IRQ */ + +static inline int cdx_enable_msi(struct cdx_device *cdx_dev) +{ + return -ENODEV; +} + +static inline void cdx_disable_msi(struct cdx_device *cdx_dev) +{ +} + +#endif /* CONFIG_GENERIC_MSI_IRQ */ + #endif /* _CDX_BUS_H_ */ diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) \ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) \ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f3b3593254b9..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,6 +357,11 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) + extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_MODE (1 << 0) @@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 2 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 struct ceph_mds_request_head_old { __le16 version; /* struct version */ @@ -528,6 +533,9 @@ struct ceph_mds_request_head { __le32 ext_num_retry; /* new count retry attempts */ __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ } __attribute__ ((packed)); /* cap/lease release record */ diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h deleted file mode 100644 index 4c3e0648dc27..000000000000 --- a/include/linux/ceph/mdsmap.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FS_CEPH_MDSMAP_H -#define _FS_CEPH_MDSMAP_H - -#include <linux/bug.h> -#include <linux/ceph/types.h> - -/* - * mds map - describe servers in the mds cluster. - * - * we limit fields to those the client actually xcares about - */ -struct ceph_mds_info { - u64 global_id; - struct ceph_entity_addr addr; - s32 state; - int num_export_targets; - bool laggy; - u32 *export_targets; -}; - -struct ceph_mdsmap { - u32 m_epoch, m_client_epoch, m_last_failure; - u32 m_root; - u32 m_session_timeout; /* seconds */ - u32 m_session_autoclose; /* seconds */ - u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ - u32 m_max_mds; /* expected up:active mds number */ - u32 m_num_active_mds; /* actual up:active mds number */ - u32 possible_max_rank; /* possible max rank index */ - struct ceph_mds_info *m_info; - - /* which object pools file data can be stored in */ - int m_num_data_pg_pools; - u64 *m_data_pg_pools; - u64 m_cas_pg_pool; - - bool m_enabled; - bool m_damaged; - int m_num_laggy; -}; - -static inline struct ceph_entity_addr * -ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) -{ - if (w >= m->possible_max_rank) - return NULL; - return &m->m_info[w].addr; -} - -static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) -{ - BUG_ON(w < 0); - if (w >= m->possible_max_rank) - return CEPH_MDS_STATE_DNE; - return m->m_info[w].state; -} - -static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) -{ - if (w >= 0 && w < m->possible_max_rank) - return m->m_info[w].laggy; - return false; -} - -extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); -extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); -extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); - -#endif diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2eaaabbe98cb..1717cc57cdac 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -283,7 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; - bool sparse_read; + u64 sparse_read_total; int front_alloc_len; struct ceph_msgpool *pool; diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bf9823956758..f66f6aac74f6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -45,6 +45,7 @@ enum ceph_sparse_read_state { CEPH_SPARSE_READ_HDR = 0, CEPH_SPARSE_READ_EXTENTS, CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, CEPH_SPARSE_READ_DATA, }; @@ -64,7 +65,7 @@ struct ceph_sparse_read { u64 sr_req_len; /* orig request length */ u64 sr_pos; /* current pos in buffer */ int sr_index; /* current extent index */ - __le32 sr_datalen; /* length of actual data */ + u32 sr_datalen; /* length of actual data */ u32 sr_count; /* extent count in reply */ int sr_ext_len; /* length of extent array */ struct ceph_sparse_extent *sr_extent; /* extent array */ @@ -278,7 +279,7 @@ struct ceph_osd_request { int r_attempts; u32 r_map_dne_bound; - struct ceph_osd_req_op r_ops[]; + struct ceph_osd_req_op r_ops[] __counted_by(r_num_ops); }; struct ceph_request_redirect { @@ -572,9 +573,12 @@ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); */ #define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 -static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) +static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) { - return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); + if (!cnt) + cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL; + + return __ceph_alloc_sparse_ext_map(op, cnt); } extern void ceph_osdc_get_request(struct ceph_osd_request *req); diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 3552ec82b725..f0df518e11dd 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -9,6 +9,14 @@ #include <linux/bug.h> #include <linux/module.h> +#include <asm/cfi.h> + +#ifndef cfi_get_offset +static inline int cfi_get_offset(void) +{ + return 0; +} +#endif #ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, @@ -38,4 +46,8 @@ static inline void module_cfi_finalize(const Elf_Ehdr *hdr, #endif /* CONFIG_ARCH_USES_CFI_TRAPS */ #endif /* CONFIG_MODULES */ +#ifndef CFI_NOSEAL +#define CFI_NOSEAL(x) +#endif + #endif /* _LINUX_CFI_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 265da00a1a8b..ea48c861cd36 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -115,6 +115,11 @@ enum { * Enable recursive subtree protection */ CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), + + /* + * Enable hugetlb accounting for the memory controller. + */ + CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), }; /* cftype->flags */ @@ -491,6 +496,20 @@ struct cgroup { struct cgroup_rstat_cpu __percpu *rstat_cpu; struct list_head rstat_css_list; + /* + * Add padding to separate the read mostly rstat_cpu and + * rstat_css_list into a different cacheline from the following + * rstat_flush_next and *bstat fields which can have frequent updates. + */ + CACHELINE_PADDING(_pad_); + + /* + * A singly-linked list of cgroup structures to be rstat flushed. + * This is a scratch field to be used exclusively by + * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. + */ + struct cgroup *rstat_flush_next; + /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; @@ -543,6 +562,10 @@ struct cgroup_root { /* Unique id for this hierarchy. */ int hierarchy_id; + /* A list running through the active hierarchies */ + struct list_head root_list; + struct rcu_head rcu; /* Must be near the top */ + /* * The root cgroup. The containing cgroup_root will be destroyed on its * release. cgrp->ancestors[0] will be used overflowing into the @@ -556,9 +579,6 @@ struct cgroup_root { /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; - /* A list running through the active hierarchies */ - struct list_head root_list; - /* Hierarchy-specific flags */ unsigned int flags; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b307013b9c6c..34aaf0e87def 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -40,13 +40,11 @@ struct kernel_clone_args; #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 -/* walk only threadgroup leaders */ -#define CSS_TASK_ITER_PROCS (1U << 0) -/* walk all threaded css_sets in the domain */ -#define CSS_TASK_ITER_THREADED (1U << 1) - -/* internal flags */ -#define CSS_TASK_ITER_SKIPPED (1U << 16) +enum { + CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ + CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ + CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ +}; /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { @@ -71,6 +69,7 @@ struct css_task_iter { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern spinlock_t css_set_lock; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> @@ -388,7 +387,6 @@ static inline void cgroup_unlock(void) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ rcu_read_lock_sched_held() || \ @@ -855,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b0..c2d09bc4f976 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -7,8 +7,9 @@ /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() - * based cleanup function. @free is an expression using '_T' to access - * the variable. + * based cleanup function. @free is an expression using '_T' to access the + * variable. @free should typically include a NULL test before calling a + * function, see the example below. * * __free(name): * variable attribute to add a scoped based cleanup to the variable. @@ -17,6 +18,9 @@ * like a non-atomic xchg(var, NULL), such that the cleanup function will * be inhibited -- provided it sanely deals with a NULL value. * + * NOTE: this has __must_check semantics so that it is harder to accidentally + * leak the resource. + * * return_ptr(p): * returns p while inhibiting the __free(). * @@ -24,6 +28,8 @@ * * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) * + * void *alloc_obj(...) + * { * struct obj *p __free(kfree) = kmalloc(...); * if (!p) * return NULL; @@ -32,6 +38,24 @@ * return NULL; * * return_ptr(p); + * } + * + * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though + * kfree() is fine to be called with a NULL value. This is on purpose. This way + * the compiler sees the end of our alloc_obj() function as: + * + * tmp = p; + * p = NULL; + * if (p) + * kfree(p); + * return tmp; + * + * And through the magic of value-propagation and dead-code-elimination, it + * eliminates the actual cleanup call and compiles into: + * + * return p; + * + * Without the NULL test it turns into a mess and the compiler can't help us. */ #define DEFINE_FREE(_name, _type, _free) \ @@ -39,8 +63,17 @@ #define __free(_name) __cleanup(__free_##_name) +#define __get_and_null_ptr(p) \ + ({ __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = NULL; __val; }) + +static inline __must_check +const volatile void * __must_check_fn(const volatile void *val) +{ return val; } + #define no_free_ptr(p) \ - ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) #define return_ptr(p) return no_free_ptr(p) @@ -92,25 +125,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * trivial wrapper around DEFINE_CLASS() above specifically * for locks. * + * DEFINE_GUARD_COND(name, ext, condlock) + * wrapper around EXTEND_CLASS above to add conditional lock + * variants to a base class, eg. mutex_trylock() or + * mutex_lock_interruptible(). + * * guard(name): - * an anonymous instance of the (guard) class + * an anonymous instance of the (guard) class, not recommended for + * conditional locks. * * scoped_guard (name, args...) { }: * similar to CLASS(name, scope)(args), except the variable (with the * explicit name 'scope') is declard in a for-loop such that its scope is * bound to the next (compound) statement. * + * for conditional locks the loop body is skipped when the lock is not + * acquired. + * + * scoped_cond_guard (name, fail, args...) { }: + * similar to scoped_guard(), except it does fail when the lock + * acquire fails. + * */ #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return *_T; } + +#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + class_##_name##_t _T) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) +#define __guard_ptr(_name) class_##_name##_lock_ptr + #define scoped_guard(_name, args...) \ for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) + *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) + +#define scoped_cond_guard(_name, _fail, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) \ + if (!__guard_ptr(_name)(&scope)) _fail; \ + else /* * Additional helper macros for generating lock guards with types, either for @@ -119,6 +182,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock) * * will result in the following type: * @@ -140,6 +204,11 @@ typedef struct { \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ if (_T->lock) { _unlock; } \ +} \ + \ +static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ +{ \ + return _T->lock; \ } @@ -168,4 +237,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) +#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ + if (_T->lock && !(_condlock)) _T->lock = NULL; \ + _t; }), \ + typeof_member(class_##_name##_t, lock) l) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } + + #endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index ec32ec58c59f..4a537260f655 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -74,7 +74,7 @@ void clk_hw_forward_rate_request(const struct clk_hw *core, unsigned long parent_rate); /** - * struct clk_duty - Struture encoding the duty cycle ratio of a clock + * struct clk_duty - Structure encoding the duty cycle ratio of a clock * * @num: Numerator of the duty cycle ratio * @den: Denominator of the duty cycle ratio @@ -129,7 +129,7 @@ struct clk_duty { * @restore_context: Restore the context of the clock after a restoration * of power. * - * @recalc_rate Recalculate the rate of this clock, by querying hardware. The + * @recalc_rate: Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to * ensure that the prepare_mutex is held across this call. If the * driver cannot figure out a rate for this clock, it must return @@ -448,15 +448,15 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, */ #define clk_hw_register_fixed_rate_with_accuracy_parent_hw(dev, name, \ parent_hw, flags, fixed_rate, fixed_accuracy) \ - __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw) \ - NULL, NULL, (flags), (fixed_rate), \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \ + NULL, (flags), (fixed_rate), \ (fixed_accuracy), 0, false) /** * clk_hw_register_fixed_rate_with_accuracy_parent_data - register fixed-rate * clock with the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate * @fixed_accuracy: non-adjustable clock accuracy @@ -471,7 +471,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate */ @@ -649,7 +649,7 @@ struct clk_div_table { * Clock with an adjustable divider affecting its output frequency. Implements * .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the * register plus one. If CLK_DIVIDER_ONE_BASED is set then the divider is * the raw value read from the register, with the value of zero considered @@ -1084,18 +1084,28 @@ void of_fixed_factor_clk_setup(struct device_node *node); * @hw: handle between common and hardware-specific interfaces * @mult: multiplier * @div: divider + * @acc: fixed accuracy in ppb + * @flags: behavior modifying flags * * Clock with a fixed multiplier and divider. The output frequency is the * parent clock rate divided by div and multiplied by mult. - * Implements .recalc_rate, .set_rate and .round_rate + * Implements .recalc_rate, .set_rate, .round_rate and .recalc_accuracy + * + * Flags: + * * CLK_FIXED_FACTOR_FIXED_ACCURACY - Use the value in @acc instead of the + * parent clk accuracy. */ struct clk_fixed_factor { struct clk_hw hw; unsigned int mult; unsigned int div; + unsigned long acc; + unsigned int flags; }; +#define CLK_FIXED_FACTOR_FIXED_ACCURACY BIT(0) + #define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw) extern const struct clk_ops clk_fixed_factor_ops; @@ -1106,10 +1116,24 @@ void clk_unregister_fixed_factor(struct clk *clk); struct clk_hw *clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *clk_hw_register_fixed_factor_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div, + unsigned long acc); void clk_hw_unregister_fixed_factor(struct clk_hw *hw); struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div, + unsigned long acc); struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev, const char *name, unsigned int index, unsigned long flags, unsigned int mult, unsigned int div); @@ -1130,11 +1154,12 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev, * @mwidth: width of the numerator bit field * @nshift: shift to the denominator bit field * @nwidth: width of the denominator bit field + * @approximation: clk driver's callback for calculating the divider clock * @lock: register lock * * Clock with adjustable fractional divider affecting its output frequency. * - * Flags: + * @flags: * CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator * is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED * is set then the numerator and denominator are both the value read @@ -1191,7 +1216,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw); * Clock with an adjustable multiplier affecting its output frequency. * Implements .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read * from the register, with 0 being a valid value effectively * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is diff --git a/include/linux/clk.h b/include/linux/clk.h index 06f1b292f8a0..0fa56d672532 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -202,6 +202,18 @@ bool clk_is_match(const struct clk *p, const struct clk *q); int clk_rate_exclusive_get(struct clk *clk); /** + * devm_clk_rate_exclusive_get - devm variant of clk_rate_exclusive_get + * @dev: device the exclusivity is bound to + * @clk: clock source + * + * Calls clk_rate_exclusive_get() on @clk and registers a devm cleanup handler + * on @dev to call clk_rate_exclusive_put(). + * + * Must not be called from within atomic context. + */ +int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk); + +/** * clk_rate_exclusive_put - release exclusivity over the rate control of a * producer * @clk: clock source @@ -274,6 +286,11 @@ static inline int clk_rate_exclusive_get(struct clk *clk) return 0; } +static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk) +{ + return 0; +} + static inline void clk_rate_exclusive_put(struct clk *clk) {} #endif @@ -479,6 +496,22 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** + * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed) + * @dev: device for clock "consumer" + * @clks: pointer to the clk_bulk_data table of consumer + * + * Returns success (0) or negative errno. + * + * This helper function allows drivers to get all clocks of the + * consumer and enables them in one operation with management. + * The clks will automatically be disabled and freed when the device + * is unbound. + */ + +int __must_check devm_clk_bulk_get_all_enable(struct device *dev, + struct clk_bulk_data **clks); + +/** * devm_clk_get - lookup and obtain a managed reference to a clock producer. * @dev: device for clock "consumer" * @id: clock consumer ID @@ -968,6 +1001,12 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev, return 0; } +static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev, + struct clk_bulk_data **clks) +{ + return 0; +} + static inline struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index cbfcbf186ce3..e656f63efdce 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -13,11 +13,14 @@ /** * struct clk_omap_reg - OMAP register declaration * @offset: offset from the master IP module base address + * @bit: register bit offset * @index: index of the master IP module + * @flags: flags */ struct clk_omap_reg { void __iomem *ptr; u16 offset; + u8 bit; u8 index; u8 flags; }; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b17327..0ad8b550bb4b 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,7 +291,19 @@ static inline void timer_probe(void) {} #define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) -extern ulong max_cswd_read_retries; +static inline unsigned int clocksource_get_max_watchdog_retry(void) +{ + /* + * When system is in the boot phase or under heavy workload, there + * can be random big latencies during the clocksource/watchdog + * read, so allow retries to filter the noise latency. As the + * latency's frequency and maximum value goes up with the number of + * CPUs, scale the number of retries with the number of online + * CPUs. + */ + return (ilog2(num_online_cpus()) / 2) + 1; +} + void clocksource_verify_percpu(struct clocksource *cs); #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index 16775d7d8f8d..a4fa3436940c 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -6,6 +6,9 @@ enum clocksource_ids { CSID_GENERIC = 0, CSID_ARM_ARCH_COUNTER, + CSID_X86_TSC_EARLY, + CSID_X86_TSC, + CSID_X86_KVM_CLK, CSID_MAX, }; diff --git a/include/linux/closure.h b/include/linux/closure.h new file mode 100644 index 000000000000..c554c6a08768 --- /dev/null +++ b/include/linux/closure.h @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLOSURE_H +#define _LINUX_CLOSURE_H + +#include <linux/llist.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/workqueue.h> + +/* + * Closure is perhaps the most overused and abused term in computer science, but + * since I've been unable to come up with anything better you're stuck with it + * again. + * + * What are closures? + * + * They embed a refcount. The basic idea is they count "things that are in + * progress" - in flight bios, some other thread that's doing something else - + * anything you might want to wait on. + * + * The refcount may be manipulated with closure_get() and closure_put(). + * closure_put() is where many of the interesting things happen, when it causes + * the refcount to go to 0. + * + * Closures can be used to wait on things both synchronously and asynchronously, + * and synchronous and asynchronous use can be mixed without restriction. To + * wait synchronously, use closure_sync() - you will sleep until your closure's + * refcount hits 1. + * + * To wait asynchronously, use + * continue_at(cl, next_function, workqueue); + * + * passing it, as you might expect, the function to run when nothing is pending + * and the workqueue to run that function out of. + * + * continue_at() also, critically, requires a 'return' immediately following the + * location where this macro is referenced, to return to the calling function. + * There's good reason for this. + * + * To use safely closures asynchronously, they must always have a refcount while + * they are running owned by the thread that is running them. Otherwise, suppose + * you submit some bios and wish to have a function run when they all complete: + * + * foo_endio(struct bio *bio) + * { + * closure_put(cl); + * } + * + * closure_init(cl); + * + * do_stuff(); + * closure_get(cl); + * bio1->bi_endio = foo_endio; + * bio_submit(bio1); + * + * do_more_stuff(); + * closure_get(cl); + * bio2->bi_endio = foo_endio; + * bio_submit(bio2); + * + * continue_at(cl, complete_some_read, system_wq); + * + * If closure's refcount started at 0, complete_some_read() could run before the + * second bio was submitted - which is almost always not what you want! More + * importantly, it wouldn't be possible to say whether the original thread or + * complete_some_read()'s thread owned the closure - and whatever state it was + * associated with! + * + * So, closure_init() initializes a closure's refcount to 1 - and when a + * closure_fn is run, the refcount will be reset to 1 first. + * + * Then, the rule is - if you got the refcount with closure_get(), release it + * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount + * on a closure because you called closure_init() or you were run out of a + * closure - _always_ use continue_at(). Doing so consistently will help + * eliminate an entire class of particularly pernicious races. + * + * Lastly, you might have a wait list dedicated to a specific event, and have no + * need for specifying the condition - you just want to wait until someone runs + * closure_wake_up() on the appropriate wait list. In that case, just use + * closure_wait(). It will return either true or false, depending on whether the + * closure was already on a wait list or not - a closure can only be on one wait + * list at a time. + * + * Parents: + * + * closure_init() takes two arguments - it takes the closure to initialize, and + * a (possibly null) parent. + * + * If parent is non null, the new closure will have a refcount for its lifetime; + * a closure is considered to be "finished" when its refcount hits 0 and the + * function to run is null. Hence + * + * continue_at(cl, NULL, NULL); + * + * returns up the (spaghetti) stack of closures, precisely like normal return + * returns up the C stack. continue_at() with non null fn is better thought of + * as doing a tail call. + * + * All this implies that a closure should typically be embedded in a particular + * struct (which its refcount will normally control the lifetime of), and that + * struct can very much be thought of as a stack frame. + */ + +struct closure; +struct closure_syncer; +typedef void (closure_fn) (struct work_struct *); +extern struct dentry *bcache_debug; + +struct closure_waitlist { + struct llist_head list; +}; + +enum closure_state { + /* + * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by + * the thread that owns the closure, and cleared by the thread that's + * waking up the closure. + * + * The rest are for debugging and don't affect behaviour: + * + * CLOSURE_RUNNING: Set when a closure is running (i.e. by + * closure_init() and when closure_put() runs then next function), and + * must be cleared before remaining hits 0. Primarily to help guard + * against incorrect usage and accidentally transferring references. + * continue_at() and closure_return() clear it for you, if you're doing + * something unusual you can use closure_set_dead() which also helps + * annotate where references are being transferred. + */ + + CLOSURE_BITS_START = (1U << 26), + CLOSURE_DESTRUCTOR = (1U << 26), + CLOSURE_WAITING = (1U << 28), + CLOSURE_RUNNING = (1U << 30), +}; + +#define CLOSURE_GUARD_MASK \ + ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) + +#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) +#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) + +struct closure { + union { + struct { + struct workqueue_struct *wq; + struct closure_syncer *s; + struct llist_node list; + closure_fn *fn; + }; + struct work_struct work; + }; + + struct closure *parent; + + atomic_t remaining; + bool closure_get_happened; + +#ifdef CONFIG_DEBUG_CLOSURES +#define CLOSURE_MAGIC_DEAD 0xc054dead +#define CLOSURE_MAGIC_ALIVE 0xc054a11e + + unsigned int magic; + struct list_head all; + unsigned long ip; + unsigned long waiting_on; +#endif +}; + +void closure_sub(struct closure *cl, int v); +void closure_put(struct closure *cl); +void __closure_wake_up(struct closure_waitlist *list); +bool closure_wait(struct closure_waitlist *list, struct closure *cl); +void __closure_sync(struct closure *cl); + +static inline unsigned closure_nr_remaining(struct closure *cl) +{ + return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK; +} + +/** + * closure_sync - sleep until a closure a closure has nothing left to wait on + * + * Sleeps until the refcount hits 1 - the thread that's running the closure owns + * the last refcount. + */ +static inline void closure_sync(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened); +#endif + + if (cl->closure_get_happened) + __closure_sync(cl); +} + +#ifdef CONFIG_DEBUG_CLOSURES + +void closure_debug_create(struct closure *cl); +void closure_debug_destroy(struct closure *cl); + +#else + +static inline void closure_debug_create(struct closure *cl) {} +static inline void closure_debug_destroy(struct closure *cl) {} + +#endif + +static inline void closure_set_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _THIS_IP_; +#endif +} + +static inline void closure_set_ret_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _RET_IP_; +#endif +} + +static inline void closure_set_waiting(struct closure *cl, unsigned long f) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->waiting_on = f; +#endif +} + +static inline void closure_set_stopped(struct closure *cl) +{ + atomic_sub(CLOSURE_RUNNING, &cl->remaining); +} + +static inline void set_closure_fn(struct closure *cl, closure_fn *fn, + struct workqueue_struct *wq) +{ + closure_set_ip(cl); + cl->fn = fn; + cl->wq = wq; +} + +static inline void closure_queue(struct closure *cl) +{ + struct workqueue_struct *wq = cl->wq; + /** + * Changes made to closure, work_struct, or a couple of other structs + * may cause work.func not pointing to the right location. + */ + BUILD_BUG_ON(offsetof(struct closure, fn) + != offsetof(struct work_struct, func)); + + if (wq) { + INIT_WORK(&cl->work, cl->work.func); + BUG_ON(!queue_work(wq, &cl->work)); + } else + cl->fn(&cl->work); +} + +/** + * closure_get - increment a closure's refcount + */ +static inline void closure_get(struct closure *cl) +{ + cl->closure_get_happened = true; + +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); +#else + atomic_inc(&cl->remaining); +#endif +} + +/** + * closure_init - Initialize a closure, setting the refcount to 1 + * @cl: closure to initialize + * @parent: parent of the new closure. cl will take a refcount on it for its + * lifetime; may be NULL. + */ +static inline void closure_init(struct closure *cl, struct closure *parent) +{ + cl->fn = NULL; + cl->parent = parent; + if (parent) + closure_get(parent); + + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + cl->closure_get_happened = false; + + closure_debug_create(cl); + closure_set_ip(cl); +} + +static inline void closure_init_stack(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +} + +/** + * closure_wake_up - wake up all closures on a wait list, + * with memory barrier + */ +static inline void closure_wake_up(struct closure_waitlist *list) +{ + /* Memory barrier for the wait list */ + smp_mb(); + __closure_wake_up(list); +} + +#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws) +#define closure_type(name, type, member) \ + struct closure *cl = container_of(ws, struct closure, work); \ + type *name = container_of(cl, type, member) + +/** + * continue_at - jump to another function with barrier + * + * After @cl is no longer waiting on anything (i.e. all outstanding refs have + * been dropped with closure_put()), it will resume execution at @fn running out + * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). + * + * This is because after calling continue_at() you no longer have a ref on @cl, + * and whatever @cl owns may be freed out from under you - a running closure fn + * has a ref on its own closure which continue_at() drops. + * + * Note you are expected to immediately return after using this macro. + */ +#define continue_at(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_sub(_cl, CLOSURE_RUNNING + 1); \ +} while (0) + +/** + * closure_return - finish execution of a closure + * + * This is used to indicate that @cl is finished: when all outstanding refs on + * @cl have been dropped @cl's ref on its parent closure (as passed to + * closure_init()) will be dropped, if one was specified - thus this can be + * thought of as returning to the parent closure. + */ +#define closure_return(_cl) continue_at((_cl), NULL, NULL) + +/** + * continue_at_nobarrier - jump to another function without barrier + * + * Causes @fn to be executed out of @cl, in @wq context (or called directly if + * @wq is NULL). + * + * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, + * thus it's not safe to touch anything protected by @cl after a + * continue_at_nobarrier(). + */ +#define continue_at_nobarrier(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_queue(_cl); \ +} while (0) + +/** + * closure_return_with_destructor - finish execution of a closure, + * with destructor + * + * Works like closure_return(), except @destructor will be called when all + * outstanding refs on @cl have been dropped; @destructor may be used to safely + * free the memory occupied by @cl, and it is called with the ref on the parent + * closure still held - so @destructor could safely return an item to a + * freelist protected by @cl's parent. + */ +#define closure_return_with_destructor(_cl, _destructor) \ +do { \ + set_closure_fn(_cl, _destructor, NULL); \ + closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ +} while (0) + +/** + * closure_call - execute @fn out of a new, uninitialized closure + * + * Typically used when running out of one closure, and we want to run @fn + * asynchronously out of a new closure - @parent will then wait for @cl to + * finish. + */ +static inline void closure_call(struct closure *cl, closure_fn fn, + struct workqueue_struct *wq, + struct closure *parent) +{ + closure_init(cl, parent); + continue_at_nobarrier(cl, fn, wq); +} + +#define __closure_wait_event(waitlist, _cond) \ +do { \ + struct closure cl; \ + \ + closure_init_stack(&cl); \ + \ + while (1) { \ + closure_wait(waitlist, &cl); \ + if (_cond) \ + break; \ + closure_sync(&cl); \ + } \ + closure_wake_up(waitlist); \ + closure_sync(&cl); \ +} while (0) + +#define closure_wait_event(waitlist, _cond) \ +do { \ + if (!(_cond)) \ + __closure_wait_event(waitlist, _cond); \ +} while (0) + +#endif /* _LINUX_CLOSURE_H */ diff --git a/include/linux/cma.h b/include/linux/cma.h index 63873b93deaa..9db877506ea8 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -6,12 +6,8 @@ #include <linux/types.h> #include <linux/numa.h> -/* - * There is always at least global CMA area and a few optional - * areas configured in kernel .config. - */ #ifdef CONFIG_CMA_AREAS -#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) +#define MAX_CMA_AREAS CONFIG_CMA_AREAS #endif #define CMA_MAX_NAME 64 diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index d8264417e53c..d527f04400df 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -12,6 +12,8 @@ #define _COMEDI_8254_H #include <linux/types.h> +#include <linux/errno.h> +#include <linux/err.h> struct comedi_device; struct comedi_insn; @@ -57,10 +59,24 @@ struct comedi_subdevice; /* counter maps zero to 0x10000 */ #define I8254_MAX_COUNT 0x10000 +struct comedi_8254; + +/** + * typedef comedi_8254_iocb_fn - call-back function type for 8254 register access + * @i8254: pointer to struct comedi_8254 + * @dir: direction (0 = read, 1 = write) + * @reg: register number + * @val: value to write + * + * Return: Register value when reading, 0 when writing. + */ +typedef unsigned int comedi_8254_iocb_fn(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val); + /** * struct comedi_8254 - private data used by this module - * @iobase: PIO base address of the registers (in/out) - * @mmio: MMIO base address of the registers (read/write) + * @iocb: I/O call-back function for register access + * @context: context for register access (e.g. a base address) * @iosize: I/O size used to access the registers (b/w/l) * @regshift: register gap shift * @osc_base: cascaded oscillator speed in ns @@ -76,8 +92,8 @@ struct comedi_subdevice; * @insn_config: driver specific (*insn_config) callback */ struct comedi_8254 { - unsigned long iobase; - void __iomem *mmio; + comedi_8254_iocb_fn *iocb; + unsigned long context; unsigned int iosize; unsigned int regshift; unsigned int osc_base; @@ -122,13 +138,24 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); -struct comedi_8254 *comedi_8254_init(unsigned long iobase, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); -struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); +#ifdef CONFIG_HAS_IOPORT +struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +#else +static inline struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) +{ + return ERR_PTR(-ENXIO); +} +#endif + +struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); #endif /* _COMEDI_8254_H */ diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b2a5bc6b3a49..d24a69da389b 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -10,6 +10,8 @@ #ifndef _COMEDI_8255_H #define _COMEDI_8255_H +#include <linux/errno.h> + #define I8255_SIZE 0x04 #define I8255_DATA_A_REG 0x00 @@ -27,16 +29,26 @@ struct comedi_device; struct comedi_subdevice; -int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), - unsigned long regbase); +#ifdef CONFIG_HAS_IOPORT +int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned long regbase); +#else +static inline int subdev_8255_io_init(struct comedi_device *dev, + struct comedi_subdevice *s, + unsigned long regbase) +{ + return -ENXIO; +} +#endif int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), unsigned long regbase); +int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long context), + unsigned long context); + unsigned long subdev_8255_regbase(struct comedi_subdevice *s); #endif diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index 0a1150900ef3..c08416a7364b 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -633,7 +633,7 @@ extern const struct comedi_lrange range_unknown; */ struct comedi_lrange { int length; - struct comedi_krange range[]; + struct comedi_krange range[] __counted_by(length); }; /** diff --git a/include/linux/compat.h b/include/linux/compat.h index 1cfa4f0f490a..233f61ec8afc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -581,7 +581,6 @@ asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __compat_aio_sigset __user *usig); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 9b673fefcef8..49feac0162a5 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -9,16 +9,11 @@ * Clang prior to 17 is being silly and considers many __cleanup() variables * as unused (because they are, their sole purpose is to go out of scope). * - * https://reviews.llvm.org/D152180 + * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61 */ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) -/* same as gcc, this was present in clang-2.6 so we can assume it works - * with any version that can compile the kernel - */ -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 @@ -119,11 +114,7 @@ #define __diag_str(s) __diag_str1(s) #define __diag(s) _Pragma(__diag_str(clang diagnostic s)) -#if CONFIG_CLANG_VERSION >= 110000 -#define __diag_clang_11(s) __diag(s) -#else -#define __diag_clang_11(s) -#endif +#define __diag_clang_13(s) __diag(s) #define __diag_ignore_all(option, comment) \ - __diag_clang(11, ignore, option) + __diag_clang(13, ignore, option) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7af9e34ec261..aff92b1d284f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,12 +35,10 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif @@ -66,6 +64,26 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + * + * Work around it via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) +#endif + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ @@ -138,7 +156,7 @@ #endif #define __diag_ignore_all(option, comment) \ - __diag_GCC(8, ignore, option) + __diag(__diag_GCC_ignore option) /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..8c252e073bd8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n +#define __annotate_reachable(c) ({ \ + asm volatile(__stringify_label(c) ":\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long " __stringify_label(c) "b - .\n\t" \ + ".popsection\n\t"); \ +}) +#define annotate_reachable() __annotate_reachable(__COUNTER__) + #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __annotate_jump_table __section(".rodata..c_jump_table") #else /* !CONFIG_OBJTOOL */ +#define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ @@ -177,10 +186,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -/* Not-quite-unique ID. */ -#ifndef __UNIQUE_ID -# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) -#endif +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** * data_race - mark an expression as containing intentional data races @@ -212,7 +218,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define ___ADDRESSABLE(sym, __attrs) \ static void * __used __attrs \ - __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym; #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) @@ -231,6 +237,53 @@ static inline void *offset_to_ptr(const int *off) #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) /* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + * + * Details: + * - sizeof() return an integer constant expression, and does not evaluate + * the value of its operand; it only examines the type of its operand. + * - The results of comparing two integer constant expressions is also + * an integer constant expression. + * - The first literal "8" isn't important. It could be any literal value. + * - The second literal "8" is to avoid warnings about unaligned pointers; + * this could otherwise just be "1". + * - (long)(x) is used to avoid warnings about 64-bit types on 32-bit + * architectures. + * - The C Standard defines "null pointer constant", "(void *)0", as + * distinct from other void pointers. + * - If (x) is an integer constant expression, then the "* 0l" resolves + * it into an integer constant expression of value 0. Since it is cast to + * "void *", this makes the second operand a null pointer constant. + * - If (x) is not an integer constant expression, then the second operand + * resolves to a void pointer (but not a null pointer constant: the value + * is not an integer constant 0). + * - The conditional operator's third operand, "(int *)8", is an object + * pointer (to type "int"). + * - The behavior (including the return type) of the conditional operator + * ("operand1 ? operand2 : operand3") depends on the kind of expressions + * given for the second and third operands. This is the central mechanism + * of the macro: + * - When one operand is a null pointer constant (i.e. when x is an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns the type of the + * object pointer operand (i.e. "int *"). Here, within the sizeof(), we + * would then get: + * sizeof(*((int *)(...)) == sizeof(int) == 4 + * - When one operand is a void pointer (i.e. when x is not an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns a "void *" type. + * Here, within the sizeof(), we would then get: + * sizeof(*((void *)(...)) == sizeof(void) == 1 + * - The equality comparison to "sizeof(int)" therefore depends on (x): + * sizeof(int) == sizeof(int) (x) was a constant expression + * sizeof(int) != sizeof(void) (x) was not a constant expression + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +/* * Whether 'type' is a signed type or an unsigned type. Supports scalar types, * bool and also pointer types. */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 28566624f008..8bdf6e0918c1 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -95,11 +95,11 @@ #endif /* - * Optional: only supported since gcc >= 14 + * Optional: only supported since gcc >= 15 * Optional: only supported since clang >= 18 * * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 + * clang: https://github.com/llvm/llvm-project/pull/76348 */ #if __has_attribute(__counted_by__) # define __counted_by(member) __attribute__((__counted_by__(member))) @@ -334,6 +334,18 @@ #define __section(section) __attribute__((__section__(section))) /* + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-uninitialized-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#uninitialized + */ +#if __has_attribute(__uninitialized__) +# define __uninitialized __attribute__((__uninitialized__)) +#else +# define __uninitialized +#endif + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c523c6683789..2abaa3a825a9 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -2,6 +2,15 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + #ifndef __ASSEMBLY__ /* @@ -90,17 +99,17 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute * * When -falign-functions=N is in use, we must avoid the cold attribute as - * contemporary versions of GCC drop the alignment for cold functions. Worse, - * GCC can implicitly mark callees of cold functions as cold themselves, so - * it's not sufficient to add __function_aligned here as that will not ensure - * that callees are correctly aligned. + * GCC drops the alignment for cold functions. Worse, GCC can implicitly mark + * callees of cold functions as cold themselves, so it's not sufficient to add + * __function_aligned here as that will not ensure that callees are correctly + * aligned. * * See: * * https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9 */ -#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0) +#if defined(CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT) || (CONFIG_FUNCTION_ALIGNMENT == 0) #define __cold __attribute__((__cold__)) #else #define __cold @@ -134,17 +143,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif -/* Builtins */ - -/* - * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. - * In the meantime, to support gcc < 10, we implement __has_builtin - * by hand. - */ -#ifndef __has_builtin -#define __has_builtin(x) (0) -#endif - /* Compiler specific macros. */ #ifdef __clang__ #include <linux/compiler-clang.h> @@ -284,11 +282,18 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* Do not trap wrapping arithmetic within an annotated function. */ +#ifdef CONFIG_UBSAN_SIGNED_WRAP +# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) +#else +# define __signed_wrap +#endif + /* Section for code which can't be instrumented at all */ #define __noinstr_section(section) \ noinline notrace __attribute((__section__(section))) \ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ - __no_sanitize_memory + __no_sanitize_memory __signed_wrap #define noinstr __noinstr_section(".noinstr.text") @@ -352,8 +357,27 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +/* + * When the size of an allocated object is needed, use the best available + * mechanism to find it. (For cases where sizeof() cannot be used.) + */ +#if __has_builtin(__builtin_dynamic_object_size) +#define __struct_size(p) __builtin_dynamic_object_size(p, 0) +#define __member_size(p) __builtin_dynamic_object_size(p, 1) +#else +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#endif + +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ +#ifndef asm_goto_output +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/include/linux/connector.h b/include/linux/connector.h index cec2d99ae902..70bc1160f3d8 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -100,8 +100,7 @@ void cn_del_callback(const struct cb_id *id); */ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask, - int (*filter)(struct sock *dsk, struct sk_buff *skb, - void *data), + netlink_filter_fn filter, void *filter_data); /** diff --git a/include/linux/console.h b/include/linux/console.h index 7de11c763eb3..31a8f5b85f5d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -18,6 +18,7 @@ #include <linux/bits.h> #include <linux/rculist.h> #include <linux/types.h> +#include <linux/vesa.h> struct vc_data; struct console_font_op; @@ -36,63 +37,91 @@ enum vc_intensity; /** * struct consw - callbacks for consoles * + * @owner: the module to get references of when this console is used + * @con_startup: set up the console and return its name (like VGA, EGA, ...) + * @con_init: initialize the console on @vc. @init is true for the very first + * call on this @vc. + * @con_deinit: deinitialize the console from @vc. + * @con_clear: erase @count characters at [@x, @y] on @vc. @count >= 1. + * @con_putc: emit one character with attributes @ca to [@x, @y] on @vc. + * (optional -- @con_putcs would be called instead) + * @con_putcs: emit @count characters with attributes @s to [@x, @y] on @vc. + * @con_cursor: enable/disable cursor depending on @enable * @con_scroll: move lines from @top to @bottom in direction @dir by @lines. * Return true if no generic handling should be done. * Invoked by csi_M and printing to the console. - * @con_set_palette: sets the palette of the console to @table (optional) + * @con_switch: notifier about the console switch; it is supposed to return + * true if a redraw is needed. + * @con_blank: blank/unblank the console. The target mode is passed in @blank. + * @mode_switch is set if changing from/to text/graphics. The hook + * is supposed to return true if a redraw is needed. + * @con_font_set: set console @vc font to @font with height @vpitch. @flags can + * be %KD_FONT_FLAG_DONT_RECALC. (optional) + * @con_font_get: fetch the current font on @vc of height @vpitch into @font. + * (optional) + * @con_font_default: set default font on @vc. @name can be %NULL or font name + * to search for. @font can be filled back. (optional) + * @con_resize: resize the @vc console to @width x @height. @from_user is true + * when this change comes from the user space. + * @con_set_palette: sets the palette of the console @vc to @table (optional) * @con_scrolldelta: the contents of the console should be scrolled by @lines. * Invoked by user. (optional) + * @con_set_origin: set origin (see &vc_data::vc_origin) of the @vc. If not + * provided or returns false, the origin is set to + * @vc->vc_screenbuf. (optional) + * @con_save_screen: save screen content into @vc->vc_screenbuf. Called e.g. + * upon entering graphics. (optional) + * @con_build_attr: build attributes based on @color, @intensity and other + * parameters. The result is used for both normal and erase + * characters. (optional) + * @con_invert_region: invert a region of length @count on @vc starting at @p. + * (optional) + * @con_debug_enter: prepare the console for the debugger. This includes, but + * is not limited to, unblanking the console, loading an + * appropriate palette, and allowing debugger generated output. + * (optional) + * @con_debug_leave: restore the console to its pre-debug state as closely as + * possible. (optional) */ struct consw { struct module *owner; const char *(*con_startup)(void); - void (*con_init)(struct vc_data *vc, int init); + void (*con_init)(struct vc_data *vc, bool init); void (*con_deinit)(struct vc_data *vc); - void (*con_clear)(struct vc_data *vc, int sy, int sx, int height, - int width); - void (*con_putc)(struct vc_data *vc, int c, int ypos, int xpos); - void (*con_putcs)(struct vc_data *vc, const unsigned short *s, - int count, int ypos, int xpos); - void (*con_cursor)(struct vc_data *vc, int mode); + void (*con_clear)(struct vc_data *vc, unsigned int y, + unsigned int x, unsigned int count); + void (*con_putc)(struct vc_data *vc, u16 ca, unsigned int y, + unsigned int x); + void (*con_putcs)(struct vc_data *vc, const u16 *s, + unsigned int count, unsigned int ypos, + unsigned int xpos); + void (*con_cursor)(struct vc_data *vc, bool enable); bool (*con_scroll)(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int lines); - int (*con_switch)(struct vc_data *vc); - int (*con_blank)(struct vc_data *vc, int blank, int mode_switch); - int (*con_font_set)(struct vc_data *vc, struct console_font *font, - unsigned int vpitch, unsigned int flags); + bool (*con_switch)(struct vc_data *vc); + bool (*con_blank)(struct vc_data *vc, enum vesa_blank_mode blank, + bool mode_switch); + int (*con_font_set)(struct vc_data *vc, + const struct console_font *font, + unsigned int vpitch, unsigned int flags); int (*con_font_get)(struct vc_data *vc, struct console_font *font, unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, - struct console_font *font, char *name); + struct console_font *font, const char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, - unsigned int height, unsigned int user); + unsigned int height, bool from_user); void (*con_set_palette)(struct vc_data *vc, const unsigned char *table); void (*con_scrolldelta)(struct vc_data *vc, int lines); - int (*con_set_origin)(struct vc_data *vc); + bool (*con_set_origin)(struct vc_data *vc); void (*con_save_screen)(struct vc_data *vc); u8 (*con_build_attr)(struct vc_data *vc, u8 color, enum vc_intensity intensity, bool blink, bool underline, bool reverse, bool italic); void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); - u16 *(*con_screen_pos)(const struct vc_data *vc, int offset); - unsigned long (*con_getxy)(struct vc_data *vc, unsigned long position, - int *px, int *py); - /* - * Flush the video console driver's scrollback buffer - */ - void (*con_flush_scrollback)(struct vc_data *vc); - /* - * Prepare the console for the debugger. This includes, but is not - * limited to, unblanking the console, loading an appropriate - * palette, and allowing debugger generated output. - */ - int (*con_debug_enter)(struct vc_data *vc); - /* - * Restore the console to its pre-debug state as closely as possible. - */ - int (*con_debug_leave)(struct vc_data *vc); + void (*con_debug_enter)(struct vc_data *vc); + void (*con_debug_leave)(struct vc_data *vc); }; extern const struct consw *conswitchp; @@ -101,36 +130,32 @@ extern const struct consw dummy_con; /* dummy console buffer */ extern const struct consw vga_con; /* VGA text console */ extern const struct consw newport_con; /* SGI Newport console */ +struct screen_info; +#ifdef CONFIG_VGA_CONSOLE +void vgacon_register_screen(struct screen_info *si); +#else +static inline void vgacon_register_screen(struct screen_info *si) { } +#endif + int con_is_bound(const struct consw *csw); int do_unregister_con_driver(const struct consw *csw); int do_take_over_console(const struct consw *sw, int first, int last, int deflt); void give_up_console(const struct consw *sw); -#ifdef CONFIG_HW_CONSOLE -int con_debug_enter(struct vc_data *vc); -int con_debug_leave(void); +#ifdef CONFIG_VT +void con_debug_enter(struct vc_data *vc); +void con_debug_leave(void); #else -static inline int con_debug_enter(struct vc_data *vc) -{ - return 0; -} -static inline int con_debug_leave(void) -{ - return 0; -} +static inline void con_debug_enter(struct vc_data *vc) { } +static inline void con_debug_leave(void) { } #endif -/* cursor */ -#define CM_DRAW (1) -#define CM_ERASE (2) -#define CM_MOVE (3) - /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) */ /** - * cons_flags - General console flags + * enum cons_flags - General console flags * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate * output of messages that were already shown by boot * consoles or read by userspace via syslog() syscall. @@ -156,6 +181,8 @@ static inline int con_debug_leave(void) * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. + * @CON_NBCON: Console can operate outside of the legacy style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -166,6 +193,111 @@ enum cons_flags { CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), +}; + +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * + * @req_prio: The priority of a handover request + * @prio: The priority of the current owner + * @unsafe: Console is busy in a non takeover region + * @unsafe_takeover: A hostile takeover in an unsafe state happened in the + * past. The console cannot be safe until re-initialized. + * @cpu: The CPU on which the owner runs + * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. + * + * The @prio and @req_prio fields are particularly important to allow + * spin-waiting to timeout and give up without the risk of a waiter being + * assigned the lock after giving up. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { + unsigned int prio : 2; + unsigned int req_prio : 2; + unsigned int unsafe : 1; + unsigned int unsafe_takeover : 1; + unsigned int cpu : 24; + }; + }; +}; + +/* + * The nbcon_state struct is used to easily create and interpret values that + * are stored in the @console::nbcon_state variable. Ensure this struct stays + * within the size boundaries of the atomic variable's underlying type in + * order to avoid any accidental truncation. + */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + +/** + * enum nbcon_prio - console owner priority for nbcon consoles + * @NBCON_PRIO_NONE: Unused + * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage + * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) + * @NBCON_PRIO_PANIC: Panic output + * @NBCON_PRIO_MAX: The number of priority levels + * + * A higher priority context can takeover the console when it is + * in the safe state. The final attempt to flush consoles in panic() + * can be allowed to do so even in an unsafe state (Hope and pray). + */ +enum nbcon_prio { + NBCON_PRIO_NONE = 0, + NBCON_PRIO_NORMAL, + NBCON_PRIO_EMERGENCY, + NBCON_PRIO_PANIC, + NBCON_PRIO_MAX, +}; + +struct console; +struct printk_buffers; + +/** + * struct nbcon_context - Context for console acquire/release + * @console: The associated console + * @spinwait_max_us: Limit for spin-wait acquire + * @prio: Priority of the context + * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. + * @backlog: Ringbuffer has pending records + * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context + */ +struct nbcon_context { + /* members set by caller */ + struct console *console; + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; + + /* members set by emit */ + unsigned int backlog : 1; + + /* members set by acquire */ + struct printk_buffers *pbufs; + u64 seq; +}; + +/** + * struct nbcon_write_context - Context handed to the nbcon write callbacks + * @ctxt: The core console context + * @outbuf: Pointer to the text buffer for output + * @len: Length to write + * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred + */ +struct nbcon_write_context { + struct nbcon_context __private ctxt; + char *outbuf; + unsigned int len; + bool unsafe_takeover; }; /** @@ -187,6 +319,11 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @write_atomic: Write callback for atomic context + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; @@ -206,6 +343,13 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP @@ -324,7 +468,7 @@ static inline bool console_is_registered(const struct console *con) * for_each_console() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * - * The console list and the console->flags are immutable while iterating. + * The console list and the &console.flags are immutable while iterating. * * Requires console_list_lock to be held. */ @@ -332,6 +476,16 @@ static inline bool console_is_registered(const struct console *con) lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) +#ifdef CONFIG_PRINTK +extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); +extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); +extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +#else +static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +#endif + extern int console_set_on_cmdline; extern struct console *early_console; @@ -340,7 +494,7 @@ enum con_flush_mode { CONSOLE_REPLAY_ALL, }; -extern int add_preferred_console(char *name, int idx, char *options); +extern int add_preferred_console(const char *name, const short idx, char *options); extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); @@ -384,12 +538,6 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; -/* VESA Blanking Levels */ -#define VESA_NO_BLANKING 0 -#define VESA_VSYNC_SUSPEND 1 -#define VESA_HSYNC_SUSPEND 2 -#define VESA_POWERDOWN 3 - extern void console_init(void); /* For deferred console takeover */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 539f1cd45309..20f564e98552 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -151,7 +151,6 @@ struct vc_data { DECLARE_BITMAP(vc_tab_stop, VC_TABSTOPS_COUNT); /* Tab stops. 256 columns. */ unsigned char vc_palette[16*3]; /* Colour palette for VGA+ */ unsigned short * vc_translate; - unsigned int vc_resize_user; /* resize request from user */ unsigned int vc_bell_pitch; /* Console bell pitch */ unsigned int vc_bell_duration; /* Console bell duration */ unsigned short vc_cur_blink_ms; /* Cursor blink duration */ diff --git a/include/linux/const.h b/include/linux/const.h index 435ddd72d2c4..81b8aae5a855 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,12 +3,4 @@ #include <vdso/const.h> -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #endif /* _LINUX_CONST_H */ diff --git a/include/linux/container.h b/include/linux/container.h index 2566a1baa736..dd00cc918a92 100644 --- a/include/linux/container.h +++ b/include/linux/container.h @@ -12,7 +12,7 @@ #include <linux/device.h> /* drivers/base/power/container.c */ -extern struct bus_type container_subsys; +extern const struct bus_type container_subsys; struct container_dev { struct device dev; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index a269fffaf991..5f288d475490 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -35,7 +35,7 @@ #define CORESIGHT_UNLOCK 0xc5acce55 -extern struct bus_type coresight_bustype; +extern const struct bus_type coresight_bustype; enum coresight_dev_type { CORESIGHT_DEV_TYPE_SINK, @@ -64,6 +64,7 @@ enum coresight_dev_subtype_source { CORESIGHT_DEV_SUBTYPE_SOURCE_PROC, CORESIGHT_DEV_SUBTYPE_SOURCE_BUS, CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, + CORESIGHT_DEV_SUBTYPE_SOURCE_TPDM, CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS, }; @@ -225,13 +226,26 @@ struct coresight_sysfs_link { * by @coresight_ops. * @access: Device i/o access abstraction for this device. * @dev: The device entity associated to this component. - * @refcnt: keep track of what is in use. + * @mode: This tracer's mode, i.e sysFS, Perf or disabled. This is + * actually an 'enum cs_mode', but is stored in an atomic type. + * This is always accessed through local_read() and local_set(), + * but wherever it's done from within the Coresight device's lock, + * a non-atomic read would also work. This is the main point of + * synchronisation between code happening inside the sysfs mode's + * coresight_mutex and outside when running in Perf mode. A compare + * and exchange swap is done to atomically claim one mode or the + * other. + * @refcnt: keep track of what is in use. Only access this outside of the + * device's spinlock when the coresight_mutex held and mode == + * CS_MODE_SYSFS. Otherwise it must be accessed from inside the + * spinlock. * @orphan: true if the component has connections that haven't been linked. - * @enable: 'true' if component is currently part of an active path. - * @activated: 'true' only if a _sink_ has been activated. A sink can be - * activated but not yet enabled. Enabling for a _sink_ - * happens when a source has been selected and a path is enabled - * from source to that sink. + * @sysfs_sink_activated: 'true' when a sink has been selected for use via sysfs + * by writing a 1 to the 'enable_sink' file. A sink can be + * activated but not yet enabled. Enabling for a _sink_ happens + * when a source has been selected and a path is enabled from + * source to that sink. A sink can also become enabled but not + * activated if it's used via Perf. * @ea: Device attribute for sink representation under PMU directory. * @def_sink: cached reference to default sink found for this device. * @nr_links: number of sysfs links created to other components from this @@ -249,11 +263,11 @@ struct coresight_device { const struct coresight_ops *ops; struct csdev_access access; struct device dev; - atomic_t refcnt; + local_t mode; + int refcnt; bool orphan; - bool enable; /* true only if configured as part of a path */ /* sink specific fields */ - bool activated; /* true only if a sink is part of a path */ + bool sysfs_sink_activated; struct dev_ext_attribute *ea; struct coresight_device *def_sink; /* sysfs links between components */ @@ -377,8 +391,6 @@ struct coresight_ops { const struct coresight_ops_helper *helper_ops; }; -#if IS_ENABLED(CONFIG_CORESIGHT) - static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, u32 offset) { @@ -568,11 +580,43 @@ static inline bool coresight_is_percpu_sink(struct coresight_device *csdev) (csdev->subtype.sink_subtype == CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM); } +/* + * Atomically try to take the device and set a new mode. Returns true on + * success, false if the device is already taken by someone else. + */ +static inline bool coresight_take_mode(struct coresight_device *csdev, + enum cs_mode new_mode) +{ + return local_cmpxchg(&csdev->mode, CS_MODE_DISABLED, new_mode) == + CS_MODE_DISABLED; +} + +static inline enum cs_mode coresight_get_mode(struct coresight_device *csdev) +{ + return local_read(&csdev->mode); +} + +static inline void coresight_set_mode(struct coresight_device *csdev, + enum cs_mode new_mode) +{ + enum cs_mode current_mode = coresight_get_mode(csdev); + + /* + * Changing to a new mode must be done from an already disabled state + * unless it's synchronized with coresight_take_mode(). Otherwise the + * device is already in use and signifies a locking issue. + */ + WARN(new_mode != CS_MODE_DISABLED && current_mode != CS_MODE_DISABLED && + current_mode != new_mode, "Device already in use\n"); + + local_set(&csdev->mode, new_mode); +} + extern struct coresight_device * coresight_register(struct coresight_desc *desc); extern void coresight_unregister(struct coresight_device *csdev); -extern int coresight_enable(struct coresight_device *csdev); -extern void coresight_disable(struct coresight_device *csdev); +extern int coresight_enable_sysfs(struct coresight_device *csdev); +extern void coresight_disable_sysfs(struct coresight_device *csdev); extern int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); @@ -597,83 +641,6 @@ void coresight_relaxed_write64(struct coresight_device *csdev, u64 val, u32 offset); void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); -#else -static inline struct coresight_device * -coresight_register(struct coresight_desc *desc) { return NULL; } -static inline void coresight_unregister(struct coresight_device *csdev) {} -static inline int -coresight_enable(struct coresight_device *csdev) { return -ENOSYS; } -static inline void coresight_disable(struct coresight_device *csdev) {} - -static inline int coresight_timeout(struct csdev_access *csa, u32 offset, - int position, int value) -{ - return 1; -} - -static inline int coresight_claim_device_unlocked(struct coresight_device *csdev) -{ - return -EINVAL; -} - -static inline int coresight_claim_device(struct coresight_device *csdev) -{ - return -EINVAL; -} - -static inline void coresight_disclaim_device(struct coresight_device *csdev) {} -static inline void coresight_disclaim_device_unlocked(struct coresight_device *csdev) {} - -static inline bool coresight_loses_context_with_cpu(struct device *dev) -{ - return false; -} - -static inline u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline u32 coresight_read32(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset) -{ -} - -static inline void coresight_relaxed_write32(struct coresight_device *csdev, - u32 val, u32 offset) -{ -} - -static inline u64 coresight_relaxed_read64(struct coresight_device *csdev, - u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline u64 coresight_read64(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline void coresight_relaxed_write64(struct coresight_device *csdev, - u64 val, u32 offset) -{ -} - -static inline void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset) -{ -} - -#endif /* IS_ENABLED(CONFIG_CORESIGHT) */ - extern int coresight_get_cpu(struct device *dev); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..265b0f8fc0b3 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -90,6 +90,29 @@ enum { GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + /* * Flags bits definitions for flags in struct cper_record_header * If set, the error has been recovered diff --git a/include/linux/cpu.h b/include/linux/cpu.h index eb768a866fe3..861c3bfc5f17 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -75,11 +75,14 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern bool arch_cpu_is_hotpluggable(int cpu); extern int arch_register_cpu(int cpu); extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU @@ -88,6 +91,10 @@ extern ssize_t arch_cpu_probe(const char *, size_t); extern ssize_t arch_cpu_release(const char *, size_t); #endif +#ifdef CONFIG_GENERIC_CPU_DEVICES +DECLARE_PER_CPU(struct cpu, cpu_devices); +#endif + /* * These states are not related to the core CPU hotplug mechanism. They are * used by various (sub)architectures to track internal state @@ -107,7 +114,7 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); -void bringup_nonboot_cpus(unsigned int setup_max_cpus); +void bringup_nonboot_cpus(unsigned int max_cpus); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -123,7 +130,7 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ -extern struct bus_type cpu_subsys; +extern const struct bus_type cpu_subsys; extern int lockdep_is_cpus_held(void); @@ -155,6 +162,8 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); @@ -189,6 +198,8 @@ void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); +void arch_tick_broadcast_enter(void); +void arch_tick_broadcast_exit(void); void __noreturn arch_cpu_idle_dead(void); #ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT @@ -210,7 +221,18 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +#else +static inline bool cpu_mitigations_off(void) +{ + return true; +} +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return false; +} +#endif #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 71d186d6933a..9956afb9acc2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void) return false; } static inline void disable_cpufreq(void) { } +static inline void cpufreq_update_limits(unsigned int cpu) { } #endif #ifdef CONFIG_CPU_FREQ_STAT @@ -568,9 +569,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, /* * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. The - * ondemand governor will work on any processor with transition latency <= 10ms, - * using appropriate sampling rate. + * polling frequency is 1000 times the transition latency of the processor. */ #define LATENCY_MULTIPLIER (1000) @@ -694,26 +693,6 @@ struct cpufreq_frequency_table { * order */ }; -#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) -int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -#else -static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table - **table) -{ - return -EINVAL; -} - -static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table - **table) -{ -} -#endif - /* * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table * @pos: the cpufreq_frequency_table * to use as a loop cursor. @@ -1021,6 +1000,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1054,7 +1045,8 @@ retry: return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } @@ -1149,8 +1141,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ if (ret < 0) continue; - if (pargs->np == args.np && pargs->args_count == args.args_count && - !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) + if (of_phandle_args_equal(pargs, &args)) cpumask_set_cpu(cpu, cpumask); of_node_put(args.np); @@ -1193,14 +1184,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov); -#else -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov) { } -#endif - extern unsigned int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale @@ -1211,6 +1194,7 @@ void arch_set_freq_scale(const struct cpumask *cpus, { } #endif + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 068f7738be22..35e78ddb2b37 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -66,15 +66,12 @@ enum cpuhp_state { CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, CPUHP_X86_HPET_DEAD, - CPUHP_X86_APB_DEAD, CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, CPUHP_IBMVNIC_DEAD, CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, - /* Must be after CPUHP_MM_VMSTAT_DEAD */ - CPUHP_MM_DEMOTION_DEAD, CPUHP_MM_VMSTAT_DEAD, CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, @@ -96,7 +93,6 @@ enum cpuhp_state { CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, - CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, @@ -108,7 +104,6 @@ enum cpuhp_state { CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, - CPUHP_SLAB_PREPARE, CPUHP_MD_RAID5_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_CPUIDLE_COUPLED_PREPARE, @@ -118,13 +113,11 @@ enum cpuhp_state { CPUHP_XEN_EVTCHN_PREPARE, CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, - CPUHP_NET_FLOW_PREPARE, CPUHP_TOPOLOGY_PREPARE, CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, CPUHP_TRACE_RB_PREPARE, CPUHP_MM_ZS_PREPARE, - CPUHP_MM_ZSWP_MEM_PREPARE, CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, @@ -151,18 +144,14 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, - CPUHP_AP_IRQ_RISCV_STARTING, CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, - CPUHP_AP_MICROCODE_LOADER, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - CPUHP_AP_PERF_X86_CQM_STARTING, CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, - CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, @@ -172,13 +161,13 @@ enum cpuhp_state { CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, + CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, - CPUHP_AP_MARCO_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, @@ -189,10 +178,13 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, + CPUHP_AP_ARM_XEN_RUNSTATE_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, + CPUHP_AP_TICK_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, @@ -204,7 +196,6 @@ enum cpuhp_state { CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, - CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_BLK_MQ_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, @@ -215,9 +206,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, CPUHP_AP_PERF_X86_RAPL_ONLINE, - CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, - CPUHP_AP_PERF_X86_IDXD_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, @@ -243,15 +232,14 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_PERF_CSKY_ONLINE, + CPUHP_AP_TMIGR_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, - CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, - /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */ - CPUHP_AP_MM_DEMOTION_ONLINE, + CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, CPUHP_AP_ACTIVE, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f10fb87d49db..1c29947db848 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -4,9 +4,10 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. In general, + * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> @@ -97,7 +98,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And @@ -112,7 +113,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: - * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() @@ -155,7 +156,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { @@ -166,7 +167,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * cpumask_first_zero - get the first unset cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if all cpus are set. + * Return: >= nr_cpu_ids if all cpus are set. */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { @@ -178,7 +179,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * @srcp1: the first input * @srcp2: the second input * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -190,7 +191,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * - * Returns >= nr_cpumask_bits if no CPUs set. + * Return: >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { @@ -199,10 +200,10 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) /** * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set. + * Return: >= nr_cpu_ids if no further cpus set. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) @@ -215,10 +216,10 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /** * cpumask_next_zero - get the next unset cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus unset. + * Return: >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { @@ -254,11 +255,11 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); /** * cpumask_next_and - get the next cpu in *src1p & *src2p - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @src1p: the first cpumask pointer * @src2p: the second cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set in both. + * Return: >= nr_cpu_ids if no further cpus set in both. */ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, @@ -373,7 +374,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) @@ -388,11 +389,11 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_nth - get the first cpu in a cpumask + * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { @@ -400,12 +401,12 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s } /** - * cpumask_nth_and - get the first cpu in 2 cpumasks + * cpumask_nth_and - get the Nth cpu in 2 cpumasks * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, @@ -416,12 +417,12 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, } /** - * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -436,9 +437,9 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @srcp3: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static __always_inline unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -497,7 +498,7 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in @cpumask, else returns false + * Return: true if @cpu is set in @cpumask, else returns false */ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { @@ -509,9 +510,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_set_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { @@ -523,9 +524,9 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_clear_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { @@ -560,7 +561,7 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, @@ -603,7 +604,7 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, @@ -617,6 +618,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input + * + * Return: true if the cpumasks are equal, false if not */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) @@ -630,6 +633,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * @src3p: the third input + * + * Return: true if first cpumask ORed with second cpumask == third cpumask, + * otherwise false */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, @@ -643,6 +649,9 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input + * + * Return: true if first cpumask ANDed with second cpumask is non-empty, + * otherwise false */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) @@ -656,7 +665,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * - * Returns true if *@src1p is a subset of *@src2p, else returns false + * Return: true if *@src1p is a subset of *@src2p, else returns false */ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) @@ -668,6 +677,8 @@ static inline bool cpumask_subset(const struct cpumask *src1p, /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + * + * Return: true if srcp is empty (has no bits set), else false */ static inline bool cpumask_empty(const struct cpumask *srcp) { @@ -677,6 +688,8 @@ static inline bool cpumask_empty(const struct cpumask *srcp) /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + * + * Return: true if srcp is full (has all bits set), else false */ static inline bool cpumask_full(const struct cpumask *srcp) { @@ -686,6 +699,8 @@ static inline bool cpumask_full(const struct cpumask *srcp) /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in *srcp */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { @@ -696,6 +711,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -704,6 +721,19 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, } /** + * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2) + * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. + * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 + */ +static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + +/** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift @@ -744,7 +774,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) @@ -753,7 +783,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @mask1: the first input cpumask * @mask2: the second input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) @@ -769,7 +799,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) @@ -783,7 +813,7 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) @@ -797,7 +827,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { @@ -809,7 +839,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { @@ -817,7 +847,9 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) } /** - * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes + * + * Return: size to allocate for a &struct cpumask in bytes */ static inline unsigned int cpumask_size(void) { @@ -831,7 +863,7 @@ static inline unsigned int cpumask_size(void) * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * - * ie. + * i.e. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; @@ -887,6 +919,8 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * a nop returning a constant 1 (in <linux/cpumask.h>). * * See alloc_cpumask_var_node. + * + * Return: %true if allocation succeeded, %false if not */ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) @@ -957,6 +991,8 @@ static inline bool cpumask_available(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ +DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T)); + /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); @@ -1025,7 +1061,7 @@ set_cpu_dying(unsigned int cpu, bool dying) } /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and @@ -1068,6 +1104,8 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. + * + * Return: momentary snapshot of the number of online CPUs */ static __always_inline unsigned int num_online_cpus(void) { @@ -1160,7 +1198,7 @@ static inline bool cpu_dying(unsigned int cpu) * @mask: the cpumask to copy * @buf: the buffer to copy into * - * Returns the length of the (null-terminated) @buf string, zero if + * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t @@ -1183,7 +1221,7 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied, excluding + * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ static inline ssize_t @@ -1204,6 +1242,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * * Everything is same with the above cpumap_print_bitmask_to_buf() * except the print format. + * + * Return: the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index d629094fac6e..0ce6ff0d9c9a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -77,6 +77,7 @@ extern void cpuset_lock(void); extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); +extern bool cpuset_cpu_is_isolated(int cpu); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -120,11 +121,6 @@ static inline int cpuset_do_page_mem_spread(void) return task_spread_page(current); } -static inline int cpuset_do_slab_mem_spread(void) -{ - return task_spread_slab(current); -} - extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -207,6 +203,11 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p) return false; } +static inline bool cpuset_cpu_is_isolated(int cpu) +{ + return false; +} + static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; @@ -258,11 +259,6 @@ static inline int cpuset_do_page_mem_spread(void) return 0; } -static inline int cpuset_do_slab_mem_spread(void) -{ - return 0; -} - static inline bool current_cpuset_is_being_rebound(void) { return false; diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 0c06561bf5ff..d33352c2e386 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -6,95 +6,57 @@ #include <linux/elfcore.h> #include <linux/elf.h> -#define CRASH_CORE_NOTE_NAME "CORE" -#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) -#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) +struct kimage; + +struct crash_mem { + unsigned int max_nr_ranges; + unsigned int nr_ranges; + struct range ranges[] __counted_by(max_nr_ranges); +}; + +#ifdef CONFIG_CRASH_DUMP + +int crash_shrink_memory(unsigned long new_size); +ssize_t crash_get_memory_size(void); +#ifndef arch_kexec_protect_crashkres /* - * The per-cpu notes area is a list of notes terminated by a "NULL" - * note header. For kdump, the code in vmcore.c runs in the context - * of the second kernel to combine them into one note. + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. + * + * Provide an empty default implementation here -- architecture + * code may override this */ -#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - CRASH_CORE_NOTE_NAME_BYTES + \ - CRASH_CORE_NOTE_DESC_BYTES) +static inline void arch_kexec_protect_crashkres(void) { } +#endif -#define VMCOREINFO_BYTES PAGE_SIZE -#define VMCOREINFO_NOTE_NAME "VMCOREINFO" -#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) -#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - VMCOREINFO_NOTE_NAME_BYTES + \ - VMCOREINFO_BYTES) +#ifndef arch_kexec_unprotect_crashkres +static inline void arch_kexec_unprotect_crashkres(void) { } +#endif -typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; -/* Per cpu memory for storing cpu states in case of system crash. */ -extern note_buf_t __percpu *crash_notes; -void crash_update_vmcoreinfo_safecopy(void *ptr); -void crash_save_vmcoreinfo(void); -void arch_crash_save_vmcoreinfo(void); -__printf(1, 2) -void vmcoreinfo_append_str(const char *fmt, ...); -phys_addr_t paddr_vmcoreinfo_note(void); -#define VMCOREINFO_OSRELEASE(value) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_BUILD_ID() \ - ({ \ - static_assert(sizeof(vmlinux_build_id) == 20); \ - vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ - }) +#ifndef arch_crash_handle_hotplug_event +static inline void arch_crash_handle_hotplug_event(struct kimage *image) { } +#endif -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SYMBOL_ARRAY(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) -#define VMCOREINFO_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(name)) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_TYPE_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(name, field)) -#define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) -#define VMCOREINFO_NUMBER(name) \ - vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) -#define VMCOREINFO_CONFIG(name) \ - vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +int crash_check_update_elfcorehdr(void); -extern unsigned char *vmcoreinfo_data; -extern size_t vmcoreinfo_size; -extern u32 *vmcoreinfo_note; +#ifndef crash_hotplug_cpu_support +static inline int crash_hotplug_cpu_support(void) { return 0; } +#endif -Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, - void *data, size_t data_len); -void final_note(Elf_Word *buf); +#ifndef crash_hotplug_memory_support +static inline int crash_hotplug_memory_support(void) { return 0; } +#endif -int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); +#ifndef crash_get_elfcorehdr_size +static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; } +#endif /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -struct crash_mem { - unsigned int max_nr_ranges; - unsigned int nr_ranges; - struct range ranges[]; -}; - extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mstart, unsigned long long mend); @@ -111,4 +73,23 @@ struct kexec_segment; #define KEXEC_CRASH_HP_REMOVE_MEMORY 4 #define KEXEC_CRASH_HP_INVALID_CPU -1U +extern void __crash_kexec(struct pt_regs *regs); +extern void crash_kexec(struct pt_regs *regs); +int kexec_should_crash(struct task_struct *p); +int kexec_crash_loaded(void); +void crash_save_cpu(struct pt_regs *regs, int cpu); +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); + +#else /* !CONFIG_CRASH_DUMP*/ +struct pt_regs; +struct task_struct; +struct kimage; +static inline void __crash_kexec(struct pt_regs *regs) { } +static inline void crash_kexec(struct pt_regs *regs) { } +static inline int kexec_should_crash(struct task_struct *p) { return 0; } +static inline int kexec_crash_loaded(void) { return 0; } +static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {}; +static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }; +#endif /* CONFIG_CRASH_DUMP*/ + #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..acc55626afdc 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h new file mode 100644 index 000000000000..5a9df944fb80 --- /dev/null +++ b/include/linux/crash_reserve.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_CRASH_RESERVE_H +#define LINUX_CRASH_RESERVE_H + +#include <linux/linkage.h> +#include <linux/elfcore.h> +#include <linux/elf.h> +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include <asm/crash_reserve.h> +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; + +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif +#endif /* LINUX_CRASH_RESERVE_H */ diff --git a/include/linux/crc-ccitt.h b/include/linux/crc-ccitt.h index 72c92c396bb8..cd4f420231ba 100644 --- a/include/linux/crc-ccitt.h +++ b/include/linux/crc-ccitt.h @@ -5,19 +5,12 @@ #include <linux/types.h> extern u16 const crc_ccitt_table[256]; -extern u16 const crc_ccitt_false_table[256]; extern u16 crc_ccitt(u16 crc, const u8 *buffer, size_t len); -extern u16 crc_ccitt_false(u16 crc, const u8 *buffer, size_t len); static inline u16 crc_ccitt_byte(u16 crc, const u8 c) { return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff]; } -static inline u16 crc_ccitt_false_byte(u16 crc, const u8 c) -{ - return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c]; -} - #endif /* _LINUX_CRC_CCITT_H */ diff --git a/include/linux/cred.h b/include/linux/cred.h index f923528d5cc4..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/key.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/uidgid.h> #include <linux/sched.h> #include <linux/sched/user.h> @@ -23,7 +24,7 @@ struct inode; * COW Supplementary groups list */ struct group_info { - atomic_t usage; + refcount_t usage; int ngroups; kgid_t gid[]; } __randomize_layout; @@ -39,7 +40,7 @@ struct group_info { */ static inline struct group_info *get_group_info(struct group_info *gi) { - atomic_inc(&gi->usage); + refcount_inc(&gi->usage); return gi; } @@ -49,7 +50,7 @@ static inline struct group_info *get_group_info(struct group_info *gi) */ #define put_group_info(group_info) \ do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ + if (refcount_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) @@ -108,14 +109,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif + atomic_long_t usage; kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -171,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -219,6 +173,20 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) } /** + * get_new_cred_many - Get references on a new set of credentials + * @cred: The new credentials to reference + * @nr: Number of references to acquire + * + * Get references on the specified set of new credentials. The caller must + * release all acquired references. + */ +static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +{ + atomic_long_add(nr, &cred->usage); + return cred; +} + +/** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference * @@ -227,16 +195,16 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred_many(cred, 1); } /** - * get_cred - Get a reference on a set of credentials + * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference + * @nr: Number of references to acquire * - * Get a reference on the specified set of credentials. The caller must - * release the reference. If %NULL is passed, it is returned with no action. + * Get references on the specified set of credentials. The caller must release + * all acquired reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the @@ -244,14 +212,27 @@ static inline struct cred *get_new_cred(struct cred *cred) * accidental alteration of a set of credentials that should be considered * immutable. */ -static inline const struct cred *get_cred(const struct cred *cred) +static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; - return get_new_cred(nonconst_cred); + return get_new_cred_many(nonconst_cred, nr); +} + +/* + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. If %NULL is passed, it is returned with no action. + * + * This is used to deal with a committed set of credentials. + */ +static inline const struct cred *get_cred(const struct cred *cred) +{ + return get_cred_many(cred, 1); } static inline const struct cred *get_cred_rcu(const struct cred *cred) @@ -259,9 +240,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; - if (!atomic_inc_not_zero(&nonconst_cred->usage)) + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -269,6 +249,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release + * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. @@ -277,17 +258,28 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. */ -static inline void put_cred(const struct cred *_cred) +static inline void put_cred_many(const struct cred *_cred, int nr) { struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } +/* + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. If %NULL is passed, nothing is done. + */ +static inline void put_cred(const struct cred *cred) +{ + put_cred_many(cred, 1); +} + /** * current_cred - Access the current task's subjective credentials * diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 31f6fee0c36c..b164da5e129e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 +#define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_AKCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_SIG 0x00000007 @@ -35,8 +36,6 @@ #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f -#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 @@ -111,7 +110,6 @@ * crypto_aead_walksize() (with the remainder going at the end), no chunk * can cross a page boundary or a scatterlist element boundary. * ahash: - * - The result buffer must be aligned to the algorithm's alignmask. * - crypto_ahash_finup() must not be used unless the algorithm implements * ->finup() natively. */ @@ -279,18 +277,20 @@ struct compress_alg { * @cra_ctxsize: Size of the operational context of the transformation. This * value informs the kernel crypto API about the memory size * needed to be allocated for the transformation context. - * @cra_alignmask: Alignment mask for the input and output data buffer. The data - * buffer containing the input data for the algorithm must be - * aligned to this alignment mask. The data buffer for the - * output data must be aligned to this alignment mask. Note that - * the Crypto API will do the re-alignment in software, but - * only under special conditions and there is a performance hit. - * The re-alignment happens at these occasions for different - * @cra_u types: cipher -- For both input data and output data - * buffer; ahash -- For output hash destination buf; shash -- - * For output hash destination buf. - * This is needed on hardware which is flawed by design and - * cannot pick data from arbitrary addresses. + * @cra_alignmask: For cipher, skcipher, lskcipher, and aead algorithms this is + * 1 less than the alignment, in bytes, that the algorithm + * implementation requires for input and output buffers. When + * the crypto API is invoked with buffers that are not aligned + * to this alignment, the crypto API automatically utilizes + * appropriately aligned temporary buffers to comply with what + * the algorithm needs. (For scatterlists this happens only if + * the algorithm uses the skcipher_walk helper functions.) This + * misalignment handling carries a performance penalty, so it is + * preferred that algorithms do not set a nonzero alignmask. + * Also, crypto API users may wish to allocate buffers aligned + * to the alignmask of the algorithm being used, in order to + * avoid the API having to realign them. Note: the alignmask is + * not supported for hash algorithms and is always 0 for them. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h new file mode 100644 index 000000000000..03fa6d50d46f --- /dev/null +++ b/include/linux/cxl-event.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. */ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_generic { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +union cxl_event { + struct cxl_event_generic generic; + struct cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + +enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index ae2664d1d5f1..886d07294f4e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -2,7 +2,7 @@ /* * DAMON api * - * Author: SeongJae Park <sjpark@amazon.de> + * Author: SeongJae Park <sj@kernel.org> */ #ifndef _DAMON_H_ @@ -40,9 +40,24 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. + * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for + * each sampling interval. * @list: List head for siblings. * @age: Age of this region. * + * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be + * increased for every &damon_attrs->sample_interval if an access to the region + * during the last sampling interval is found. The update of this field should + * not be done with direct access but with the helper function, + * damon_update_region_access_rate(). + * + * @nr_accesses_bp is another representation of @nr_accesses in basis point + * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a + * manner similar to moving sum. By the algorithm, this value becomes + * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can + * be used when the aggregation interval is too huge and therefore cannot wait + * for it before getting the access monitoring results. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -52,6 +67,7 @@ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; + unsigned int nr_accesses_bp; struct list_head list; unsigned int age; @@ -111,10 +127,55 @@ enum damos_action { }; /** + * enum damos_quota_goal_metric - Represents the metric to be used as the goal + * + * @DAMOS_QUOTA_USER_INPUT: User-input value. + * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. + * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. + * + * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. + */ +enum damos_quota_goal_metric { + DAMOS_QUOTA_USER_INPUT, + DAMOS_QUOTA_SOME_MEM_PSI_US, + NR_DAMOS_QUOTA_GOAL_METRICS, +}; + +/** + * struct damos_quota_goal - DAMOS scheme quota auto-tuning goal. + * @metric: Metric to be used for representing the goal. + * @target_value: Target value of @metric to achieve with the tuning. + * @current_value: Current value of @metric. + * @last_psi_total: Last measured total PSI + * @list: List head for siblings. + * + * Data structure for getting the current score of the quota tuning goal. The + * score is calculated by how close @current_value and @target_value are. Then + * the score is entered to DAMON's internal feedback loop mechanism to get the + * auto-tuned quota. + * + * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually + * entered by the user, probably inside the kdamond callbacks. Otherwise, + * DAMON sets @current_value with self-measured value of @metric. + */ +struct damos_quota_goal { + enum damos_quota_goal_metric metric; + unsigned long target_value; + unsigned long current_value; + /* metric-dependent fields */ + union { + u64 last_psi_total; + }; + struct list_head list; +}; + +/** * struct damos_quota - Controls the aggressiveness of the given scheme. + * @reset_interval: Charge reset interval in milliseconds. * @ms: Maximum milliseconds that the scheme can use. * @sz: Maximum bytes of memory that the action can be applied. - * @reset_interval: Charge reset interval in milliseconds. + * @goals: Head of quota tuning goals (&damos_quota_goal) list. + * @esz: Effective size quota in bytes. * * @weight_sz: Weight of the region's size for prioritization. * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. @@ -132,6 +193,13 @@ enum damos_action { * throughput of the scheme's action. DAMON then compares it against &sz and * uses smaller one as the effective quota. * + * If @goals is not empt, DAMON calculates yet another size quota based on the + * goals using its internal feedback loop algorithm, for every @reset_interval. + * Then, if the new size quota is smaller than the effective quota, it uses the + * new size quota as the effective quota. + * + * The resulting effective size quota in bytes is set to @esz. + * * For selecting regions within the quota, DAMON prioritizes current scheme's * target memory regions using the &struct damon_operations->get_scheme_score. * You could customize the prioritization logic by setting &weight_sz, @@ -139,9 +207,11 @@ enum damos_action { * encouraged to respect those. */ struct damos_quota { + unsigned long reset_interval; unsigned long ms; unsigned long sz; - unsigned long reset_interval; + struct list_head goals; + unsigned long esz; unsigned int weight_sz; unsigned int weight_nr_accesses; @@ -152,8 +222,6 @@ struct damos_quota { unsigned long total_charged_sz; unsigned long total_charged_ns; - unsigned long esz; /* Effective size quota in bytes */ - /* For charging the quota */ unsigned long charged_sz; unsigned long charged_from; @@ -163,6 +231,9 @@ struct damos_quota { /* For prioritization */ unsigned long histogram[DAMOS_MAX_SCORE + 1]; unsigned int min_score; + + /* For feedback loop */ + unsigned long esz_bp; }; /** @@ -298,24 +369,24 @@ struct damos_access_pattern { * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. + * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. * - * For each aggregation interval, DAMON finds regions which fit in the + * For each @apply_interval_us, DAMON finds regions which fit in the * &pattern and applies &action to those. To avoid consuming too much * CPU time or IO resources for the &action, "a is used. * + * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead. + * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks. If all schemes that registered to the * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * - * If all schemes that registered to a &struct damon_ctx are inactive, DAMON - * stops monitoring and just repeatedly checks the watermarks. - * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect * &filters @@ -327,6 +398,14 @@ struct damos_access_pattern { struct damos { struct damos_access_pattern pattern; enum damos_action action; + unsigned long apply_interval_us; +/* private: internal use only */ + /* + * number of sample intervals that should be passed before applying + * @action + */ + unsigned long next_apply_sis; +/* public: */ struct damos_quota quota; struct damos_watermarks wmarks; struct list_head filters; @@ -472,13 +551,14 @@ struct damon_callback { * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or - * not. It aggregates and keeps the access information (number of accesses to - * each region) for @aggr_interval time. DAMON also checks whether the target - * memory regions need update (e.g., by ``mmap()`` calls from the application, - * in case of virtual memory monitoring) and applies the changes for each - * @ops_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_operations and &struct damon_callback for more - * detail. + * not during the last @sample_interval. If such access is found, DAMON + * aggregates the information by increasing &damon_region->nr_accesses for + * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON + * also checks whether the target memory regions need update (e.g., by + * ``mmap()`` calls from the application, in case of virtual memory monitoring) + * and applies the changes for each @ops_update_interval. All time intervals + * are in micro-seconds. Please refer to &struct damon_operations and &struct + * damon_callback for more detail. */ struct damon_attrs { unsigned long sample_interval; @@ -522,8 +602,20 @@ struct damon_ctx { struct damon_attrs attrs; /* private: internal use only */ - struct timespec64 last_aggregation; - struct timespec64 last_ops_update; + /* number of sample intervals that passed since this context started */ + unsigned long passed_sample_intervals; + /* + * number of sample intervals that should be passed before next + * aggregation + */ + unsigned long next_aggregation_sis; + /* + * number of sample intervals that should be passed before next ops + * update + */ + unsigned long next_ops_update_sis; + /* for waiting until the execution of the kdamond_fn is started */ + struct completion kdamond_started; /* public: */ struct task_struct *kdamond; @@ -583,6 +675,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damon_for_each_scheme_safe(s, next, ctx) \ list_for_each_entry_safe(s, next, &(ctx)->schemes, list) +#define damos_for_each_quota_goal(goal, quota) \ + list_for_each_entry(goal, "a->goals, list) + +#define damos_for_each_quota_goal_safe(goal, next, quota) \ + list_for_each_entry_safe(goal, next, &(quota)->goals, list) + #define damos_for_each_filter(f, scheme) \ list_for_each_entry(f, &(scheme)->filters, list) @@ -608,14 +706,24 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); void damos_add_filter(struct damos *s, struct damos_filter *f); void damos_destroy_filter(struct damos_filter *f); +struct damos_quota_goal *damos_new_quota_goal( + enum damos_quota_goal_metric metric, + unsigned long target_value); +void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g); +void damos_destroy_quota_goal(struct damos_quota_goal *goal); + struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); @@ -642,6 +750,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR; } +static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs) +{ + /* {aggr,sample}_interval are unsigned long, hence could overflow */ + return min(attrs->aggr_interval / attrs->sample_interval, + (unsigned long)UINT_MAX); +} + int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); diff --git a/include/linux/dax.h b/include/linux/dax.h index 22cd9902345d..9d3e3327af4c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -63,6 +63,8 @@ void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); bool dax_synchronous(struct dax_device *dax_dev); +void set_dax_nocache(struct dax_device *dax_dev); +void set_dax_nomc(struct dax_device *dax_dev); void set_dax_synchronous(struct dax_device *dax_dev); size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); @@ -86,11 +88,7 @@ static inline void *dax_holder(struct dax_device *dax_dev) static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { - /* - * Callers should check IS_ENABLED(CONFIG_DAX) to know if this - * NULL is an error or expected. - */ - return NULL; + return ERR_PTR(-EOPNOTSUPP); } static inline void put_dax(struct dax_device *dax_dev) { @@ -109,6 +107,12 @@ static inline bool dax_synchronous(struct dax_device *dax_dev) { return true; } +static inline void set_dax_nocache(struct dax_device *dax_dev) +{ +} +static inline void set_dax_nomc(struct dax_device *dax_dev) +{ +} static inline void set_dax_synchronous(struct dax_device *dax_dev) { } @@ -124,9 +128,6 @@ static inline size_t dax_recovery_write(struct dax_device *dax_dev, } #endif -void set_dax_nocache(struct dax_device *dax_dev); -void set_dax_nomc(struct dax_device *dax_dev); - struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); @@ -159,8 +160,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_folio(struct folio *folio); +void dax_unlock_folio(struct folio *folio, dax_entry_t cookie); dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page **page); void dax_unlock_mapping_entry(struct address_space *mapping, @@ -182,14 +183,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, return -EOPNOTSUPP; } -static inline dax_entry_t dax_lock_page(struct page *page) +static inline dax_entry_t dax_lock_folio(struct folio *folio) { - if (IS_DAX(page->mapping->host)) + if (IS_DAX(folio->mapping->host)) return ~0UL; return 0; } -static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) +static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6b351e009f59..bf53e3894aae 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -68,12 +68,12 @@ extern const struct qstr dotdot_name; * large memory footprint increase). */ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 32 /* 192 bytes */ +# define DNAME_INLINE_LEN 40 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ -# else # define DNAME_INLINE_LEN 40 /* 128 bytes */ +# else +# define DNAME_INLINE_LEN 44 /* 128 bytes */ # endif #endif @@ -101,8 +101,8 @@ struct dentry { struct list_head d_lru; /* LRU list */ wait_queue_head_t *d_wait; /* in-lookup ones only */ }; - struct list_head d_child; /* child of parent list */ - struct list_head d_subdirs; /* our children */ + struct hlist_node d_sib; /* child of parent list */ + struct hlist_head d_children; /* our children */ /* * d_alias and d_rcu can share memory */ @@ -111,7 +111,7 @@ struct dentry { struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; -} __randomize_layout; +}; /* * dentry->d_lock spinlock nesting subclasses: @@ -125,6 +125,11 @@ enum dentry_d_lock_class DENTRY_D_LOCK_NESTED }; +enum d_real_type { + D_REAL_DATA, + D_REAL_METADATA, +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); @@ -139,7 +144,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); } ____cacheline_aligned; /* @@ -151,13 +156,13 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH 0x00000001 -#define DCACHE_OP_COMPARE 0x00000002 -#define DCACHE_OP_REVALIDATE 0x00000004 -#define DCACHE_OP_DELETE 0x00000008 -#define DCACHE_OP_PRUNE 0x00000010 +#define DCACHE_OP_HASH BIT(0) +#define DCACHE_OP_COMPARE BIT(1) +#define DCACHE_OP_REVALIDATE BIT(2) +#define DCACHE_OP_DELETE BIT(3) +#define DCACHE_OP_PRUNE BIT(4) -#define DCACHE_DISCONNECTED 0x00000020 +#define DCACHE_DISCONNECTED BIT(5) /* This dentry is possibly not currently connected to the dcache tree, in * which case its parent will either be itself, or will have this flag as * well. nfsd will not use a dentry with this bit set, but will first @@ -168,50 +173,47 @@ struct dentry_operations { * dentry into place and return that dentry rather than the passed one, * typically using d_splice_alias. */ -#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */ +#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ -#define DCACHE_DONTCACHE 0x00000080 /* Purge from memory on final dput() */ +#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ -#define DCACHE_CANT_MOUNT 0x00000100 -#define DCACHE_GENOCIDE 0x00000200 -#define DCACHE_SHRINK_LIST 0x00000400 +#define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) +#define DCACHE_SHRINK_LIST BIT(10) -#define DCACHE_OP_WEAK_REVALIDATE 0x00000800 +#define DCACHE_OP_WEAK_REVALIDATE BIT(11) -#define DCACHE_NFSFS_RENAMED 0x00001000 +#define DCACHE_NFSFS_RENAMED BIT(12) /* this dentry has been "silly renamed" and has to be deleted on the last * dput() */ -#define DCACHE_COOKIE 0x00002000 /* For use by dcookie subsystem */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000 +#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14) /* Parent inode is watched by some fsnotify listener */ -#define DCACHE_DENTRY_KILLED 0x00008000 +#define DCACHE_DENTRY_KILLED BIT(15) -#define DCACHE_MOUNTED 0x00010000 /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT 0x00020000 /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT 0x00040000 /* manage transit from this dirent */ +#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */ +#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */ +#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST 0x00080000 +#define DCACHE_LRU_LIST BIT(19) -#define DCACHE_ENTRY_TYPE 0x00700000 -#define DCACHE_MISS_TYPE 0x00000000 /* Negative dentry (maybe fallthru to nowhere) */ -#define DCACHE_WHITEOUT_TYPE 0x00100000 /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE 0x00200000 /* Normal directory */ -#define DCACHE_AUTODIR_TYPE 0x00300000 /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE 0x00400000 /* Regular file type (or fallthru to such) */ -#define DCACHE_SPECIAL_TYPE 0x00500000 /* Other file type (or fallthru to such) */ -#define DCACHE_SYMLINK_TYPE 0x00600000 /* Symlink (or fallthru to such) */ +#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */ +#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */ +#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */ +#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */ +#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */ +#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */ +#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */ +#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */ -#define DCACHE_MAY_FREE 0x00800000 -#define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ -#define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL 0x04000000 +#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */ +#define DCACHE_OP_REAL BIT(26) -#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR 0x20000000 -#define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */ +#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */ +#define DCACHE_DENTRY_CURSOR BIT(29) +#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */ extern seqlock_t rename_lock; @@ -220,8 +222,6 @@ extern seqlock_t rename_lock; */ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); -extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); -extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); @@ -242,15 +242,12 @@ extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); -extern void shrink_dcache_for_umount(struct super_block *); extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); -/* <clickety>-<click> the ramfs-type tree */ -extern void d_genocide(struct dentry *); - +extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); @@ -273,12 +270,8 @@ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); -/* appendix may either be NULL or be used for transname suffixes */ extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); -extern struct dentry *__d_lookup_rcu(const struct dentry *parent, - const struct qstr *name, unsigned *seq); static inline unsigned d_count(const struct dentry *dentry) { @@ -300,20 +293,40 @@ extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_dlock - get a reference to a dentry - * @dentry: dentry to get a reference to + * dget_dlock - get a reference to a dentry + * @dentry: dentry to get a reference to * - * Given a dentry or %NULL pointer increment the reference count - * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. + * Given a live dentry, increment the reference count and return the dentry. + * Caller must hold @dentry->d_lock. Making sure that dentry is alive is + * caller's resonsibility. There are many conditions sufficient to guarantee + * that; e.g. anything with non-negative refcount is alive, so's anything + * hashed, anything positive, anyone's parent, etc. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { - if (dentry) - dentry->d_lockref.count++; + dentry->d_lockref.count++; return dentry; } + +/** + * dget - get a reference to a dentry + * @dentry: dentry to get a reference to + * + * Given a dentry or %NULL pointer increment the reference count + * if appropriate and return the dentry. A dentry will not be + * destroyed when it has references. Conversely, a dentry with + * no references can disappear for any number of reasons, starting + * with memory pressure. In other words, that primitive is + * used to clone an existing reference; using it on something with + * zero refcount is a bug. + * + * NOTE: it will spin if @dentry->d_lock is held. From the deadlock + * avoidance point of view it is equivalent to spin_lock()/increment + * refcount/spin_unlock(), so calling it under @dentry->d_lock is + * always a bug; so's calling it under ->d_lock on any of its descendents. + * + */ static inline struct dentry *dget(struct dentry *dentry) { if (dentry) @@ -324,12 +337,11 @@ static inline struct dentry *dget(struct dentry *dentry) extern struct dentry *dget_parent(struct dentry *dentry); /** - * d_unhashed - is dentry hashed - * @dentry: entry to check + * d_unhashed - is dentry hashed + * @dentry: entry to check * - * Returns true if the dentry passed is not currently hashed. + * Returns true if the dentry passed is not currently hashed. */ - static inline int d_unhashed(const struct dentry *dentry) { return hlist_bl_unhashed(&dentry->d_hash); @@ -489,14 +501,6 @@ static inline int simple_positive(const struct dentry *dentry) return d_really_is_positive(dentry) && !d_unhashed(dentry); } -extern void d_set_fallthru(struct dentry *dentry); - -static inline bool d_is_fallthru(const struct dentry *dentry) -{ - return dentry->d_flags & DCACHE_FALLTHRU; -} - - extern int sysctl_vfs_cache_pressure; static inline unsigned long vfs_pressure_ratio(unsigned long val) @@ -546,41 +550,25 @@ static inline struct inode *d_backing_inode(const struct dentry *upper) } /** - * d_backing_dentry - Get upper or lower dentry we should be using - * @upper: The upper layer - * - * This is the helper that should be used to get the dentry of the inode that - * will be used if this dentry were opened as a file. It may be the upper - * dentry or it may be a lower dentry pinned by the upper. - * - * Normal filesystems should not use this to access their own dentries. - */ -static inline struct dentry *d_backing_dentry(struct dentry *upper) -{ - return upper; -} - -/** * d_real - Return the real dentry * @dentry: the dentry to query - * @inode: inode to select the dentry from multiple layers (can be NULL) + * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ -static inline struct dentry *d_real(struct dentry *dentry, - const struct inode *inode) +static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode); + return dentry->d_op->d_real(dentry, type); else return dentry; } /** - * d_real_inode - Return the real inode + * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. @@ -589,7 +577,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL)); + return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { @@ -599,4 +587,14 @@ struct name_snapshot { void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); +static inline struct dentry *d_first_child(const struct dentry *dentry) +{ + return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib); +} + +static inline struct dentry *d_next_sibling(const struct dentry *dentry) +{ + return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); +} + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index ea2d919fd9c7..c9c65b132c0f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, size_t count, loff_t *ppos); +/** + * struct debugfs_cancellation - cancellation data + * @list: internal, for keeping track + * @cancel: callback to call + * @cancel_data: extra data for the callback to call + */ +struct debugfs_cancellation { + struct list_head list; + void (*cancel)(struct dentry *, void *); + void *cancel_data; +}; + +void __acquires(cancellation) +debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); +void __releases(cancellation) +debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); + #else #include <linux/err.h> diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c008169ed2c6..c8f7eb6cc191 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -63,6 +63,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); + +void dev_coredump_put(struct device *dev); #else static inline void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp) @@ -85,6 +87,9 @@ static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, { _devcd_free_sgtable(table); } +static inline void dev_coredump_put(struct device *dev) +{ +} #endif /* CONFIG_DEV_COREDUMP */ #endif /* __DEVCOREDUMP_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 69d0435c7ebb..82b2195efaca 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -165,6 +165,7 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; + struct file *bdev_file; struct dax_device *dax_dev; blk_mode_t mode; char name[16]; diff --git a/include/linux/device.h b/include/linux/device.h index 56d93a1ffb7b..b9f5464f44ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,7 +42,6 @@ struct class; struct subsys_private; struct device_node; struct fwnode_handle; -struct iommu_ops; struct iommu_group; struct dev_pin_info; struct dev_iommu; @@ -63,7 +62,7 @@ struct msi_device_data; */ struct subsys_interface { const char *name; - struct bus_type *subsys; + const struct bus_type *subsys; struct list_head node; int (*add_dev)(struct device *dev, struct subsys_interface *sif); void (*remove_dev)(struct device *dev, struct subsys_interface *sif); @@ -72,9 +71,9 @@ struct subsys_interface { int subsys_interface_register(struct subsys_interface *sif); void subsys_interface_unregister(struct subsys_interface *sif); -int subsys_system_register(struct bus_type *subsys, +int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups); -int subsys_virtual_register(struct bus_type *subsys, +int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups); /* @@ -389,8 +388,8 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(release, action, data) \ - __devm_add_action(release, action, data, #action) +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) @@ -403,8 +402,8 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)( return ret; } -#define devm_add_action_or_reset(release, action, data) \ - __devm_add_action_or_reset(release, action, data, #action) +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu @@ -662,7 +661,6 @@ struct device_physical_location { * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. - * @knode_class: The node used to add the device to the class list. * @class: The class of the device. * @groups: Optional attribute groups. * @release: Callback to free the device after all references have @@ -1007,6 +1005,8 @@ static inline void device_unlock(struct device *dev) mutex_unlock(&dev->mutex); } +DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) + static inline void device_lock_assert(struct device *dev) { lockdep_assert_held(&dev->mutex); @@ -1071,7 +1071,6 @@ int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); -int device_is_dependent(struct device *dev, void *target); static inline bool device_supports_offline(struct device *dev) { @@ -1248,6 +1247,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index ae10c4322754..5ef4ec1c36c3 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -62,9 +62,6 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. - * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU - * driver implementations to a bus and allow the driver to do - * bus-specific setup * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. * @@ -104,8 +101,6 @@ struct bus_type { const struct dev_pm_ops *pm; - const struct iommu_ops *iommu_ops; - bool need_parent_lock; }; @@ -232,7 +227,7 @@ bus_find_device_by_acpi_dev(const struct bus_type *bus, const void *adev) int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); -void bus_sort_breadthfirst(struct bus_type *bus, +void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)); /* diff --git a/include/linux/device/class.h b/include/linux/device/class.h index abf3d3bfb6fe..c576b49c55c2 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -40,8 +40,6 @@ struct fwnode_handle; * for the devices belonging to the class. Usually tied to * device's namespace. * @pm: The default device power management operations of this class. - * @p: The private data of the driver core, no one other than the - * driver core can touch this. * * A class is a higher-level view of a device that abstracts out low-level * implementation details. Drivers may see a SCSI disk or an ATA disk, but, diff --git a/include/linux/dio.h b/include/linux/dio.h index 5abd07361eb5..2b5923909f96 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -68,7 +68,7 @@ struct dio_bus { }; extern struct dio_bus dio_bus; /* Single DIO bus */ -extern struct bus_type dio_bus_type; +extern const struct bus_type dio_bus_type; /* * DIO device IDs diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 75e7d8cbb532..d1503b815a78 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -64,6 +64,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start); void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp); +void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp, unsigned short ioprio); + /* * Like dm_bufio_read, but return buffer from cache, don't read * it. If the buffer is not in the cache, return NULL. @@ -86,6 +89,10 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks); +void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, + sector_t block, unsigned int n_blocks, + unsigned short ioprio); + /* * Release a reference obtained with dm_bufio_{read,get,new}. The data * pointer and dm_buffer pointer is no longer valid after this call. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 7595142f3fc5..7b2968612b7e 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -80,7 +80,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3f31baa3293f..36216d28d8bd 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -343,16 +343,19 @@ struct dma_buf { /** * @exp_name: * - * Name of the exporter; useful for debugging. See the - * DMA_BUF_SET_NAME IOCTL. + * Name of the exporter; useful for debugging. Must not be NULL */ const char *exp_name; /** * @name: * - * Userspace-provided name; useful for accounting and debugging, - * protected by dma_resv_lock() on @resv and @name_lock for read access. + * Userspace-provided name. Default value is NULL. If not NULL, + * length cannot be longer than DMA_BUF_NAME_LEN, including NIL + * char. Useful for accounting and debugging. Read/Write accesses + * are protected by @name_lock + * + * See the IOCTLs DMA_BUF_SET_NAME or DMA_BUF_SET_NAME_A/B */ const char *name; @@ -367,8 +370,10 @@ struct dma_buf { */ struct module *owner; +#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; +#endif /** @priv: exporter specific private data for this buffer object. */ void *priv; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 18aade195884..3eb3589ff43e 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -21,7 +21,6 @@ struct bus_dma_region { phys_addr_t cpu_start; dma_addr_t dma_start; u64 size; - u64 offset; }; static inline dma_addr_t translate_phys_to_dma(struct device *dev, @@ -29,9 +28,12 @@ static inline dma_addr_t translate_phys_to_dma(struct device *dev, { const struct bus_dma_region *m; - for (m = dev->dma_range_map; m->size; m++) - if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size) - return (dma_addr_t)paddr - m->offset; + for (m = dev->dma_range_map; m->size; m++) { + u64 offset = paddr - m->cpu_start; + + if (paddr >= m->cpu_start && offset < m->size) + return m->dma_start + offset; + } /* make sure dma_capable fails when no translation is available */ return DMA_MAPPING_ERROR; @@ -42,9 +44,12 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev, { const struct bus_dma_region *m; - for (m = dev->dma_range_map; m->size; m++) - if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size) - return (phys_addr_t)dma_addr + m->offset; + for (m = dev->dma_range_map; m->size; m++) { + u64 offset = dma_addr - m->dma_start; + + if (dma_addr >= m->dma_start && offset < m->size) + return m->cpu_start + offset; + } return (phys_addr_t)-1; } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index ebe78bd3d121..c3f9bb6602ba 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/printk.h> #include <linux/rcupdate.h> +#include <linux/timekeeping.h> struct dma_fence; struct dma_fence_ops; @@ -499,6 +500,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, } /** + * dma_fence_is_later_or_same - return true if f1 is later or same as f2 + * @f1: the first fence from the same context + * @f2: the second fence from the same context + * + * Returns true if f1 is chronologically later than f2 or the same fence. Both + * fences must be from the same context, since a seqno is not re-used across + * contexts. + */ +static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, + struct dma_fence *f2) +{ + return f1 == f2 || dma_fence_is_later(f1, f2); +} + +/** * dma_fence_later - return the chronologically later fence * @f1: the first fence from the same context * @f2: the second fence from the same context @@ -666,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence) return dma_fence_is_array(fence) || dma_fence_is_chain(fence); } +#define DMA_FENCE_WARN(f, fmt, args...) \ + do { \ + struct dma_fence *__ff = (f); \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ + ##args); \ + } while (0) + #endif /* __LINUX_DMA_FENCE_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f2fc203fb8a1..4abc60f04209 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -11,6 +11,7 @@ #include <linux/slab.h> struct cma; +struct iommu_ops; /* * Values for struct dma_map_ops.flags: @@ -426,10 +427,10 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); + bool coherent); #else static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) + u64 size, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ @@ -443,10 +444,10 @@ static inline void arch_teardown_dma_ops(struct device *dev) #endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ #ifdef CONFIG_DMA_API_DEBUG -void dma_debug_add_bus(struct bus_type *bus); +void dma_debug_add_bus(const struct bus_type *bus); void debug_dma_dump_mappings(struct device *dev); #else -static inline void dma_debug_add_bus(struct bus_type *bus) +static inline void dma_debug_add_bus(const struct bus_type *bus) { } static inline void debug_dma_dump_mappings(struct device *dev) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f0ccca16a0ac..4a658de44ee9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); +bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); @@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev) { return 0; } +static inline bool dma_addressing_limited(struct device *dev) +{ + return false; +} static inline size_t dma_max_mapping_size(struct device *dev) { return 0; @@ -465,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ -static inline bool dma_addressing_limited(struct device *dev) -{ - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); -} - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index e443be4d3b4b..1e491c5dcac2 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -26,6 +26,11 @@ struct k3_udma_glue_tx_channel; struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev, const char *name, struct k3_udma_glue_tx_channel_cfg *cfg); +struct k3_udma_glue_tx_channel * +k3_udma_glue_request_tx_chn_for_thread_id(struct device *dev, + struct k3_udma_glue_tx_channel_cfg *cfg, + struct device_node *udmax_np, u32 thread_id); + void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, struct cppi5_host_desc_t *desc_tx, @@ -109,6 +114,11 @@ struct k3_udma_glue_rx_channel *k3_udma_glue_request_rx_chn( const char *name, struct k3_udma_glue_rx_channel_cfg *cfg); +struct k3_udma_glue_rx_channel * +k3_udma_glue_request_remote_rx_chn_for_thread_id(struct device *dev, + struct k3_udma_glue_rx_channel_cfg *cfg, + struct device_node *udmax_np, u32 thread_id); + void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c3656e590213..752dbde4cec1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -517,8 +517,6 @@ static inline const char *dma_chan_name(struct dma_chan *chan) return dev_name(&chan->dev->device); } -void dma_chan_cleanup(struct kref *kref); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed @@ -955,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( diff --git a/include/linux/dpll.h b/include/linux/dpll.h new file mode 100644 index 000000000000..d275736230b3 --- /dev/null +++ b/include/linux/dpll.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_H__ +#define __DPLL_H__ + +#include <uapi/linux/dpll.h> +#include <linux/device.h> +#include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> + +struct dpll_device; +struct dpll_pin; + +struct dpll_device_ops { + int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack); + int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + enum dpll_lock_status_error *status_error, + struct netlink_ext_ack *extack); + int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_ops { + int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u64 frequency, + struct netlink_ext_ack *extack); + int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack); + int (*direction_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_pin_direction direction, + struct netlink_ext_ack *extack); + int (*direction_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack); + int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*prio_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack); + int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 prio, struct netlink_ext_ack *extack); + int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, + struct netlink_ext_ack *extack); + int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack); + int (*phase_adjust_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const s32 phase_adjust, + struct netlink_ext_ack *extack); + int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *ffo, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_frequency { + u64 min; + u64 max; +}; + +#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \ + { \ + .min = _min, \ + .max = _max, \ + } + +#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val) +#define DPLL_PIN_FREQUENCY_1PPS \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ) +#define DPLL_PIN_FREQUENCY_10MHZ \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ) +#define DPLL_PIN_FREQUENCY_IRIG_B \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ) +#define DPLL_PIN_FREQUENCY_DCF77 \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) + +struct dpll_pin_phase_adjust_range { + s32 min; + s32 max; +}; + +struct dpll_pin_properties { + const char *board_label; + const char *panel_label; + const char *package_label; + enum dpll_pin_type type; + unsigned long capabilities; + u32 freq_supported_num; + struct dpll_pin_frequency *freq_supported; + struct dpll_pin_phase_adjust_range phase_range; +}; + +#if IS_ENABLED(CONFIG_DPLL) +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void dpll_netdev_pin_clear(struct net_device *dev); + +size_t dpll_netdev_pin_handle_size(const struct net_device *dev); +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev); +#else +static inline void +dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { } +static inline void dpll_netdev_pin_clear(struct net_device *dev) { } + +static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev) +{ + return 0; +} + +static inline int +dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev) +{ + return 0; +} +#endif + +struct dpll_device * +dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); + +void dpll_device_put(struct dpll_device *dpll); + +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv); + +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv); + +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module, + const struct dpll_pin_properties *prop); + +int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_put(struct dpll_pin *pin); + +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +int dpll_device_change_ntf(struct dpll_device *dpll); + +int dpll_pin_change_ntf(struct dpll_pin *pin); + +#endif diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index c177322f793d..b9dd35d4b8f5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -28,7 +28,7 @@ /* Source and Destination MAC of follow-up meta frames. * Whereas the choice of SMAC only affects the unique identification of the * switch as sender of meta frames, the DMAC must be an address that is present - * in the DSA master port's multicast MAC filter. + * in the DSA conduit port's multicast MAC filter. * 01-80-C2-00-00-0E is a good choice for this, as all profiles of IEEE 1588 * over L2 use this address for some purpose already. */ diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 407c2f281b64..5693a4be0d9a 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -38,14 +38,22 @@ #ifdef __KERNEL__ +#include <linux/bitops.h> #include <asm/bug.h> +#define DQL_HIST_LEN 4 +#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) + struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + unsigned long history_head; /* top 58 bits of jiffies */ + /* stall entries, a bit per entry */ + unsigned long history[DQL_HIST_LEN]; + /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ @@ -62,6 +70,13 @@ struct dql { unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ + + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + /* Longest stall detected, reported to user */ + unsigned short stall_max; + unsigned long last_reap; /* Last reap (in jiffies) */ + unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ @@ -74,6 +89,8 @@ struct dql { */ static inline void dql_queued(struct dql *dql, unsigned int count) { + unsigned long map, now, now_hi, i; + BUG_ON(count > DQL_MAX_OBJECT); dql->last_obj_cnt = count; @@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count) barrier(); dql->num_queued += count; + + now = jiffies; + now_hi = now / BITS_PER_LONG; + + /* The following code set a bit in the ring buffer, where each + * bit trackes time the packet was queued. The dql->history buffer + * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot + */ + if (unlikely(now_hi != dql->history_head)) { + /* About to reuse slots, clear them */ + for (i = 0; i < DQL_HIST_LEN; i++) { + /* Multiplication masks high bits */ + if (now_hi * BITS_PER_LONG == + (dql->history_head + i) * BITS_PER_LONG) + break; + DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; + } + /* pairs with smp_rmb() in dql_check_stall() */ + smp_wmb(); + WRITE_ONCE(dql->history_head, now_hi); + } + + /* __set_bit() does not guarantee WRITE_ONCE() semantics */ + map = DQL_HIST_ENT(dql, now_hi); + + /* Populate the history with an entry (bit) per queued */ + if (!(map & BIT_MASK(now))) + WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } /* Returns how many objects can be queued, < 0 indicates over limit. */ diff --git a/include/linux/edac.h b/include/linux/edac.h index fa4bda2a70f6..b4ee8961e623 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -30,7 +30,7 @@ struct device; extern int edac_op_state; -struct bus_type *edac_get_sysfs_subsys(void); +const struct bus_type *edac_get_sysfs_subsys(void); static inline void opstate_init(void) { @@ -187,6 +187,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. * @MEM_HBM2: High bandwidth Memory Gen 2. + * @MEM_HBM3: High bandwidth Memory Gen 3. */ enum mem_type { MEM_EMPTY = 0, @@ -218,6 +219,7 @@ enum mem_type { MEM_NVDIMM, MEM_WIO2, MEM_HBM2, + MEM_HBM3, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -248,6 +250,7 @@ enum mem_type { #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) +#define MEM_FLAG_HBM3 BIT(MEM_HBM3) /** * enum edac_type - Error Detection and Correction capabilities and mode @@ -492,7 +495,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type *bus; + const struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 80b21d1c6eaf..d59b0947fba0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -40,6 +40,7 @@ struct screen_info; #define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1))) #define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1))) #define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1))) +#define EFI_ACCESS_DENIED (15 | (1UL << (BITS_PER_LONG-1))) #define EFI_TIMEOUT (18 | (1UL << (BITS_PER_LONG-1))) #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) @@ -358,13 +359,10 @@ void efi_native_runtime_setup(void); * where the UEFI SPEC breaks the line. */ #define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) -#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) -#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) @@ -388,6 +386,7 @@ void efi_native_runtime_setup(void); #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) +#define EFI_TCG2_FINAL_EVENTS_TABLE_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) @@ -402,6 +401,8 @@ void efi_native_runtime_setup(void); #define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72) #define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed) #define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) +#define EFI_CC_MEASUREMENT_PROTOCOL_GUID EFI_GUID(0x96751a3d, 0x72f4, 0x41a6, 0xa7, 0x94, 0xed, 0x5d, 0x0e, 0x67, 0xae, 0x6b) +#define EFI_CC_FINAL_EVENTS_TABLE_GUID EFI_GUID(0xdd4a4648, 0x2de7, 0x4665, 0x96, 0x4d, 0x21, 0xd9, 0xef, 0x5f, 0xb4, 0x46) /* * This GUID is used to pass to the kernel proper the struct screen_info @@ -413,7 +414,6 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) -#define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) @@ -694,6 +694,11 @@ extern struct efi { extern struct mm_struct efi_mm; +static inline bool mm_is_efi(struct mm_struct *mm) +{ + return IS_ENABLED(CONFIG_EFI) && mm == &efi_mm; +} + static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) { @@ -851,10 +856,6 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) return 1; } -#ifdef CONFIG_EFI_PCDP -extern int __init efi_setup_pcdp_console(char *); -#endif - /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. @@ -1355,4 +1356,15 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +/* + * efivar ops event type + */ +#define EFIVAR_OPS_RDONLY 0 +#define EFIVAR_OPS_RDWR 1 + +extern struct blocking_notifier_head efivar_ops_nh; + +void efivars_generic_ops_register(void); +void efivars_generic_ops_unregister(void); + #endif /* _LINUX_EFI_H */ diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h new file mode 100644 index 000000000000..624ff6ff41f9 --- /dev/null +++ b/include/linux/einj-cxl.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CXL protocol Error INJection support. + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Ben Cheatham <benjamin.cheatham@amd.com> + */ +#ifndef EINJ_CXL_H +#define EINJ_CXL_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct pci_dev; +struct seq_file; + +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); +bool einj_cxl_is_initialized(void); +#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ +static inline int einj_cxl_available_error_type_show(struct seq_file *m, + void *v) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + return -ENXIO; +} + +static inline bool einj_cxl_is_initialized(void) { return false; } +#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ + +#endif /* EINJ_CXL_H */ diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b9caa01dfac4..70cd7258cd29 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/jump_label.h> #include <linux/kobject.h> +#include <linux/kref.h> #include <linux/rcupdate.h> #include <linux/sched/cpufreq.h> #include <linux/sched/topology.h> @@ -12,6 +13,7 @@ /** * struct em_perf_state - Performance state of a performance domain + * @performance: CPU performance (capacity) at a given frequency * @frequency: The frequency in KHz, for consistency with CPUFreq * @power: The power consumed at this level (by 1 CPU or by a registered * device). It can be a total power: static and dynamic. @@ -20,6 +22,7 @@ * @flags: see "em_perf_state flags" description below. */ struct em_perf_state { + unsigned long performance; unsigned long frequency; unsigned long power; unsigned long cost; @@ -37,8 +40,20 @@ struct em_perf_state { #define EM_PERF_STATE_INEFFICIENT BIT(0) /** + * struct em_perf_table - Performance states table + * @rcu: RCU used for safe access and destruction + * @kref: Reference counter to track the users + * @state: List of performance states, in ascending order + */ +struct em_perf_table { + struct rcu_head rcu; + struct kref kref; + struct em_perf_state state[]; +}; + +/** * struct em_perf_domain - Performance domain - * @table: List of performance states, in ascending order + * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here @@ -53,7 +68,7 @@ struct em_perf_state { * field is unused. */ struct em_perf_domain { - struct em_perf_state *table; + struct em_perf_table __rcu *em_table; int nr_perf_states; unsigned long flags; unsigned long cpus[]; @@ -98,27 +113,6 @@ struct em_perf_domain { #define EM_MAX_NUM_CPUS 16 #endif -/* - * To avoid an overflow on 32bit machines while calculating the energy - * use a different order in the operation. First divide by the 'cpu_scale' - * which would reduce big value stored in the 'cost' field, then multiply by - * the 'sum_util'. This would allow to handle existing platforms, which have - * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. - * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' - * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. - * This reordering of operations has some limitations, we lose small - * precision in the estimation (comparing to 64bit platform w/o reordering). - * - * We are safe on 64bit machine. - */ -#ifdef CONFIG_64BIT -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) * (sum_util)) / (scale_cpu)) -#else -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) / (scale_cpu)) * (sum_util)) -#endif - struct em_data_callback { /** * active_power() - Provide power at the next performance state of @@ -168,40 +162,48 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table __rcu *table); +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM - * @pd : Performance domain for which we want an efficient frequency - * @freq : Frequency to map with the EM + * @table: List of performance states, in ascending order + * @nr_perf_states: Number of performance states + * @max_util: Max utilization to map with the EM + * @pd_flags: Performance Domain flags * * It is called from the scheduler code quite frequently and as a consequence * doesn't implement any check. * - * Return: An efficient performance state, high enough to meet @freq + * Return: An efficient performance state id, high enough to meet @max_util * requirement. */ -static inline -struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd, - unsigned long freq) +static inline int +em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, + unsigned long max_util, unsigned long pd_flags) { struct em_perf_state *ps; int i; - for (i = 0; i < pd->nr_perf_states; i++) { - ps = &pd->table[i]; - if (ps->frequency >= freq) { - if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && + for (i = 0; i < nr_perf_states; i++) { + ps = &table[i]; + if (ps->performance >= max_util) { + if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && ps->flags & EM_PERF_STATE_INEFFICIENT) continue; - break; + return i; } } - return ps; + return nr_perf_states - 1; } /** @@ -224,9 +226,13 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util, unsigned long allowed_cpu_cap) { - unsigned long freq, scale_cpu; + struct em_perf_table *em_table; struct em_perf_state *ps; - int cpu; + int i; + +#ifdef CONFIG_SCHED_DEBUG + WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); +#endif if (!sum_util) return 0; @@ -234,32 +240,29 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested - * frequency, like schedutil. Take also into account that the real - * frequency might be set lower (due to thermal capping). Thus, clamp + * performance, like schedutil. Take also into account that the real + * performance might be set lower (due to thermal capping). Thus, clamp * max utilization to the allowed CPU capacity before calculating - * effective frequency. + * effective performance. */ - cpu = cpumask_first(to_cpumask(pd->cpus)); - scale_cpu = arch_scale_cpu_capacity(cpu); - ps = &pd->table[pd->nr_perf_states - 1]; - - max_util = map_util_perf(max_util); max_util = min(max_util, allowed_cpu_cap); - freq = map_util_freq(max_util, ps->frequency, scale_cpu); /* * Find the lowest performance state of the Energy Model above the - * requested frequency. + * requested performance. */ - ps = em_pd_get_efficient_state(pd, freq); + em_table = rcu_dereference(pd->em_table); + i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states, + max_util, pd->flags); + ps = &em_table->state[i]; /* - * The capacity of a CPU in the domain at the performance state (ps) - * can be computed as: + * The performance (capacity) of a CPU in the domain at the performance + * state (ps) can be computed as: * - * ps->freq * scale_cpu - * ps->cap = -------------------- (1) - * cpu_max_freq + * ps->freq * scale_cpu + * ps->performance = -------------------- (1) + * cpu_max_freq * * So, ignoring the costs of idle states (which are not available in * the EM), the energy consumed by this CPU at that performance state @@ -267,9 +270,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * * ps->power * cpu_util * cpu_nrg = -------------------- (2) - * ps->cap + * ps->performance * - * since 'cpu_util / ps->cap' represents its percentage of busy time. + * since 'cpu_util / ps->performance' represents its percentage of busy + * time. * * NOTE: Although the result of this computation actually is in * units of power, it can be manipulated as an energy value @@ -279,9 +283,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product * of two terms: * - * ps->power * cpu_max_freq cpu_util - * cpu_nrg = ------------------------ * --------- (3) - * ps->freq scale_cpu + * ps->power * cpu_max_freq + * cpu_nrg = ------------------------ * cpu_util (3) + * ps->freq * scale_cpu * * The first term is static, and is stored in the em_perf_state struct * as 'ps->cost'. @@ -291,11 +295,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * total energy of the domain (which is the simple sum of the energy of * all of its CPUs) can be factorized as: * - * ps->cost * \Sum cpu_util - * pd_nrg = ------------------------ (4) - * scale_cpu + * pd_nrg = ps->cost * \Sum cpu_util (4) */ - return em_estimate_energy(ps->cost, sum_util, scale_cpu); + return ps->cost * sum_util; } /** @@ -310,6 +312,23 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return pd->nr_perf_states; } +/** + * em_perf_state_from_pd() - Get the performance states table of perf. + * domain + * @pd : performance domain for which this must be done + * + * To use this function the rcu_read_lock() should be hold. After the usage + * of the performance states table is finished, the rcu_read_unlock() should + * be called. + * + * Return: the pointer to performance states table of the performance domain + */ +static inline +struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) +{ + return rcu_dereference(pd->em_table)->state; +} + #else struct em_data_callback {}; #define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { } @@ -344,6 +363,29 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { return 0; } +static inline +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +{ + return NULL; +} +static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table) +{ + return -EINVAL; +} +static inline +struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) +{ + return NULL; +} +static inline +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states) +{ + return -EINVAL; +} #endif #endif diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index d95ab85f96ba..b0fb775a600d 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -7,6 +7,11 @@ #include <linux/syscalls.h> #include <linux/seccomp.h> #include <linux/sched.h> +#include <linux/context_tracking.h> +#include <linux/livepatch.h> +#include <linux/resume_user_mode.h> +#include <linux/tick.h> +#include <linux/kmsan.h> #include <asm/entry-common.h> @@ -98,7 +103,19 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} * done between establishing state and enabling interrupts. The caller must * enable interrupts before invoking syscall_enter_from_user_mode_work(). */ -void enter_from_user_mode(struct pt_regs *regs); +static __always_inline void enter_from_user_mode(struct pt_regs *regs) +{ + arch_enter_from_user_mode(regs); + lockdep_hardirqs_off(CALLER_ADDR0); + + CT_WARN_ON(__ct_state() != CONTEXT_USER); + user_exit_irqoff(); + + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + trace_hardirqs_off_finish(); + instrumentation_end(); +} /** * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts @@ -117,6 +134,9 @@ void enter_from_user_mode(struct pt_regs *regs); */ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); +long syscall_trace_enter(struct pt_regs *regs, long syscall, + unsigned long work); + /** * syscall_enter_from_user_mode_work - Check and handle work before invoking * a syscall @@ -140,7 +160,15 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ -long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); +static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + + if (work & SYSCALL_WORK_ENTER) + syscall = syscall_trace_enter(regs, syscall, work); + + return syscall; +} /** * syscall_enter_from_user_mode - Establish state and check and handle work @@ -159,7 +187,19 @@ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); * Returns: The original or a modified syscall number. See * syscall_enter_from_user_mode_work() for further explanation. */ -long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); +static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = syscall_enter_from_user_mode_work(regs, syscall); + instrumentation_end(); + + return ret; +} /** * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() @@ -259,6 +299,43 @@ static __always_inline void arch_exit_to_user_mode(void) { } void arch_do_signal_or_restart(struct pt_regs *regs); /** + * exit_to_user_mode_loop - do any pending work before leaving to user space + */ +unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + unsigned long ti_work); + +/** + * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required + * @regs: Pointer to pt_regs on entry stack + * + * 1) check that interrupts are disabled + * 2) call tick_nohz_user_enter_prepare() + * 3) call exit_to_user_mode_loop() if any flags from + * EXIT_TO_USER_MODE_WORK are set + * 4) check that interrupts are still disabled + */ +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) +{ + unsigned long ti_work; + + lockdep_assert_irqs_disabled(); + + /* Flush pending rcuog wakeup before the last need_resched() check */ + tick_nohz_user_enter_prepare(); + + ti_work = read_thread_flags(); + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) + ti_work = exit_to_user_mode_loop(regs, ti_work); + + arch_exit_to_user_mode_prepare(regs, ti_work); + + /* Ensure that kernel state is sane for a return to userspace */ + kmap_assert_nomap(); + lockdep_assert_irqs_disabled(); + lockdep_sys_exit(); +} + +/** * exit_to_user_mode - Fixup state when exiting to user mode * * Syscall/interrupt exit enables interrupts, but the kernel state is @@ -276,7 +353,17 @@ void arch_do_signal_or_restart(struct pt_regs *regs); * non-instrumentable. * The caller has to invoke syscall_exit_to_user_mode_work() before this. */ -void exit_to_user_mode(void); +static __always_inline void exit_to_user_mode(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + + user_enter_irqoff(); + arch_exit_to_user_mode(); + lockdep_hardirqs_on(CALLER_ADDR0); +} /** * syscall_exit_to_user_mode_work - Handle work before returning to user mode diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 224645f17c33..297231854ada 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -608,6 +608,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, } /** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. + */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + +/** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad * diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 62b61527bcc4..9901e563f706 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -95,6 +95,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len + * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -102,6 +103,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_TX_PUSH = BIT(2), ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), + ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -220,6 +222,16 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +struct ethtool_keee { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised); + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_active; + bool eee_enabled; +}; + struct kernel_ethtool_coalesce { u8 use_cqe_mode_tx; u8 use_cqe_mode_rx; @@ -409,8 +421,10 @@ struct ethtool_pause_stats { * not entire FEC data blocks. This is a non-standard statistic. * Reported to user space as %ETHTOOL_A_FEC_STAT_CORR_BITS. * - * @lane: per-lane/PCS-instance counts as defined by the standard - * @total: error counts for the entire port, for drivers incapable of reporting + * For each of the above fields, the two substructure members are: + * + * - @lanes: per-lane/PCS-instance counts as defined by the standard + * - @total: error counts for the entire port, for drivers incapable of reporting * per-lane stats * * Drivers should fill in either only total or per-lane statistics, core @@ -595,9 +609,46 @@ struct ethtool_mm_stats { }; /** + * struct ethtool_rxfh_param - RXFH (RSS) parameters + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). + * Valid values are one of the %ETH_RSS_HASH_*. + * @indir_size: On SET, the array size of the user buffer for the + * indirection table, which may be zero, or + * %ETH_RXFH_INDIR_NO_CHANGE. On GET (read from the driver), + * the array size of the hardware indirection table. + * @indir: The indirection table of size @indir_size entries. + * @key_size: On SET, the array size of the user buffer for the hash key, + * which may be zero. On GET (read from the driver), the size of the + * hardware hash key. + * @key: The hash key of size @key_size bytes. + * @rss_context: RSS context identifier. Context 0 is the default for normal + * traffic; other contexts can be referenced as the destination for RX flow + * classification rules. On SET, %ETH_RXFH_CONTEXT_ALLOC is used + * to allocate a new RSS context; on return this field will + * contain the ID of the newly allocated context. + * @rss_delete: Set to non-ZERO to remove the @rss_context context. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + */ +struct ethtool_rxfh_param { + u8 hfunc; + u32 indir_size; + u32 *indir; + u32 key_size; + u8 *key; + u32 rss_context; + u8 rss_delete; + u8 input_xfrm; +}; + +/** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. + * @cap_rss_ctx_supported: indicates if the driver supports RSS + * contexts. + * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor + * RSS. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -694,15 +745,6 @@ struct ethtool_mm_stats { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. - * @get_rxfh_context: Get the contents of the RX flow hash indirection table, - * hash key, and/or hash function assiciated to the given rss context. - * Returns a negative error code or zero. - * @set_rxfh_context: Create, remove and configure RSS contexts. Allows setting - * the contents of the RX flow hash indirection table, hash key, and/or - * hash function associated to the given context. Arguments which are set - * to %NULL or zero will remain unchanged. - * Returns a negative error code or zero. An error code must be returned - * if at least one unsupported change was requested. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -785,6 +827,8 @@ struct ethtool_mm_stats { */ struct ethtool_ops { u32 cap_link_lanes_supported:1; + u32 cap_rss_ctx_supported:1; + u32 cap_rss_sym_xor_supported:1; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -844,15 +888,9 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key, - u8 *hfunc); - int (*set_rxfh)(struct net_device *, const u32 *indir, - const u8 *key, const u8 hfunc); - int (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key, - u8 *hfunc, u32 rss_context); - int (*set_rxfh_context)(struct net_device *, const u32 *indir, - const u8 *key, const u8 hfunc, - u32 *rss_context, bool delete); + int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); + int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); @@ -864,8 +902,8 @@ struct ethtool_ops { struct ethtool_modinfo *); int (*get_module_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_eee)(struct net_device *, struct ethtool_eee *); - int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee); + int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee); int (*get_tunable)(struct net_device *, const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, @@ -1044,12 +1082,52 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, } /** + * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. + * @dev: pointer to net_device structure + * @info: buffer to hold the result + * Returns zero on success, non-zero otherwise. + */ +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); + +/** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); + +/** + * ethtool_puts - Write string to ethtool string data + * @data: Pointer to a pointer to the start of string to update + * @str: String to write + * + * Write string to *data without a trailing newline. Update *data + * to point at start of next string. + * + * Prefer this function to ethtool_sprintf() when given only + * two arguments or if @fmt is just "%s". + */ +extern void ethtool_puts(u8 **data, const char *str); + +/* Link mode to forced speed capabilities maps */ +struct ethtool_forced_speed_map { + u32 speed; + __ETHTOOL_DECLARE_LINK_MODE_MASK(caps); + + const u32 *cap_arr; + u32 arr_size; +}; + +#define ETHTOOL_FORCED_SPEED_MAP(prefix, value) \ +{ \ + .speed = SPEED_##value, \ + .cap_arr = prefix##_##value, \ + .arr_size = ARRAY_SIZE(prefix##_##value), \ +} + +void +ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index b9d83652c097..e32bee4345fb 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -35,8 +35,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); -__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); -__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask); +void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); @@ -58,15 +57,8 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) return ERR_PTR(-ENOSYS); } -static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +static inline void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask) { - return -ENOSYS; -} - -static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, - unsigned mask) -{ - return -ENOSYS; } static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) @@ -92,5 +84,10 @@ static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) #endif +static inline void eventfd_signal(struct eventfd_ctx *ctx) +{ + eventfd_signal_mask(ctx, 0); +} + #endif /* _LINUX_EVENTFD_H */ diff --git a/include/linux/evm.h b/include/linux/evm.h index 01fc495a83e2..d48d6da32315 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -12,51 +12,12 @@ #include <linux/integrity.h> #include <linux/xattr.h> -struct integrity_iint_cache; - #ifdef CONFIG_EVM extern int evm_set_key(void *key, size_t keylen); extern enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr); -extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); -extern int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size); -extern void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len); -extern int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *xattr_name); -extern void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name); -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - evm_inode_post_removexattr(dentry, acl_name); -} -extern int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return evm_inode_set_acl(idmap, dentry, acl_name, NULL); -} -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); -} - + size_t xattr_value_len); int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count); @@ -84,80 +45,12 @@ static inline int evm_set_key(void *key, size_t keylen) static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint) + size_t xattr_value_len) { return INTEGRITY_UNKNOWN; } #endif -static inline int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr) -{ - return 0; -} - -static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid) -{ - return; -} - -static inline int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size) -{ - return 0; -} - -static inline void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return; -} - -static inline int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return; -} - -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return; -} - -static inline int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - return 0; -} - -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} - -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return; -} - static inline int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 45fca09b2319..d445705ac13c 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -16,10 +16,13 @@ * and eliminates the need for absolute relocations that require runtime * processing on relocatable kernels. */ +#define __KSYM_ALIGN ".balign 4" #define __KSYM_REF(sym) ".long " #sym "- ." #elif defined(CONFIG_64BIT) +#define __KSYM_ALIGN ".balign 8" #define __KSYM_REF(sym) ".quad " #sym #else +#define __KSYM_ALIGN ".balign 4" #define __KSYM_REF(sym) ".long " #sym #endif @@ -42,7 +45,7 @@ " .asciz \"" ns "\"" "\n" \ " .previous" "\n" \ " .section \"___ksymtab" sec "+" #name "\", \"a\"" "\n" \ - " .balign 4" "\n" \ + __KSYM_ALIGN "\n" \ "__ksymtab_" #name ":" "\n" \ __KSYM_REF(sym) "\n" \ __KSYM_REF(__kstrtab_ ##name) "\n" \ @@ -50,9 +53,7 @@ " .previous" "\n" \ ) -#ifdef CONFIG_IA64 -#define KSYM_FUNC(name) @fptr(name) -#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT) #define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name @@ -63,6 +64,7 @@ #define SYMBOL_CRC(sym, crc, sec) \ asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \ + ".balign 4" "\n" \ "__crc_" #sym ":" "\n" \ ".long " #crc "\n" \ ".previous" "\n") diff --git a/include/linux/export.h b/include/linux/export.h index 9911508a9604..0bbd02fd351d 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -7,15 +7,6 @@ #include <linux/stringify.h> /* - * Export symbols from the kernel to modules. Forked from module.h - * to reduce the amount of pointless cruft we feed to gcc when only - * exporting a simple symbol or two. - * - * Try not to add #includes here. It slows compilation and makes kernel - * hackers place grumpy comments in header files. - */ - -/* * This comment block is used by fixdep. Please do not remove. * * When CONFIG_MODVERSIONS is changed from n to y, all source files having @@ -23,15 +14,6 @@ * side effect of the *.o build rule. */ -#ifndef __ASSEMBLY__ -#ifdef MODULE -extern struct module __this_module; -#define THIS_MODULE (&__this_module) -#else -#define THIS_MODULE ((struct module *)0) -#endif -#endif /* __ASSEMBLY__ */ - #ifdef CONFIG_64BIT #define __EXPORT_SYMBOL_REF(sym) \ .balign 8 ASM_NL \ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 11fbd0ee1370..bb37ad5cc954 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -99,12 +99,29 @@ enum fid_type { FILEID_FAT_WITH_PARENT = 0x72, /* + * 64 bit inode number, 32 bit generation number. + */ + FILEID_INO64_GEN = 0x81, + + /* + * 64 bit inode number, 32 bit generation number, + * 64 bit parent inode number, 32 bit parent generation. + */ + FILEID_INO64_GEN_PARENT = 0x82, + + /* * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) */ FILEID_LUSTRE = 0x97, /* + * 64 bit inode number, 32 bit subvolume, 32 bit generation number: + */ + FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1, + FILEID_BCACHEFS_WITH_PARENT = 0xb2, + + /* * 64 bit unique kernfs id */ FILEID_KERNFS = 0xfe, @@ -123,7 +140,11 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - struct { + struct { + u64 ino; + u32 gen; + } __packed i64; + struct { u32 block; u16 partref; u16 parent_partref; @@ -224,15 +245,56 @@ struct export_operations { atomic attribute updates */ #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ +#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ unsigned long flags; }; +/** + * exportfs_lock_op_is_async() - export op supports async lock operation + * @export_ops: the nfs export operations to check + * + * Returns true if the nfs export_operations structure has + * EXPORT_OP_ASYNC_LOCK in their flags set + */ +static inline bool +exportfs_lock_op_is_async(const struct export_operations *export_ops) +{ + return export_ops->flags & EXPORT_OP_ASYNC_LOCK; +} + extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags); +static inline bool exportfs_can_encode_fid(const struct export_operations *nop) +{ + return !nop || nop->encode_fh; +} + +static inline bool exportfs_can_decode_fh(const struct export_operations *nop) +{ + return nop && nop->fh_to_dentry; +} + +static inline bool exportfs_can_encode_fh(const struct export_operations *nop, + int fh_flags) +{ + /* + * If a non-decodeable file handle was requested, we only need to make + * sure that filesystem did not opt-out of encoding fid. + */ + if (fh_flags & EXPORT_FH_FID) + return exportfs_can_encode_fid(nop); + + /* + * If a decodeable file handle was requested, we need to make sure that + * filesystem can also decode file handles. + */ + return exportfs_can_decode_fh(nop); +} + static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, int *max_len) { @@ -252,10 +314,12 @@ extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Generic helpers for filesystems. */ -extern struct dentry *generic_fh_to_dentry(struct super_block *sb, +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); +struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); -extern struct dentry *generic_fh_to_parent(struct super_block *sb, +struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a82a4bb6ce68..a357287eac1e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -13,10 +13,10 @@ #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ #define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ -#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ -#define F2FS_BLKSIZE 4096 /* support only 4KB block */ -#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ +#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */ +#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */ +#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */ +#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) @@ -27,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 @@ -40,12 +41,6 @@ #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_IO_SIZE(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */ -#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ -#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) -#define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1) - /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) @@ -81,6 +76,7 @@ enum stop_cp_reason { STOP_CP_REASON_CORRUPTED_SUMMARY, STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_MAX, }; @@ -104,6 +100,7 @@ enum f2fs_error { ERROR_CORRUPTED_VERITY_XATTR, ERROR_CORRUPTED_XATTR, ERROR_INVALID_NODE_REFERENCE, + ERROR_INCONSISTENT_NAT, ERROR_MAX, }; @@ -210,14 +207,14 @@ struct f2fs_checkpoint { unsigned char sit_nat_version_bitmap[]; } __packed; -#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ +#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */ #define CP_MIN_CHKSUM_OFFSET \ (offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap)) /* * For orphan inode management */ -#define F2FS_ORPHANS_PER_BLOCK 1020 +#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32)) #define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) @@ -243,14 +240,31 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 /* 200 bytes for inline xattrs by default */ #define DEFAULT_INLINE_XATTR_ADDRS 50 -#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ + +#define OFFSET_OF_END_OF_I_EXT 360 +#define SIZE_OF_I_NID 20 + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode number */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +/* Address Pointers in an Inode */ +#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \ + - SIZE_OF_I_NID \ + - sizeof(struct node_footer)) / sizeof(__le32)) #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) -#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +/* Address Pointers in a Direct Block */ +#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +/* Node IDs in an Indirect Block */ +#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_PAGE(page, inode) \ (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) @@ -342,14 +356,6 @@ enum { #define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0) -struct node_footer { - __le32 nid; /* node id */ - __le32 ino; /* inode number */ - __le32 flag; /* include cold/fsync/dentry marks and offset */ - __le64 cp_ver; /* checkpoint version */ - __le32 next_blkaddr; /* next node page block address */ -} __packed; - struct f2fs_node { /* can be one of three types: inode, direct, and indirect types */ union { @@ -363,7 +369,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -378,12 +384,13 @@ struct f2fs_nat_block { /* * For SIT entries * - * Each segment is 2MB in size by default so that a bitmap for validity of - * there-in blocks should occupy 64 bytes, 512 bits. + * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size, + * this results in a segment size of 2MB. For 16k pages, the default segment size + * is 8MB. * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry)) /* * F2FS uses 4 bytes to represent block address. As a result, supported size of @@ -418,7 +425,7 @@ struct f2fs_sit_block { * For segment summary * * One summary block contains exactly 512 summary entries, which represents - * exactly 2MB segment by default. Not allow to change the basic units. + * exactly one segment by default. Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -429,12 +436,12 @@ struct f2fs_sit_block { * from node's page's beginning to get a data block address. * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ -#define ENTRIES_IN_SUM 512 -#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8) +#define SUMMARY_SIZE (7) /* sizeof(struct f2fs_summary) */ #define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) -/* a summary entry for a 4KB-sized block in a segment */ +/* a summary entry for a block in a segment */ struct f2fs_summary { __le32 nid; /* parent node id */ union { @@ -518,7 +525,7 @@ struct f2fs_journal { }; } __packed; -/* 4KB-sized summary block structure */ +/* Block-sized summary block structure */ struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; struct f2fs_journal journal; @@ -559,11 +566,14 @@ typedef __le32 f2fs_hash_t; * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. */ -#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1)) #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ +#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) #define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */ @@ -576,7 +586,7 @@ struct f2fs_dir_entry { __u8 file_type; /* file type */ } __packed; -/* 4KB-sized directory entry block */ +/* Block-sized directory entry block */ struct f2fs_dentry_block { /* validity bitmap for directory entries in each block */ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; diff --git a/include/linux/fb.h b/include/linux/fb.h index 94e2c44c6569..811e47f9d1c3 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -2,28 +2,30 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H -#include <linux/refcount.h> -#include <linux/kgdb.h> #include <uapi/linux/fb.h> #define FBIO_CURSOR _IOWR('F', 0x08, struct fb_cursor_user) -#include <linux/fs.h> -#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/printk.h> +#include <linux/refcount.h> +#include <linux/types.h> #include <linux/workqueue.h> -#include <linux/notifier.h> -#include <linux/list.h> -#include <linux/backlight.h> -#include <linux/slab.h> #include <asm/fb.h> -struct vm_area_struct; -struct fb_info; +struct backlight_device; struct device; +struct device_node; +struct fb_info; struct file; +struct i2c_adapter; +struct inode; +struct module; +struct notifier_block; +struct page; struct videomode; -struct device_node; +struct vm_area_struct; /* Definitions below are used in the parsed monitor specs */ #define FB_DPMS_ACTIVE_OFF 1 @@ -143,9 +145,13 @@ struct fb_event { void *data; }; +/* Enough for the VT console needs, see its max_font_width/height */ +#define FB_MAX_BLIT_WIDTH 64 +#define FB_MAX_BLIT_HEIGHT 128 + struct fb_blit_caps { - u32 x; - u32 y; + DECLARE_BITMAP(x, FB_MAX_BLIT_WIDTH); + DECLARE_BITMAP(y, FB_MAX_BLIT_HEIGHT); u32 len; u32 flags; }; @@ -192,10 +198,12 @@ struct fb_pixmap { u32 scan_align; /* alignment per scanline */ u32 access_align; /* alignment per read/write (bits) */ u32 flags; /* see FB_PIXMAP_* */ - u32 blit_x; /* supported bit block dimensions (1-32)*/ - u32 blit_y; /* Format: blit_x = 1 << (width - 1) */ - /* blit_y = 1 << (height - 1) */ - /* if 0, will be set to 0xffffffff (all)*/ + /* supported bit block dimensions */ + /* Format: test_bit(width - 1, blit_x) */ + /* test_bit(height - 1, blit_y) */ + /* if zero, will be set to full (all) */ + DECLARE_BITMAP(blit_x, FB_MAX_BLIT_WIDTH); + DECLARE_BITMAP(blit_y, FB_MAX_BLIT_HEIGHT); /* access methods */ void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size); void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size); @@ -536,6 +544,7 @@ extern ssize_t fb_io_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos); extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); +int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma); #define __FB_DEFAULT_IOMEM_OPS_RDWR \ .fb_read = fb_io_read, \ @@ -547,7 +556,7 @@ extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf, .fb_imageblit = cfb_imageblit #define __FB_DEFAULT_IOMEM_OPS_MMAP \ - .fb_mmap = NULL /* default implementation */ + .fb_mmap = fb_io_mmap #define FB_DEFAULT_IOMEM_OPS \ __FB_DEFAULT_IOMEM_OPS_RDWR, \ @@ -685,6 +694,10 @@ extern int fb_deferred_io_fsync(struct file *file, loff_t start, __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) +#define FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(__prefix, __damage_range, __damage_area) \ + __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ + __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) + /* * Initializes struct fb_ops for deferred I/O. */ @@ -839,16 +852,12 @@ extern int fb_find_mode(struct fb_var_screeninfo *var, const struct fb_videomode *default_mode, unsigned int default_bpp); -#if defined(CONFIG_VIDEO_NOMODESET) bool fb_modesetting_disabled(const char *drvname); -#else -static inline bool fb_modesetting_disabled(const char *drvname) -{ - return false; -} -#endif -/* Convenience logging macros */ +/* + * Convenience logging macros + */ + #define fb_err(fb_info, fmt, ...) \ pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_notice(info, fmt, ...) \ @@ -860,4 +869,12 @@ static inline bool fb_modesetting_disabled(const char *drvname) #define fb_dbg(fb_info, fmt, ...) \ pr_debug("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) +#define fb_warn_once(fb_info, fmt, ...) \ + pr_warn_once("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) + +#define fb_WARN_ONCE(fb_info, condition, fmt, ...) \ + WARN_ONCE(condition, "fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) +#define fb_WARN_ON_ONCE(fb_info, x) \ + fb_WARN_ONCE(fb_info, (x), "%s", "fb_WARN_ON_ONCE(" __stringify(x) ")") + #endif /* _LINUX_FB_H */ diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index e066816f3519..78c8326d74ae 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -83,12 +83,17 @@ struct dentry; static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); + unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds); + struct file *needs_masking; - if (fd < fdt->max_fds) { - fd = array_index_nospec(fd, fdt->max_fds); - return rcu_dereference_raw(fdt->fd[fd]); - } - return NULL; + /* + * 'mask' is zero for an out-of-bounds fd, all ones for ok. + * 'fd&mask' is 'fd' for ok, or 0 for out of bounds. + * + * Accessing fdt->fd[0] is ok, but needs masking of the result. + */ + needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]); + return (struct file *)(mask & (unsigned long)needs_masking); } static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) @@ -98,20 +103,9 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) -{ - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "suspicious rcu_dereference_check() usage"); - return files_lookup_fd_raw(files, fd); -} - -static inline struct file *lookup_fd_rcu(unsigned int fd) -{ - return files_lookup_fd_rcu(current->files, fd); -} - -struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); +struct file *lookup_fdget_rcu(unsigned int fd); +struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); +struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); struct task_struct; @@ -125,7 +119,7 @@ int iterate_fd(struct files_struct *, unsigned, extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -extern struct file *close_fd_get_file(unsigned int fd); +extern struct file *file_close_fd(unsigned int fd); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); diff --git a/include/linux/file.h b/include/linux/file.h index 6e9099d29343..169692cb1906 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -24,6 +24,8 @@ struct inode; struct path; extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); +extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *, + const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); @@ -96,18 +98,8 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), extern void fd_install(unsigned int fd, struct file *file); -extern int __receive_fd(struct file *file, int __user *ufd, - unsigned int o_flags); - -extern int receive_fd(struct file *file, unsigned int o_flags); +int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); -static inline int receive_fd_user(struct file *file, int __user *ufd, - unsigned int o_flags) -{ - if (ufd == NULL) - return -EFAULT; - return __receive_fd(file, ufd, o_flags); -} int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); extern void flush_delayed_fput(void); diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 95e868e09e29..daee999d05f3 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -27,6 +27,7 @@ #define FILE_LOCK_DEFERRED 1 struct file_lock; +struct file_lease; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); @@ -39,14 +40,17 @@ struct lock_manager_operations { void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); - bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock *, int, struct list_head *); - void (*lm_setup)(struct file_lock *, void **); - bool (*lm_breaker_owns_lease)(struct file_lock *); bool (*lm_lock_expirable)(struct file_lock *cfl); void (*lm_expire_lock)(void); }; +struct lease_manager_operations { + bool (*lm_break)(struct file_lease *); + int (*lm_change)(struct file_lease *, int, struct list_head *); + void (*lm_setup)(struct file_lease *, void **); + bool (*lm_breaker_owns_lease)(struct file_lease *); +}; + struct lock_manager { struct list_head list; /* @@ -85,31 +89,31 @@ bool opens_in_grace(struct net *); * * Obviously, the last two criteria only matter for POSIX locks. */ -struct file_lock { - struct file_lock *fl_blocker; /* The lock, that is blocking us */ - struct list_head fl_list; /* link into file_lock_context */ - struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_blocked_requests; /* list of requests with + +struct file_lock_core { + struct file_lock_core *flc_blocker; /* The lock that is blocking us */ + struct list_head flc_list; /* link into file_lock_context */ + struct hlist_node flc_link; /* node in global lists */ + struct list_head flc_blocked_requests; /* list of requests with * ->fl_blocker pointing here */ - struct list_head fl_blocked_member; /* node in + struct list_head flc_blocked_member; /* node in * ->fl_blocker->fl_blocked_requests */ - fl_owner_t fl_owner; - unsigned int fl_flags; - unsigned char fl_type; - unsigned int fl_pid; - int fl_link_cpu; /* what cpu's list is this on? */ - wait_queue_head_t fl_wait; - struct file *fl_file; + fl_owner_t flc_owner; + unsigned int flc_flags; + unsigned char flc_type; + pid_t flc_pid; + int flc_link_cpu; /* what cpu's list is this on? */ + wait_queue_head_t flc_wait; + struct file *flc_file; +}; + +struct file_lock { + struct file_lock_core c; loff_t fl_start; loff_t fl_end; - struct fasync_struct * fl_fasync; /* for lease break notifications */ - /* for lease breaks: */ - unsigned long fl_break_time; - unsigned long fl_downgrade_time; - const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { @@ -126,6 +130,15 @@ struct file_lock { } fl_u; } __randomize_layout; +struct file_lease { + struct file_lock_core c; + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ + unsigned long fl_break_time; + unsigned long fl_downgrade_time; + const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */ +} __randomize_layout; + struct file_lock_context { spinlock_t flc_lock; struct list_head flc_flock; @@ -147,11 +160,31 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return fl->c.flc_type == F_UNLCK; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return fl->c.flc_type == F_RDLCK; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return fl->c.flc_type == F_WRLCK; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ + wake_up(&fl->c.flc_wait); +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); void locks_init_lock(struct file_lock *); -struct file_lock * locks_alloc_lock(void); +struct file_lock *locks_alloc_lock(void); void locks_copy_lock(struct file_lock *, struct file_lock *); void locks_copy_conflock(struct file_lock *, struct file_lock *); void locks_remove_posix(struct file *, fl_owner_t); @@ -165,11 +198,16 @@ int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_l int vfs_cancel_lock(struct file *filp, struct file_lock *fl); bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + +void locks_init_lease(struct file_lease *); +void locks_free_lease(struct file_lease *fl); +struct file_lease *locks_alloc_lease(void); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); -int generic_setlease(struct file *, int, struct file_lock **, void **priv); -int vfs_setlease(struct file *, int, struct file_lock **, void **); -int lease_modify(struct file_lock *, int, struct list_head *); +int generic_setlease(struct file *, int, struct file_lease **, void **priv); +int kernel_setlease(struct file *, int, struct file_lease **, void **); +int vfs_setlease(struct file *, int, struct file_lease **, void **); +int lease_modify(struct file_lease *, int, struct list_head *); struct notifier_block; int lease_register_notifier(struct notifier_block *); @@ -223,6 +261,25 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return false; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ +} + static inline void locks_free_lock_context(struct inode *inode) { @@ -233,6 +290,11 @@ static inline void locks_init_lock(struct file_lock *fl) return; } +static inline void locks_init_lease(struct file_lease *fl) +{ + return; +} + static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; @@ -307,18 +369,24 @@ static inline void lease_get_mtime(struct inode *inode, } static inline int generic_setlease(struct file *filp, int arg, - struct file_lock **flp, void **priv) + struct file_lease **flp, void **priv) +{ + return -EINVAL; +} + +static inline int kernel_setlease(struct file *filp, int arg, + struct file_lease **lease, void **priv) { return -EINVAL; } static inline int vfs_setlease(struct file *filp, int arg, - struct file_lock **lease, void **priv) + struct file_lease **lease, void **priv) { return -EINVAL; } -static inline int lease_modify(struct file_lock *fl, int arg, +static inline int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { return -EINVAL; @@ -341,6 +409,9 @@ locks_inode_context(const struct inode *inode) #endif /* !CONFIG_FILE_LOCKING */ +/* for walking lists of file_locks linked by fl_list */ +#define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list) + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); diff --git a/include/linux/filter.h b/include/linux/filter.h index 761af6b3cf2b..c99bc3df2d28 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -72,6 +72,9 @@ struct ctl_table_header; /* unused opcode to mark special ldsx instruction. Same as BPF_IND */ #define BPF_PROBE_MEMSX 0x40 +/* unused opcode to mark special load instruction. Same as BPF_MSH */ +#define BPF_PROBE_MEM32 0xa0 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 @@ -117,21 +120,25 @@ struct ctl_table_header; /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, DST, IMM) \ +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) -#define BPF_ALU32_IMM(OP, DST, IMM) \ +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ @@ -143,6 +150,16 @@ struct ctl_table_header; .off = 0, \ .imm = LEN }) +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ @@ -179,6 +196,24 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ @@ -263,6 +298,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ + +#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ @@ -505,24 +550,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 @@ -694,7 +742,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { @@ -912,6 +960,10 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +bool bpf_jit_supports_exceptions(void); +bool bpf_jit_supports_ptr_xchg(void); +bool bpf_jit_supports_arena(void); +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -981,12 +1033,6 @@ int xdp_do_redirect_frame(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush(void); -/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as - * it is no longer only flushing maps. Keep this define for compatibility - * until all drivers are updated - do not use xdp_do_flush_map() in new code! - */ -#define xdp_do_flush_map xdp_do_flush - void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET @@ -1029,7 +1075,7 @@ struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_prog_pack_free(struct bpf_binary_header *hdr); +void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { @@ -1101,7 +1147,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_capable()) + if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; @@ -1127,6 +1173,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, @@ -1194,6 +1241,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } +static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + return NULL; +} + static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) @@ -1285,6 +1337,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/include/linux/find.h b/include/linux/find.h index 5e4f39ef2e72..c69598e383c1 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -405,7 +405,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, { unsigned long bit = find_next_and_bit(addr1, addr2, size, offset); - if (bit < size) + if (bit < size || offset == 0) return bit; bit = find_first_and_bit(addr1, addr2, offset); @@ -413,8 +413,8 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, } /** - * find_next_bit_wrap - find the next set bit in both memory regions - * @addr: The first address to base the search on + * find_next_bit_wrap - find the next set bit in a memory region + * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * @@ -427,7 +427,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, { unsigned long bit = find_next_bit(addr, size, offset); - if (bit < size) + if (bit < size || offset == 0) return bit; bit = find_first_bit(addr, offset); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index bd3fc75d4f14..dd9f2d765e68 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -75,7 +75,7 @@ void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); int fw_csr_string(const u32 *directory, int key, char *buf, size_t size); -extern struct bus_type fw_bus_type; +extern const struct bus_type fw_bus_type; struct fw_card_driver; struct fw_node; diff --git a/include/linux/firmware.h b/include/linux/firmware.h index de7fea3bca51..f026f8926d79 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <linux/cleanup.h> #include <linux/gfp.h> #define FW_ACTION_NOUEVENT 0 @@ -27,6 +28,7 @@ struct firmware { * @FW_UPLOAD_ERR_INVALID_SIZE: invalid firmware image size * @FW_UPLOAD_ERR_RW_ERROR: read or write to HW failed, see kernel log * @FW_UPLOAD_ERR_WEAROUT: FLASH device is approaching wear-out, wait & retry + * @FW_UPLOAD_ERR_FW_INVALID: invalid firmware file * @FW_UPLOAD_ERR_MAX: Maximum error code marker */ enum fw_upload_err { @@ -38,6 +40,7 @@ enum fw_upload_err { FW_UPLOAD_ERR_INVALID_SIZE, FW_UPLOAD_ERR_RW_ERROR, FW_UPLOAD_ERR_WEAROUT, + FW_UPLOAD_ERR_FW_INVALID, FW_UPLOAD_ERR_MAX }; @@ -196,4 +199,6 @@ static inline void firmware_upload_unregister(struct fw_upload *fw_upload) int firmware_request_cache(struct device *device, const char *name); +DEFINE_FREE(firmware, struct firmware *, release_firmware(_T)) + #endif diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 29cd11d5a3cf..23384a54d575 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -123,7 +123,6 @@ struct cs_dsp_client_ops; * @sysclk_mask: Mask of frequency bits within sysclk register (ADSP1 only) * @sysclk_shift: Shift of frequency bits within sysclk register (ADSP1 only) * @alg_regions: List of currently loaded algorithm regions - * @fw_file_name: Filename of the current firmware * @fw_name: Name of the current firmware * @fw_id: ID of the current firmware, obtained from the wmfw * @fw_id_version: Version of the firmware, obtained from the wmfw diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 95b0da2326a9..8eaf8922ab02 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -19,7 +19,7 @@ enum { struct meson_sm_firmware; int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, - u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); + s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h new file mode 100644 index 000000000000..366243ee9609 --- /dev/null +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM). + * Responsible for setting up and managing QSEECOM client devices. + * + * Copyright (C) 2023 Maximilian Luz <luzmaximilian@gmail.com> + */ + +#ifndef __QCOM_QSEECOM_H +#define __QCOM_QSEECOM_H + +#include <linux/auxiliary_bus.h> +#include <linux/dma-mapping.h> +#include <linux/types.h> + +#include <linux/firmware/qcom/qcom_scm.h> + +/** + * struct qseecom_client - QSEECOM client device. + * @aux_dev: Underlying auxiliary device. + * @app_id: ID of the loaded application. + */ +struct qseecom_client { + struct auxiliary_device aux_dev; + u32 app_id; +}; + +/** + * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. + * @client: The QSEECOM client device. + * + * Returns the SCM device under which the provided QSEECOM client device + * operates. This function is intended to be used for DMA allocations. + */ +static inline struct device *qseecom_scm_dev(struct qseecom_client *client) +{ + return client->aux_dev.dev.parent->parent; +} + +/** + * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. + * @client: The QSEECOM client to allocate the memory for. + * @size: The number of bytes to allocate. + * @dma_handle: Pointer to where the DMA address should be stored. + * @gfp: Allocation flags. + * + * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for + * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. + */ +static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); +} + +/** + * dma_free_coherent() - Free QSEECOM DMA memory. + * @client: The QSEECOM client for which the memory has been allocated. + * @size: The number of bytes allocated. + * @cpu_addr: Virtual memory address to free. + * @dma_handle: DMA memory address to free. + * + * Wrapper function for dma_free_coherent(), freeing memory previously + * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for + * details. + */ +static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); +} + +/** + * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. + * @client: The QSEECOM client associated with the target app. + * @req: DMA address of the request buffer sent to the app. + * @req_size: Size of the request buffer. + * @rsp: DMA address of the response buffer, written to by the app. + * @rsp_size: Size of the response buffer. + * + * Sends a request to the QSEE app associated with the given client and read + * back its response. The caller must provide two DMA memory regions, one for + * the request and one for the response, and fill out the @req region with the + * respective (app-specific) request data. The QSEE app reads this and returns + * its response in the @rsp region. + * + * Note: This is a convenience wrapper around qcom_scm_qseecom_app_send(). + * Clients should prefer to use this wrapper. + * + * Return: Zero on success, nonzero on failure. + */ +static inline int qcom_qseecom_app_send(struct qseecom_client *client, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) +{ + return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); +} + +#endif /* __QCOM_QSEECOM_H */ diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 0c091a3f6d49..aaa19f93ac43 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -59,12 +59,12 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -extern bool qcom_scm_is_available(void); +bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); +int qcom_scm_set_cold_boot_addr(void *entry); +int qcom_scm_set_warm_boot_addr(void *entry); +void qcom_scm_cpu_power_down(u32 flags); +int qcom_scm_set_remote_state(u32 state, u32 id); struct qcom_scm_pas_metadata { void *ptr; @@ -72,54 +72,69 @@ struct qcom_scm_pas_metadata { ssize_t size; }; -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); -extern void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); +int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); +int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); +int qcom_scm_pas_auth_and_reset(u32 peripheral); +int qcom_scm_pas_shutdown(u32 peripheral); +bool qcom_scm_pas_supported(u32 peripheral); -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); +int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); -extern bool qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - u64 *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); +bool qcom_scm_restore_sec_cfg_available(void); +int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, u32 cp_nonpixel_size); +int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); -extern bool qcom_scm_ocmem_lock_available(void); -extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); +bool qcom_scm_ocmem_lock_available(void); +int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size, + u32 mode); +int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size); -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); +bool qcom_scm_ice_available(void); +int qcom_scm_ice_invalidate_key(u32 index); +int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, u32 data_unit_size); -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); +bool qcom_scm_hdcp_available(void); +int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); +int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en); -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); +int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +int qcom_scm_lmh_profile_change(u32 profile_id); +bool qcom_scm_lmh_dcvsh_available(void); + +#ifdef CONFIG_QCOM_QSEECOM + +int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size); + +#else /* CONFIG_QCOM_QSEECOM */ + +static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) +{ + return -EINVAL; +} + +static inline int qcom_scm_qseecom_app_send(u32 app_id, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) +{ + return -EINVAL; +} + +#endif /* CONFIG_QCOM_QSEECOM */ #endif diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index e8b12ec8b060..1a069a56c961 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,6 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx + * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. * * Michal Simek <michal.simek@amd.com> * Davorin Mista <davorin.mista@aggios.com> @@ -32,6 +33,7 @@ #define PM_SIP_SVC 0xC2000000 /* PM API versions */ +#define PM_API_VERSION_1 1 #define PM_API_VERSION_2 2 #define PM_PINCTRL_PARAM_SET_VERSION 2 @@ -47,6 +49,9 @@ #define FAMILY_CODE_MASK GENMASK(27, 21) #define SUB_FAMILY_CODE_MASK GENMASK(20, 19) +#define API_ID_MASK GENMASK(7, 0) +#define MODULE_ID_MASK GENMASK(11, 8) + /* ATF only commands */ #define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 @@ -91,15 +96,41 @@ /* * Node IDs for the Error Events. */ -#define EVENT_ERROR_PMC_ERR1 (0x28100000U) -#define EVENT_ERROR_PMC_ERR2 (0x28104000U) -#define EVENT_ERROR_PSM_ERR1 (0x28108000U) -#define EVENT_ERROR_PSM_ERR2 (0x2810C000U) +#define VERSAL_EVENT_ERROR_PMC_ERR1 (0x28100000U) +#define VERSAL_EVENT_ERROR_PMC_ERR2 (0x28104000U) +#define VERSAL_EVENT_ERROR_PSM_ERR1 (0x28108000U) +#define VERSAL_EVENT_ERROR_PSM_ERR2 (0x2810C000U) + +#define VERSAL_NET_EVENT_ERROR_PMC_ERR1 (0x28100000U) +#define VERSAL_NET_EVENT_ERROR_PMC_ERR2 (0x28104000U) +#define VERSAL_NET_EVENT_ERROR_PMC_ERR3 (0x28108000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR1 (0x2810C000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR2 (0x28110000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR3 (0x28114000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR4 (0x28118000U) /* ZynqMP SD tap delay tuning */ #define SD_ITAPDLY 0xFF180314 #define SD_OTAPDLYSEL 0xFF180318 +/** + * XPM_EVENT_ERROR_MASK_DDRMC_CR: Error event mask for DDRMC MC Correctable ECC Error. + */ +#define XPM_EVENT_ERROR_MASK_DDRMC_CR BIT(18) + +/** + * XPM_EVENT_ERROR_MASK_DDRMC_NCR: Error event mask for DDRMC MC Non-Correctable ECC Error. + */ +#define XPM_EVENT_ERROR_MASK_DDRMC_NCR BIT(19) +#define XPM_EVENT_ERROR_MASK_NOC_NCR BIT(13) +#define XPM_EVENT_ERROR_MASK_NOC_CR BIT(12) + +enum pm_module_id { + PM_MODULE_ID = 0x0, + XSEM_MODULE_ID = 0x3, + TF_A_MODULE_ID = 0xa, +}; + enum pm_api_cb_id { PM_INIT_SUSPEND_CB = 30, PM_ACKNOWLEDGE_CB = 31, @@ -107,6 +138,7 @@ enum pm_api_cb_id { }; enum pm_api_id { + PM_API_FEATURES = 0, PM_GET_API_VERSION = 1, PM_REGISTER_NOTIFIER = 5, PM_FORCE_POWERDOWN = 8, @@ -126,7 +158,6 @@ enum pm_api_id { PM_SECURE_SHA = 26, PM_PINCTRL_REQUEST = 28, PM_PINCTRL_RELEASE = 29, - PM_PINCTRL_GET_FUNCTION = 30, PM_PINCTRL_SET_FUNCTION = 31, PM_PINCTRL_CONFIG_PARAM_GET = 32, PM_PINCTRL_CONFIG_PARAM_SET = 33, @@ -137,19 +168,20 @@ enum pm_api_id { PM_CLOCK_GETSTATE = 38, PM_CLOCK_SETDIVIDER = 39, PM_CLOCK_GETDIVIDER = 40, - PM_CLOCK_SETRATE = 41, - PM_CLOCK_GETRATE = 42, PM_CLOCK_SETPARENT = 43, PM_CLOCK_GETPARENT = 44, PM_FPGA_READ = 46, PM_SECURE_AES = 47, + PM_EFUSE_ACCESS = 53, PM_FEATURE_CHECK = 63, }; /* PMU-FW return status codes */ enum pm_ret_status { XST_PM_SUCCESS = 0, + XST_PM_INVALID_VERSION = 4, XST_PM_NO_FEATURE = 19, + XST_PM_INVALID_CRC = 301, XST_PM_INTERNAL = 2000, XST_PM_CONFLICT = 2001, XST_PM_NO_ACCESS = 2002, @@ -497,20 +529,18 @@ struct zynqmp_pm_query_data { u32 arg3; }; -int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, - u32 arg2, u32 arg3, u32 *ret_payload); +int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_get_api_version(u32 *version); int zynqmp_pm_get_chipid(u32 *idcode, u32 *version); +int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily); int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out); int zynqmp_pm_clock_enable(u32 clock_id); int zynqmp_pm_clock_disable(u32 clock_id); int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state); int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider); int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider); -int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate); -int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate); int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id); int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id); int zynqmp_pm_set_pll_frac_mode(u32 clk_id, u32 mode); @@ -534,6 +564,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_efuse_access(const u64 address, u32 *out); int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); @@ -547,7 +578,6 @@ int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype); int zynqmp_pm_set_boot_health_status(u32 value); int zynqmp_pm_pinctrl_request(const u32 pin); int zynqmp_pm_pinctrl_release(const u32 pin); -int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id); int zynqmp_pm_pinctrl_set_function(const u32 pin, const u32 id); int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param, u32 *value); @@ -584,6 +614,11 @@ static inline int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) return -ENODEV; } +static inline int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily) +{ + return -ENODEV; +} + static inline int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out) { @@ -615,16 +650,6 @@ static inline int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider) return -ENODEV; } -static inline int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate) -{ - return -ENODEV; -} - -static inline int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate) -{ - return -ENODEV; -} - static inline int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id) { return -ENODEV; @@ -727,6 +752,11 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) return -ENODEV; } +static inline int zynqmp_pm_efuse_access(const u64 address, u32 *out) +{ + return -ENODEV; +} + static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags) { @@ -794,11 +824,6 @@ static inline int zynqmp_pm_pinctrl_release(const u32 pin) return -ENODEV; } -static inline int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id) -{ - return -ENODEV; -} - static inline int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id) { return -ENODEV; diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 3e378b1fb0bc..e9a72fd0bfe7 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -39,38 +39,6 @@ void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); /* - * ---- SINGLE ---- - */ -struct fprop_local_single { - /* the local events counter */ - unsigned long events; - /* Period in which we last updated events */ - unsigned int period; - raw_spinlock_t lock; /* Protect period and numerator */ -}; - -#define INIT_FPROP_LOCAL_SINGLE(name) \ -{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -} - -int fprop_local_init_single(struct fprop_local_single *pl); -void fprop_local_destroy_single(struct fprop_local_single *pl); -void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl); -void fprop_fraction_single(struct fprop_global *p, - struct fprop_local_single *pl, unsigned long *numerator, - unsigned long *denominator); - -static inline -void fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __fprop_inc_single(p, pl); - local_irq_restore(flags); -} - -/* * ---- PERCPU ---- */ struct fprop_local_percpu { diff --git a/include/linux/font.h b/include/linux/font.h index abf1442ce719..81caffd51bb4 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -57,7 +57,8 @@ extern const struct font_desc *find_font(const char *name); /* Get the default font for a specific screen size */ extern const struct font_desc *get_default_font(int xres, int yres, - u32 font_w, u32 font_h); + unsigned long *font_w, + unsigned long *font_h); /* Max. length for the name of a predefined font */ #define MAX_FONT_NAME 32 diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index da51a83b2829..6aeebe0a6777 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bitfield.h> #include <linux/bug.h> #include <linux/const.h> #include <linux/limits.h> @@ -9,7 +10,46 @@ #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) -void fortify_panic(const char *name) __noreturn __cold; +#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r) +#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r) +#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \ + FIELD_PREP(GENMASK(7, 1), func)) + +#ifndef fortify_panic +# define fortify_panic(func, write, avail, size, retfail) \ + __fortify_panic(FORTIFY_REASON(func, write), avail, size) +#endif + +#define FORTIFY_READ 0 +#define FORTIFY_WRITE 1 + +#define EACH_FORTIFY_FUNC(macro) \ + macro(strncpy), \ + macro(strnlen), \ + macro(strlen), \ + macro(strscpy), \ + macro(strlcat), \ + macro(strcat), \ + macro(strncat), \ + macro(memset), \ + macro(memcpy), \ + macro(memmove), \ + macro(memscan), \ + macro(memcmp), \ + macro(memchr), \ + macro(memchr_inv), \ + macro(kmemdup), \ + macro(strcpy), \ + macro(UNKNOWN), + +#define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func + +enum fortify_func { + EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC) +}; + +void __fortify_report(const u8 reason, const size_t avail, const size_t size); +void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); @@ -93,13 +133,9 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) -#define __struct_size(p) __builtin_dynamic_object_size(p, 0) -#define __member_size(p) __builtin_dynamic_object_size(p, 1) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) -#define __struct_size(p) __builtin_object_size(p, 0) -#define __member_size(p) __builtin_object_size(p, 1) #endif #define __compiletime_lessthan(bounds, length) ( \ @@ -147,7 +183,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p); return __underlying_strncpy(p, q, size); } @@ -178,7 +214,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size /* Do not check characters beyond the end of p. */ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } @@ -214,82 +250,13 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } -/* Defined after fortified strlen() to reuse it. */ -extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); -/** - * strlcpy - Copy a string into another string buffer - * - * @p: pointer to destination of copy - * @q: pointer to NUL-terminated source string to copy - * @size: maximum number of bytes to write at @p - * - * If strlen(@q) >= @size, the copy of @q will be truncated at - * @size - 1 bytes. @p will always be NUL-terminated. - * - * Do not use this function. While FORTIFY_SOURCE tries to avoid - * over-reads when calculating strlen(@q), it is still possible. - * Prefer strscpy(), though note its different return values for - * detecting truncation. - * - * Returns total number of bytes written to @p, including terminating NUL. - * - */ -__FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) -{ - const size_t p_size = __member_size(p); - const size_t q_size = __member_size(q); - size_t q_len; /* Full count of source string length. */ - size_t len; /* Count of characters going into destination. */ - - if (p_size == SIZE_MAX && q_size == SIZE_MAX) - return __real_strlcpy(p, q, size); - q_len = strlen(q); - len = (q_len >= size) ? size - 1 : q_len; - if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) { - /* Write size is always larger than destination. */ - if (len >= p_size) - __write_overflow(); - } - if (size) { - if (len >= p_size) - fortify_panic(__func__); - __underlying_memcpy(p, q, len); - p[len] = '\0'; - } - return q_len; -} - /* Defined after fortified strnlen() to reuse it. */ -extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); -/** - * strscpy - Copy a C-string into a sized buffer - * - * @p: Where to copy the string to - * @q: Where to copy the string from - * @size: Size of destination buffer - * - * Copy the source string @q, or as much of it as fits, into the destination - * @p buffer. The behavior is undefined if the string buffers overlap. The - * destination @p buffer is always NUL terminated, unless it's zero-sized. - * - * Preferred to strlcpy() since the API doesn't require reading memory - * from the source @q string beyond the specified @size bytes, and since - * the return value is easier to error-check than strlcpy()'s. - * In addition, the implementation is robust to the string changing out - * from underneath it, unlike the current strlcpy() implementation. - * - * Preferred to strncpy() since it always returns a valid string, and - * doesn't unnecessarily force the tail of the destination buffer to be - * zero padded. If padding is desired please use strscpy_pad(). - * - * Returns the number of characters copied in @p (not including the - * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated. - */ -__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy); +__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size) { /* Use string size rather than possible enclosing struct size. */ const size_t p_size = __member_size(p); @@ -333,8 +300,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s * Generate a runtime write overflow error if len is greater than * p_size. */ - if (len > p_size) - fortify_panic(__func__); + if (p_size < len) + fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG); /* * We can now safely call vanilla strscpy because we are protected from: @@ -392,7 +359,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if string is already overflowed. */ if (p_size <= p_len) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted); if (actual >= avail) { copy_len = avail - p_len - 1; @@ -401,7 +368,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if copy will overflow. */ if (p_size <= actual) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted); __underlying_memcpy(p + p_len, q, copy_len); p[actual] = '\0'; @@ -428,9 +395,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { const size_t p_size = __member_size(p); + const size_t wanted = strlcat(p, q, p_size); - if (strlcat(p, q, p_size) >= p_size) - fortify_panic(__func__); + if (p_size <= wanted) + fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p); return p; } @@ -459,20 +427,21 @@ char *strncat(char * const POS p, const char * const POS q, __kernel_size_t coun { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); - size_t p_len, copy_len; + size_t p_len, copy_len, total; if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); - if (p_size < p_len + copy_len + 1) - fortify_panic(__func__); + total = p_len + copy_len + 1; + if (p_size < total) + fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p); __underlying_memcpy(p + p_len, q, copy_len); p[p_len + copy_len] = '\0'; return p; } -__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size, const size_t p_size, const size_t p_size_field) { @@ -507,7 +476,8 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) - fortify_panic("memset"); + fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true); + return false; } #define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ @@ -561,7 +531,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t q_size, const size_t p_size_field, const size_t q_size_field, - const char *func) + const u8 func) { if (__builtin_constant_p(size)) { /* @@ -605,9 +575,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != SIZE_MAX && p_size < size) || - (q_size != SIZE_MAX && q_size < size)) - fortify_panic(func); + if (p_size != SIZE_MAX && p_size < size) + fortify_panic(func, FORTIFY_WRITE, p_size, size, true); + else if (q_size != SIZE_MAX && q_size < size) + fortify_panic(func, FORTIFY_READ, p_size, size, true); /* * Warn when writing beyond destination field size. @@ -640,10 +611,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t __q_size_field = (q_size_field); \ WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ - __q_size_field, #op), \ + __q_size_field, FORTIFY_FUNC_ ##op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ - "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) @@ -707,7 +678,7 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL); return __real_memscan(p, c, size); } @@ -723,8 +694,10 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } - if (p_size < size || q_size < size) - fortify_panic(__func__); + if (p_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN); + else if (q_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN); return __underlying_memcmp(p, q, size); } @@ -736,7 +709,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL); return __underlying_memchr(p, c, size); } @@ -748,7 +721,7 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL); return __real_memchr_inv(p, c, size); } @@ -761,7 +734,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL); return __real_kmemdup(p, size, gfp); } @@ -798,7 +771,7 @@ char *strcpy(char * const POS p, const char * const POS q) __write_overflow(); /* Run-time check for dynamic size overflow. */ if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p); __underlying_memcpy(p, q, size); return p; } diff --git a/include/linux/framer/framer-provider.h b/include/linux/framer/framer-provider.h new file mode 100644 index 000000000000..9724d4b44b9c --- /dev/null +++ b/include/linux/framer/framer-provider.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Generic framer profider header file + * + * Copyright 2023 CS GROUP France + * + * Author: Herve Codina <herve.codina@bootlin.com> + */ + +#ifndef __DRIVERS_PROVIDER_FRAMER_H +#define __DRIVERS_PROVIDER_FRAMER_H + +#include <linux/export.h> +#include <linux/framer/framer.h> +#include <linux/types.h> + +#define FRAMER_FLAG_POLL_STATUS BIT(0) + +/** + * struct framer_ops - set of function pointers for performing framer operations + * @init: operation to be performed for initializing the framer + * @exit: operation to be performed while exiting + * @power_on: powering on the framer + * @power_off: powering off the framer + * @flags: OR-ed flags (FRAMER_FLAG_*) to ask for core functionality + * - @FRAMER_FLAG_POLL_STATUS: + * Ask the core to perform a polling to get the framer status and + * notify consumers on change. + * The framer should call @framer_notify_status_change() when it + * detects a status change. This is usually done using interrupts. + * If the framer cannot detect this change, it can ask the core for + * a status polling. The core will call @get_status() periodically + * and, on change detected, it will notify the consumer. + * the @get_status() + * @owner: the module owner containing the ops + */ +struct framer_ops { + int (*init)(struct framer *framer); + void (*exit)(struct framer *framer); + int (*power_on)(struct framer *framer); + int (*power_off)(struct framer *framer); + + /** + * @get_status: + * + * Optional. + * + * Used to get the framer status. framer_init() must have + * been called on the framer. + * + * Returns: 0 if successful, an negative error code otherwise + */ + int (*get_status)(struct framer *framer, struct framer_status *status); + + /** + * @set_config: + * + * Optional. + * + * Used to set the framer configuration. framer_init() must have + * been called on the framer. + * + * Returns: 0 if successful, an negative error code otherwise + */ + int (*set_config)(struct framer *framer, const struct framer_config *config); + + /** + * @get_config: + * + * Optional. + * + * Used to get the framer configuration. framer_init() must have + * been called on the framer. + * + * Returns: 0 if successful, an negative error code otherwise + */ + int (*get_config)(struct framer *framer, struct framer_config *config); + + u32 flags; + struct module *owner; +}; + +/** + * struct framer_provider - represents the framer provider + * @dev: framer provider device + * @owner: the module owner having of_xlate + * @list: to maintain a linked list of framer providers + * @of_xlate: function pointer to obtain framer instance from framer pointer + */ +struct framer_provider { + struct device *dev; + struct module *owner; + struct list_head list; + struct framer * (*of_xlate)(struct device *dev, + const struct of_phandle_args *args); +}; + +static inline void framer_set_drvdata(struct framer *framer, void *data) +{ + dev_set_drvdata(&framer->dev, data); +} + +static inline void *framer_get_drvdata(struct framer *framer) +{ + return dev_get_drvdata(&framer->dev); +} + +#if IS_ENABLED(CONFIG_GENERIC_FRAMER) + +/* Create and destroy a framer */ +struct framer *framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops); +void framer_destroy(struct framer *framer); + +/* devm version */ +struct framer *devm_framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops); + +struct framer *framer_provider_simple_of_xlate(struct device *dev, + const struct of_phandle_args *args); + +struct framer_provider * +__framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)); + +void framer_provider_of_unregister(struct framer_provider *framer_provider); + +struct framer_provider * +__devm_framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)); + +void framer_notify_status_change(struct framer *framer); + +#else /* IS_ENABLED(CONFIG_GENERIC_FRAMER) */ + +static inline struct framer *framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void framer_destroy(struct framer *framer) +{ +} + +/* devm version */ +static inline struct framer *devm_framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct framer *framer_provider_simple_of_xlate(struct device *dev, + const struct of_phandle_args *args) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct framer_provider * +__framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)) +{ + return ERR_PTR(-ENOSYS); +} + +void framer_provider_of_unregister(struct framer_provider *framer_provider) +{ +} + +static inline struct framer_provider * +__devm_framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)) +{ + return ERR_PTR(-ENOSYS); +} + +void framer_notify_status_change(struct framer *framer) +{ +} + +#endif /* IS_ENABLED(CONFIG_GENERIC_FRAMER) */ + +#define framer_provider_of_register(dev, xlate) \ + __framer_provider_of_register((dev), THIS_MODULE, (xlate)) + +#define devm_framer_provider_of_register(dev, xlate) \ + __devm_framer_provider_of_register((dev), THIS_MODULE, (xlate)) + +#endif /* __DRIVERS_PROVIDER_FRAMER_H */ diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h new file mode 100644 index 000000000000..2b85fe9e7f9a --- /dev/null +++ b/include/linux/framer/framer.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Generic framer header file + * + * Copyright 2023 CS GROUP France + * + * Author: Herve Codina <herve.codina@bootlin.com> + */ + +#ifndef __DRIVERS_FRAMER_H +#define __DRIVERS_FRAMER_H + +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/device.h> +#include <linux/workqueue.h> + +/** + * enum framer_iface - Framer interface + * @FRAMER_IFACE_E1: E1 interface + * @FRAMER_IFACE_T1: T1 interface + */ +enum framer_iface { + FRAMER_IFACE_E1, + FRAMER_IFACE_T1, +}; + +/** + * enum framer_clock_type - Framer clock type + * @FRAMER_CLOCK_EXT: External clock + * @FRAMER_CLOCK_INT: Internal clock + */ +enum framer_clock_type { + FRAMER_CLOCK_EXT, + FRAMER_CLOCK_INT, +}; + +/** + * struct framer_config - Framer configuration + * @iface: Framer line interface + * @clock_type: Framer clock type + * @line_clock_rate: Framer line clock rate + */ +struct framer_config { + enum framer_iface iface; + enum framer_clock_type clock_type; + unsigned long line_clock_rate; +}; + +/** + * struct framer_status - Framer status + * @link_is_on: Framer link state. true, the link is on, false, the link is off. + */ +struct framer_status { + bool link_is_on; +}; + +/** + * enum framer_event - Event available for notification + * @FRAMER_EVENT_STATUS: Event notified on framer_status changes + */ +enum framer_event { + FRAMER_EVENT_STATUS, +}; + +/** + * struct framer - represents the framer device + * @dev: framer device + * @id: id of the framer device + * @ops: function pointers for performing framer operations + * @mutex: mutex to protect framer_ops + * @init_count: used to protect when the framer is used by multiple consumers + * @power_count: used to protect when the framer is used by multiple consumers + * @pwr: power regulator associated with the framer + * @notify_status_work: work structure used for status notifications + * @notifier_list: notifier list used for notifications + * @polling_work: delayed work structure used for the polling task + * @prev_status: previous read status used by the polling task to detect changes + */ +struct framer { + struct device dev; + int id; + const struct framer_ops *ops; + struct mutex mutex; /* Protect framer */ + int init_count; + int power_count; + struct regulator *pwr; + struct work_struct notify_status_work; + struct blocking_notifier_head notifier_list; + struct delayed_work polling_work; + struct framer_status prev_status; +}; + +#if IS_ENABLED(CONFIG_GENERIC_FRAMER) +int framer_pm_runtime_get(struct framer *framer); +int framer_pm_runtime_get_sync(struct framer *framer); +int framer_pm_runtime_put(struct framer *framer); +int framer_pm_runtime_put_sync(struct framer *framer); +int framer_init(struct framer *framer); +int framer_exit(struct framer *framer); +int framer_power_on(struct framer *framer); +int framer_power_off(struct framer *framer); +int framer_get_status(struct framer *framer, struct framer_status *status); +int framer_get_config(struct framer *framer, struct framer_config *config); +int framer_set_config(struct framer *framer, const struct framer_config *config); +int framer_notifier_register(struct framer *framer, struct notifier_block *nb); +int framer_notifier_unregister(struct framer *framer, struct notifier_block *nb); + +struct framer *framer_get(struct device *dev, const char *con_id); +void framer_put(struct device *dev, struct framer *framer); + +struct framer *devm_framer_get(struct device *dev, const char *con_id); +struct framer *devm_framer_optional_get(struct device *dev, const char *con_id); +#else +static inline int framer_pm_runtime_get(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_pm_runtime_get_sync(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_pm_runtime_put(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_pm_runtime_put_sync(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_init(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_exit(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_power_on(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_power_off(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_get_status(struct framer *framer, struct framer_status *status) +{ + return -ENOSYS; +} + +static inline int framer_get_config(struct framer *framer, struct framer_config *config) +{ + return -ENOSYS; +} + +static inline int framer_set_config(struct framer *framer, const struct framer_config *config) +{ + return -ENOSYS; +} + +static inline int framer_notifier_register(struct framer *framer, + struct notifier_block *nb) +{ + return -ENOSYS; +} + +static inline int framer_notifier_unregister(struct framer *framer, + struct notifier_block *nb) +{ + return -ENOSYS; +} + +static inline struct framer *framer_get(struct device *dev, const char *con_id) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void framer_put(struct device *dev, struct framer *framer) +{ +} + +static inline struct framer *devm_framer_get(struct device *dev, const char *con_id) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct framer *devm_framer_optional_get(struct device *dev, const char *con_id) +{ + return NULL; +} + +#endif + +#endif /* __DRIVERS_FRAMER_H */ diff --git a/include/linux/framer/pef2256.h b/include/linux/framer/pef2256.h new file mode 100644 index 000000000000..71d80af58c40 --- /dev/null +++ b/include/linux/framer/pef2256.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PEF2256 consumer API + * + * Copyright 2023 CS GROUP France + * + * Author: Herve Codina <herve.codina@bootlin.com> + */ +#ifndef __PEF2256_H__ +#define __PEF2256_H__ + +#include <linux/types.h> + +struct pef2256; +struct regmap; + +/* Retrieve the PEF2256 regmap */ +struct regmap *pef2256_get_regmap(struct pef2256 *pef2256); + +/* PEF2256 hardware versions */ +enum pef2256_version { + PEF2256_VERSION_UNKNOWN, + PEF2256_VERSION_1_2, + PEF2256_VERSION_2_1, + PEF2256_VERSION_2_2, +}; + +/* Get the PEF2256 hardware version */ +enum pef2256_version pef2256_get_version(struct pef2256 *pef2256); + +#endif /* __PEF2256_H__ */ diff --git a/include/linux/freelist.h b/include/linux/freelist.h deleted file mode 100644 index fc1842b96469..000000000000 --- a/include/linux/freelist.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ -#ifndef FREELIST_H -#define FREELIST_H - -#include <linux/atomic.h> - -/* - * Copyright: cameron@moodycamel.com - * - * A simple CAS-based lock-free free list. Not the fastest thing in the world - * under heavy contention, but simple and correct (assuming nodes are never - * freed until after the free list is destroyed), and fairly speedy under low - * contention. - * - * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists - */ - -struct freelist_node { - atomic_t refs; - struct freelist_node *next; -}; - -struct freelist_head { - struct freelist_node *head; -}; - -#define REFS_ON_FREELIST 0x80000000 -#define REFS_MASK 0x7FFFFFFF - -static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list) -{ - /* - * Since the refcount is zero, and nobody can increase it once it's - * zero (except us, and we run only one copy of this method per node at - * a time, i.e. the single thread case), then we know we can safely - * change the next pointer of the node; however, once the refcount is - * back above zero, then other threads could increase it (happens under - * heavy contention, when the refcount goes to zero in between a load - * and a refcount increment of a node in try_get, then back up to - * something non-zero, then the refcount increment is done by the other - * thread) -- so if the CAS to add the node to the actual list fails, - * decrese the refcount and leave the add operation to the next thread - * who puts the refcount back to zero (which could be us, hence the - * loop). - */ - struct freelist_node *head = READ_ONCE(list->head); - - for (;;) { - WRITE_ONCE(node->next, head); - atomic_set_release(&node->refs, 1); - - if (!try_cmpxchg_release(&list->head, &head, node)) { - /* - * Hmm, the add failed, but we can only try again when - * the refcount goes back to zero. - */ - if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1) - continue; - } - return; - } -} - -static inline void freelist_add(struct freelist_node *node, struct freelist_head *list) -{ - /* - * We know that the should-be-on-freelist bit is 0 at this point, so - * it's safe to set it using a fetch_add. - */ - if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) { - /* - * Oh look! We were the last ones referencing this node, and we - * know we want to add it to the free list, so let's do it! - */ - __freelist_add(node, list); - } -} - -static inline struct freelist_node *freelist_try_get(struct freelist_head *list) -{ - struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head); - unsigned int refs; - - while (head) { - prev = head; - refs = atomic_read(&head->refs); - if ((refs & REFS_MASK) == 0 || - !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) { - head = smp_load_acquire(&list->head); - continue; - } - - /* - * Good, reference count has been incremented (it wasn't at - * zero), which means we can read the next and not worry about - * it changing between now and the time we do the CAS. - */ - next = READ_ONCE(head->next); - if (try_cmpxchg_acquire(&list->head, &head, next)) { - /* - * Yay, got the node. This means it was on the list, - * which means should-be-on-freelist must be false no - * matter the refcount (because nobody else knows it's - * been taken off yet, it can't have been put back on). - */ - WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST); - - /* - * Decrease refcount twice, once for our ref, and once - * for the list's ref. - */ - atomic_fetch_add(-2, &head->refs); - - return head; - } - - /* - * OK, the head must have changed on us, but we still need to decrement - * the refcount we increased. - */ - refs = atomic_fetch_add(-1, &prev->refs); - if (refs == REFS_ON_FREELIST + 1) - __freelist_add(prev, list); - } - - return NULL; -} - -#endif /* FREELIST_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a40823c3c67..8dfd53b52744 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,8 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#include <linux/maple_tree.h> +#include <linux/rw_hint.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -67,7 +69,7 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_info; +struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -119,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) +/* File writes are restricted (block device specific) */ +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ @@ -309,19 +313,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. - */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -352,6 +343,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -434,7 +427,7 @@ struct address_space_operations { bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb); - int (*error_remove_page)(struct address_space *, struct page *); + int (*error_remove_folio)(struct address_space *, struct folio *); /* swapfile support */ int (*swap_activate)(struct swap_info_struct *sis, struct file *file, @@ -454,7 +447,7 @@ extern const struct address_space_operations empty_aops; * It is also used to block modification of page cache contents through * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. - * @i_mmap_writable: Number of VM_SHARED mappings. + * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. @@ -463,9 +456,9 @@ extern const struct address_space_operations empty_aops; * @a_ops: Methods. * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. - * @private_lock: For use by the owner of the address_space. - * @private_list: For use by the owner of the address_space. - * @private_data: For use by the owner of the address_space. + * @i_private_lock: For use by the owner of the address_space. + * @i_private_list: For use by the owner of the address_space. + * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; @@ -482,11 +475,11 @@ struct address_space { pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; - struct rw_semaphore i_mmap_rwsem; errseq_t wb_err; - spinlock_t private_lock; - struct list_head private_list; - void *private_data; + spinlock_t i_private_lock; + struct list_head i_private_list; + struct rw_semaphore i_mmap_rwsem; + void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but @@ -557,7 +550,7 @@ static inline int mapping_mapped(struct address_space *mapping) /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap + * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * @@ -671,13 +664,13 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 i_atime; - struct timespec64 i_mtime; + struct timespec64 __i_atime; + struct timespec64 __i_mtime; struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -738,7 +731,7 @@ struct inode { #endif #ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_info *i_crypt_info; + struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY @@ -907,7 +900,8 @@ static inline loff_t i_size_read(const struct inode *inode) preempt_enable(); return i_size; #else - return inode->i_size; + /* Pairs with smp_store_release() in i_size_write() */ + return smp_load_acquire(&inode->i_size); #endif } @@ -929,7 +923,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; preempt_enable(); #else - inode->i_size = i_size; + /* + * Pairs with smp_load_acquire() in i_size_read() to ensure + * changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); #endif } @@ -991,8 +990,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) */ struct file { union { + /* fput() uses task work when closing and freeing file (default). */ + struct callback_head f_task_work; + /* fput() must use workqueue (most kernel threads). */ struct llist_node f_llist; - struct rcu_head f_rcuhead; unsigned int f_iocb_flags; }; @@ -1042,7 +1043,10 @@ static inline struct file *get_file(struct file *f) atomic_long_inc(&f->f_count); return f; } -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) + +struct file *get_file_rcu(struct file __rcu **f); +struct file *get_file_active(struct file **f); + #define file_count(x) atomic_long_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) @@ -1059,6 +1063,7 @@ static inline struct file *get_file(struct file *f) typedef void *fl_owner_t; struct file_lock; +struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX @@ -1073,9 +1078,20 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +/* + * file_dentry() is a relic from the days that overlayfs was using files with a + * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. + * In those days, file_dentry() was needed to get the underlying fs dentry that + * matches f_inode. + * Files with "fake" path should not exist nowadays, so use an assertion to make + * sure that file_dentry() was not papering over filesystem bugs. + */ static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file)); + struct dentry *dentry = file->f_path.dentry; + + WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); + return dentry; } struct fasync_struct { @@ -1119,7 +1135,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_NOATIME BIT(10) /* Do not update access times. */ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) -#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */ +#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ @@ -1161,11 +1177,13 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 +#define SB_I_EVM_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ +#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ /* Possible states of 'frozen' field */ enum { @@ -1181,7 +1199,8 @@ enum { struct sb_writers { unsigned short frozen; /* Is sb frozen? */ - unsigned short freeze_holders; /* Who froze fs? */ + int freeze_kcount; /* How many kernel freeze requests? */ + int freeze_ucount; /* How many userspace freeze requests? */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1206,7 +1225,7 @@ struct super_block { #ifdef CONFIG_SECURITY void *s_security; #endif - const struct xattr_handler **s_xattr; + const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ @@ -1220,7 +1239,8 @@ struct super_block { #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ - struct block_device *s_bdev; + struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ + struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; @@ -1246,8 +1266,22 @@ struct super_block { struct fsnotify_mark_connector __rcu *s_fsnotify_marks; #endif + /* + * q: why are s_id and s_sysfs_name not the same? both are human + * readable strings that identify the filesystem + * a: s_id is allowed to change at runtime; it's used in log messages, + * and we want to when a device starts out as single device (s_id is dev + * name) but then a device is hot added and we have to switch to + * identifying it by UUID + * but s_sysfs_name is a handle for programmatic access, and can't + * change at runtime + */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ + u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ + + /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ + char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; @@ -1265,7 +1299,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; @@ -1511,24 +1545,81 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); -/** - * inode_get_ctime - fetch the current ctime from the inode - * @inode: inode from which to fetch ctime - * - * Grab the current ctime from the inode and return it. - */ +static inline time64_t inode_get_atime_sec(const struct inode *inode) +{ + return inode->__i_atime.tv_sec; +} + +static inline long inode_get_atime_nsec(const struct inode *inode) +{ + return inode->__i_atime.tv_nsec; +} + +static inline struct timespec64 inode_get_atime(const struct inode *inode) +{ + return inode->__i_atime; +} + +static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_atime = ts; + return ts; +} + +static inline struct timespec64 inode_set_atime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); +} + +static inline time64_t inode_get_mtime_sec(const struct inode *inode) +{ + return inode->__i_mtime.tv_sec; +} + +static inline long inode_get_mtime_nsec(const struct inode *inode) +{ + return inode->__i_mtime.tv_nsec; +} + +static inline struct timespec64 inode_get_mtime(const struct inode *inode) +{ + return inode->__i_mtime; +} + +static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_mtime = ts; + return ts; +} + +static inline struct timespec64 inode_set_mtime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_mtime_to_ts(inode, ts); +} + +static inline time64_t inode_get_ctime_sec(const struct inode *inode) +{ + return inode->__i_ctime.tv_sec; +} + +static inline long inode_get_ctime_nsec(const struct inode *inode) +{ + return inode->__i_ctime.tv_nsec; +} + static inline struct timespec64 inode_get_ctime(const struct inode *inode) { return inode->__i_ctime; } -/** - * inode_set_ctime_to_ts - set the ctime in the inode - * @inode: inode in which to set the ctime - * @ts: value to set in the ctime field - * - * Set the ctime in @inode to @ts - */ static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { @@ -1553,6 +1644,8 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode, return inode_set_ctime_to_ts(inode, ts); } +struct timespec64 simple_inode_init_ts(struct inode *inode); + /* * Snapshotting support. */ @@ -1581,9 +1674,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) +/** + * __sb_write_started - check if sb freeze level is held + * @sb: the super we write to + * @level: the freeze level + * + * * > 0 - sb freeze level is held + * * 0 - sb freeze level is not held + * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN + */ +static inline int __sb_write_started(const struct super_block *sb, int level) +{ + return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); +} + +/** + * sb_write_started - check if SB_FREEZE_WRITE is held + * @sb: the super we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + */ static inline bool sb_write_started(const struct super_block *sb) { - return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1); + return __sb_write_started(sb, SB_FREEZE_WRITE); +} + +/** + * sb_write_not_started - check if SB_FREEZE_WRITE is not held + * @sb: the super we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + */ +static inline bool sb_write_not_started(const struct super_block *sb) +{ + return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0; +} + +/** + * file_write_started - check if SB_FREEZE_WRITE is held + * @file: the file we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + * May be false positive with !S_ISREG, because file_start_write() has + * no effect on !S_ISREG. + */ +static inline bool file_write_started(const struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return true; + return sb_write_started(file_inode(file)->i_sb); +} + +/** + * file_write_not_started - check if SB_FREEZE_WRITE is not held + * @file: the file we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + * May be false positive with !S_ISREG, because file_start_write() has + * no effect on !S_ISREG. + */ +static inline bool file_write_not_started(const struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return true; + return sb_write_not_started(file_inode(file)->i_sb); } /** @@ -1876,7 +2030,7 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); - int (*setlease)(struct file *, int, struct file_lock **, void **); + int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); @@ -1965,9 +2119,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags); int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, @@ -1975,9 +2126,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); -extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); @@ -1987,9 +2135,24 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); +/** + * enum freeze_holder - holder of the freeze + * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem + * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem + * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * + * Indicate who the owner of the freeze or thaw request is and whether + * the freeze needs to be exclusive or can nest. + * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the + * same holder aren't allowed. It is however allowed to hold a single + * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at + * the same time. This is relied upon by some filesystems during online + * repair or similar. + */ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), + FREEZE_MAY_NEST = (1U << 2), }; struct super_operations { @@ -2018,7 +2181,7 @@ struct super_operations { #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); - struct dquot **(*get_dquots)(struct inode *); + struct dquot __rcu **(*get_dquots)(struct inode *); #endif long (*nr_cached_objects)(struct super_block *, struct shrink_control *); @@ -2081,7 +2244,12 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) + +#ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) +#else +#define IS_POSIXACL(inode) 0 +#endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) @@ -2225,7 +2393,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) -#define I_PINNING_FSCACHE_WB (1 << 18) +#define I_PINNING_NETFS_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2386,6 +2554,44 @@ extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); +static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) +{ + if (WARN_ON(len > sizeof(sb->s_uuid))) + len = sizeof(sb->s_uuid); + sb->s_uuid_len = len; + memcpy(&sb->s_uuid, uuid, len); +} + +/* set sb sysfs name based on sb->s_bdev */ +static inline void super_set_sysfs_name_bdev(struct super_block *sb) +{ + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); +} + +/* set sb sysfs name based on sb->s_uuid */ +static inline void super_set_sysfs_name_uuid(struct super_block *sb) +{ + WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); +} + +/* set sb sysfs name based on sb->s_id */ +static inline void super_set_sysfs_name_id(struct super_block *sb) +{ + strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); +} + +/* try to use something standard before you use this */ +__printf(2, 3) +static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); + va_end(args); +} + extern int current_umask(void); extern void ihold(struct inode * inode); @@ -2409,7 +2615,7 @@ struct filename { }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); -static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } @@ -2448,26 +2654,31 @@ struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct file *backing_file_open(const struct path *path, int flags, - const struct path *real_path, - const struct cred *cred); -struct path *backing_file_real_path(struct file *f); +struct path *backing_file_user_path(struct file *f); /* - * file_real_path - get the path corresponding to f_inode - * - * When opening a backing file for a stackable filesystem (e.g., - * overlayfs) f_path may be on the stackable filesystem and f_inode on - * the underlying filesystem. When the path associated with f_inode is - * needed, this helper should be used instead of accessing f_path - * directly. -*/ -static inline const struct path *file_real_path(struct file *f) + * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file + * stored in ->vm_file is a backing file whose f_inode is on the underlying + * filesystem. When the mapped file path and inode number are displayed to + * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the + * path and inode number to display to the user, which is the path of the fd + * that user has requested to map and the inode number that would be returned + * by fstat() on that same fd. + */ +/* Get the path to display in /proc/<pid>/maps */ +static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) - return backing_file_real_path(f); + return backing_file_user_path(f); return &f->f_path; } +/* Get the inode whose inode number to display in /proc/<pid>/maps */ +static inline const struct inode *file_user_inode(struct file *f) +{ + if (unlikely(f->f_mode & FMODE_BACKING)) + return d_inode(backing_file_user_path(f)->dentry); + return file_inode(f); +} static inline struct file *file_clone_open(struct file *file) { @@ -2777,6 +2988,17 @@ extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); +/** + * is_dot_dotdot - returns true only if @name is "." or ".." + * @name: file name to check + * @len: length of file name, in bytes + */ +static inline bool is_dot_dotdot(const char *name, size_t len) +{ + return len && unlikely(name[0] == '.') && + (len == 1 || (len == 2 && name[1] == '.')); +} + #include <linux/err.h> /* needed for stackable file system support */ @@ -2854,6 +3076,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); +extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); @@ -2922,8 +3145,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags); extern void @@ -3052,7 +3273,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); -extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); @@ -3090,7 +3310,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -extern int simple_nosetlease(struct file *, int, struct file_lock **, void **); +extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); @@ -3112,13 +3332,14 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { - struct xarray xa; - u32 next_offset; + struct maple_tree mt; + unsigned long next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); +int simple_offset_empty(struct dentry *dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -3132,7 +3353,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); -extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); +extern void generic_set_sb_d_ops(struct super_block *sb); + +static inline bool sb_has_encoding(const struct super_block *sb) +{ +#if IS_ENABLED(CONFIG_UNICODE) + return !!sb->s_encoding; +#else + return false; +#endif +} int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); @@ -3187,6 +3417,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; + if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) + return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) @@ -3197,6 +3429,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; + if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { + if (IS_APPEND(file_inode(ki->ki_filp))) + return -EPERM; + ki->ki_flags &= ~IOCB_APPEND; + } + ki->ki_flags |= kiocb_flags; return 0; } diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index 010d39d0dc1c..2b1f74b24070 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -16,14 +16,14 @@ extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src); static inline void fsstack_copy_attr_atime(struct inode *dest, const struct inode *src) { - dest->i_atime = src->i_atime; + inode_set_atime_to_ts(dest, inode_get_atime(src)); } static inline void fsstack_copy_attr_times(struct inode *dest, const struct inode *src) { - dest->i_atime = src->i_atime; - dest->i_mtime = src->i_mtime; + inode_set_atime_to_ts(dest, inode_get_atime(src)); + inode_set_mtime_to_ts(dest, inode_get_mtime(src)); inode_set_ctime_to_ts(dest, inode_get_ctime(src)); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index a174cedf4d90..bdf7f3eddf0a 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -189,17 +189,20 @@ extern atomic_t fscache_n_write; extern atomic_t fscache_n_no_write_space; extern atomic_t fscache_n_no_create_space; extern atomic_t fscache_n_culled; +extern atomic_t fscache_n_dio_misfit; #define fscache_count_read() atomic_inc(&fscache_n_read) #define fscache_count_write() atomic_inc(&fscache_n_write) #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space) #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space) #define fscache_count_culled() atomic_inc(&fscache_n_culled) +#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit) #else #define fscache_count_read() do {} while(0) #define fscache_count_write() do {} while(0) #define fscache_count_no_write_space() do {} while(0) #define fscache_count_no_create_space() do {} while(0) #define fscache_count_culled() do {} while(0) +#define fscache_count_dio_misfit() do {} while(0) #endif #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 8e312c8323a8..6e8562cbcc43 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -437,9 +437,6 @@ const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_r * indicates the cache resources to which the operation state should be * attached; @cookie indicates the cache object that will be accessed. * - * This is intended to be called from the ->begin_cache_operation() netfs lib - * operation as implemented by the network filesystem. - * * @cres->inval_counter is set from @cookie->inval_counter for comparison at * the end of the operation. This allows invalidation during the operation to * be detected by the caller. @@ -629,48 +626,6 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } -#if __fscache_available -bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio, - struct fscache_cookie *cookie); -#else -#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \ - filemap_dirty_folio(MAPPING, FOLIO) -#endif - -/** - * fscache_unpin_writeback - Unpin writeback resources - * @wbc: The writeback control - * @cookie: The cookie referring to the cache object - * - * Unpin the writeback resources pinned by fscache_dirty_folio(). This is - * intended to be called by the netfs's ->write_inode() method. - */ -static inline void fscache_unpin_writeback(struct writeback_control *wbc, - struct fscache_cookie *cookie) -{ - if (wbc->unpinned_fscache_wb) - fscache_unuse_cookie(cookie, NULL, NULL); -} - -/** - * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode - * @cookie: The cookie referring to the cache object - * @inode: The inode to clean up - * @aux: Auxiliary data to apply to the inode - * - * Clear any writeback resources held by an inode when the inode is evicted. - * This must be called before clear_inode() is called. - */ -static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, - struct inode *inode, - const void *aux) -{ - if (inode->i_state & I_PINNING_FSCACHE_WB) { - loff_t i_size = i_size_read(inode); - fscache_unuse_cookie(cookie, aux, &i_size); - } -} - /** * fscache_note_page_release - Note that a netfs page got released * @cookie: The cookie corresponding to the file diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c895b12737a1..772f822dc6b8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -31,7 +31,7 @@ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; -struct fscrypt_info; +struct fscrypt_inode_info; struct fs_parameter; struct seq_file; @@ -59,26 +59,55 @@ struct fscrypt_name { #ifdef CONFIG_FS_ENCRYPTION -/* - * If set, the fscrypt bounce page pool won't be allocated (unless another - * filesystem needs it). Set this if the filesystem always uses its own bounce - * pages for writes and therefore won't need the fscrypt bounce page pool. - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - /* Crypto operations for filesystems */ struct fscrypt_operations { - /* Set of optional flags; see above for allowed flags */ - unsigned int flags; + /* + * If set, then fs/crypto/ will allocate a global bounce page pool the + * first time an encryption key is set up for a file. The bounce page + * pool is required by the following functions: + * + * - fscrypt_encrypt_pagecache_blocks() + * - fscrypt_zeroout_range() for files not using inline crypto + * + * If the filesystem doesn't use those, it doesn't need to set this. + */ + unsigned int needs_bounce_pages : 1; + + /* + * If set, then fs/crypto/ will allow the use of encryption settings + * that assume inode numbers fit in 32 bits (i.e. + * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other + * prerequisites for these settings are also met. This is only useful + * if the filesystem wants to support inline encryption hardware that is + * limited to 32-bit or 64-bit data unit numbers and where programming + * keyslots is very slow. + */ + unsigned int has_32bit_inodes : 1; + + /* + * If set, then fs/crypto/ will allow users to select a crypto data unit + * size that is less than the filesystem block size. This is done via + * the log2_data_unit_size field of the fscrypt policy. This flag is + * not compatible with filesystems that encrypt variable-length blocks + * (i.e. blocks that aren't all equal to filesystem's block size), for + * example as a result of compression. It's also not compatible with + * the fscrypt_encrypt_block_inplace() and + * fscrypt_decrypt_block_inplace() functions. + */ + unsigned int supports_subblock_data_units : 1; /* - * If set, this is a filesystem-specific key description prefix that - * will be accepted for "logon" keys for v1 fscrypt policies, in - * addition to the generic prefix "fscrypt:". This functionality is - * deprecated, so new filesystems shouldn't set this field. + * This field exists only for backwards compatibility reasons and should + * only be set by the filesystems that are setting it already. It + * contains the filesystem-specific key description prefix that is + * accepted for "logon" keys for v1 fscrypt policies. This + * functionality is deprecated in favor of the generic prefix + * "fscrypt:", which itself is deprecated in favor of the filesystem + * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that + * are newly adding fscrypt support should not set this field. */ - const char *key_prefix; + const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. @@ -146,21 +175,6 @@ struct fscrypt_operations { bool (*has_stable_inodes)(struct super_block *sb); /* - * Get the number of bits that the filesystem uses to represent inode - * numbers and file logical block numbers. - * - * By default, both of these are assumed to be 64-bit. This function - * can be implemented to declare that either or both of these numbers is - * shorter, which may allow the use of the - * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of - * inline crypto hardware whose maximum DUN length is less than 64 bits - * (e.g., eMMC v5.2 spec compliant hardware). This function only needs - * to be implemented if support for one of these features is needed. - */ - void (*get_ino_and_lblk_bits)(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret); - - /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem * only has a single such block device, or an ERR_PTR() on error. @@ -178,7 +192,10 @@ struct fscrypt_operations { unsigned int *num_devs); }; -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); + +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). @@ -206,15 +223,29 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) } /* - * When d_splice_alias() moves a directory's no-key alias to its plaintext alias - * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be - * cleared. Note that we don't have to support arbitrary moves of this flag - * because fscrypt doesn't allow no-key names to be the source or target of a - * rename(). + * When d_splice_alias() moves a directory's no-key alias to its + * plaintext alias as a result of the encryption key being added, + * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity + * to disable d_revalidate. Note that we don't have to support the + * inverse operation because fscrypt doesn't allow no-key names to be + * the source or target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { - dentry->d_flags &= ~DCACHE_NOKEY_NAME; + /* + * VFS calls fscrypt_handle_d_move even for non-fscrypt + * filesystems. + */ + if (dentry->d_flags & DCACHE_NOKEY_NAME) { + dentry->d_flags &= ~DCACHE_NOKEY_NAME; + + /* + * Other filesystem features might be handling dentry + * revalidation, in which case it cannot be disabled. + */ + if (dentry->d_op->d_revalidate == fscrypt_d_revalidate) + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + } } /** @@ -246,6 +277,35 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return dentry->d_flags & DCACHE_NOKEY_NAME; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ + /* + * This code tries to only take ->d_lock when necessary to write + * to ->d_flags. We shouldn't be peeking on d_flags for + * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case + * there is a race, the worst it can happen is that we fail to + * unset DCACHE_OP_REVALIDATE and pay the cost of an extra + * d_revalidate. + */ + if (is_nokey_name) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_NOKEY_NAME; + spin_unlock(&dentry->d_lock); + } else if (dentry->d_flags & DCACHE_OP_REVALIDATE && + dentry->d_op->d_revalidate == fscrypt_d_revalidate) { + /* + * Unencrypted dentries and encrypted dentries where the + * key is available are always valid from fscrypt + * perspective. Avoid the cost of calling + * fscrypt_d_revalidate unnecessarily. + */ + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + spin_unlock(&dentry->d_lock); + } +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -353,7 +413,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); @@ -390,7 +449,8 @@ static inline void fscrypt_set_ops(struct super_block *sb, } #else /* !CONFIG_FS_ENCRYPTION */ -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { return NULL; } @@ -409,6 +469,11 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return false; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { @@ -868,7 +933,7 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { - return fscrypt_get_info(inode) != NULL; + return fscrypt_get_inode_info(inode) != NULL; } /** @@ -966,6 +1031,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; + + fscrypt_prepare_dentry(dentry, false); + return 0; } diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index ed48e4f1e755..1a9de119a0f7 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,6 +17,12 @@ #include <linux/slab.h> #include <linux/bug.h> +/* Are there any inode/mount/sb objects that are being watched at all? */ +static inline bool fsnotify_sb_has_watchers(struct super_block *sb) +{ + return atomic_long_read(&sb->s_fsnotify_connectors); +} + /* * Notify this @dir inode about a change in a child directory entry. * The directory entry may have turned positive or negative or its inode may @@ -30,7 +36,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { - if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(dir->i_sb)) return 0; return fsnotify(mask, data, data_type, dir, name, NULL, cookie); @@ -44,7 +50,7 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, static inline void fsnotify_inode(struct inode *inode, __u32 mask) { - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return; if (S_ISDIR(inode->i_mode)) @@ -59,7 +65,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return 0; if (S_ISDIR(inode->i_mode)) { @@ -96,35 +102,73 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (file->f_mode & FMODE_NONOTIFY) return 0; - /* Overlayfs internal files have fake f_path */ - path = file_real_path(file); + path = &file->f_path; return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } -/* Simple call site for access decisions */ -static inline int fsnotify_perm(struct file *file, int mask) +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +/* + * fsnotify_file_area_perm - permission hook before access to file range + */ +static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, + const loff_t *ppos, size_t count) { - int ret; - __u32 fsnotify_mask = 0; + __u32 fsnotify_mask = FS_ACCESS_PERM; + + /* + * filesystem may be modified in the context of permission events + * (e.g. by HSM filling a file on access), so sb freeze protection + * must not be held. + */ + lockdep_assert_once(file_write_not_started(file)); - if (!(mask & (MAY_READ | MAY_OPEN))) + if (!(perm_mask & MAY_READ)) return 0; - if (mask & MAY_OPEN) { - fsnotify_mask = FS_OPEN_PERM; + return fsnotify_file(file, fsnotify_mask); +} + +/* + * fsnotify_file_perm - permission hook before file access + */ +static inline int fsnotify_file_perm(struct file *file, int perm_mask) +{ + return fsnotify_file_area_perm(file, perm_mask, NULL, 0); +} - if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); +/* + * fsnotify_open_perm - permission hook before file open + */ +static inline int fsnotify_open_perm(struct file *file) +{ + int ret; - if (ret) - return ret; - } - } else if (mask & MAY_READ) { - fsnotify_mask = FS_ACCESS_PERM; + if (file->f_flags & __FMODE_EXEC) { + ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + if (ret) + return ret; } - return fsnotify_file(file, fsnotify_mask); + return fsnotify_file(file, FS_OPEN_PERM); +} + +#else +static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, + const loff_t *ppos, size_t count) +{ + return 0; +} + +static inline int fsnotify_file_perm(struct file *file, int perm_mask) +{ + return 0; +} + +static inline int fsnotify_open_perm(struct file *file) +{ + return 0; } +#endif /* * fsnotify_link_count - inode's link count changed diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c0892d75ce33..8f40c349b228 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -31,8 +31,8 @@ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ -#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ -#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ @@ -472,10 +472,8 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ -#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ - __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; @@ -530,6 +528,8 @@ struct fsnotify_mark { #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 +#define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800 +#define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000 unsigned int flags; /* flags [mark->lock] */ }; @@ -763,11 +763,10 @@ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags, __kernel_fsid_t *fsid); + int add_flags); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags, - __kernel_fsid_t *fsid); + unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, @@ -775,15 +774,14 @@ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, int add_flags) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); + FSNOTIFY_OBJ_TYPE_INODE, add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags, - NULL); + FSNOTIFY_OBJ_TYPE_INODE, add_flags); } /* given a group and a mark, flag mark to be freed when all references are dropped */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e8921871ef9a..54d53f345d14 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1151,7 +1151,9 @@ static inline void unpause_graph_tracing(void) { } #ifdef CONFIG_TRACING enum ftrace_dump_mode; -extern enum ftrace_dump_mode ftrace_dump_on_oops; +#define MAX_TRACER_SIZE 100 +extern char ftrace_dump_on_oops[]; +extern int ftrace_dump_on_oops_enabled(void); extern int tracepoint_printk; extern void disable_trace_on_warning(void); diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h new file mode 100644 index 000000000000..3ff4c277296f --- /dev/null +++ b/include/linux/fw_table.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * fw_tables.h - Parsing support for ACPI and ACPI-like tables provided by + * platform or device firmware + * + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2023 Intel Corp. + */ +#ifndef _FW_TABLE_H_ +#define _FW_TABLE_H_ + +union acpi_subtable_headers; + +typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, + const unsigned long end); + +typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, + void *arg, const unsigned long end); + +struct acpi_subtable_proc { + int id; + acpi_tbl_entry_handler handler; + acpi_tbl_entry_handler_arg handler_arg; + void *arg; + int count; +}; + +union fw_table_header { + struct acpi_table_header acpi; + struct acpi_table_cdat cdat; +}; + +union acpi_subtable_headers { + struct acpi_subtable_header common; + struct acpi_hmat_structure hmat; + struct acpi_prmt_module_header prmt; + struct acpi_cedt_header cedt; + struct acpi_cdat_header cdat; +}; + +int acpi_parse_entries_array(char *id, unsigned long table_size, + union fw_table_header *table_header, + unsigned long max_length, + struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries); + +int cdat_table_parse(enum acpi_cdat_type type, + acpi_tbl_entry_handler_arg handler_arg, void *arg, + struct acpi_table_cdat *table_header, + unsigned long length); + +/* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */ +#if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS) +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_fwtbl_lib __init_or_acpilib +#else +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, CXL) +#define __init_or_fwtbl_lib +#endif + +#endif diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5700451b300f..0d79070c5a70 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -9,10 +9,16 @@ #ifndef _LINUX_FWNODE_H_ #define _LINUX_FWNODE_H_ -#include <linux/types.h> -#include <linux/list.h> #include <linux/bits.h> #include <linux/err.h> +#include <linux/list.h> +#include <linux/types.h> + +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; struct fwnode_operations; struct device; @@ -41,6 +47,8 @@ struct device; struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; + + /* The below is used solely by device links, don't use otherwise */ struct device *dev; struct list_head suppliers; struct list_head consumers; @@ -51,8 +59,10 @@ struct fwnode_handle { * fwnode link flags * * CYCLE: The fwnode link is part of a cycle. Don't defer probe. + * IGNORE: Completely ignore this link, even during cycle detection. */ #define FWLINK_FLAG_CYCLE BIT(0) +#define FWLINK_FLAG_IGNORE BIT(1) struct fwnode_link { struct fwnode_handle *supplier; @@ -185,7 +195,6 @@ struct fwnode_operations { if (fwnode_has_op(fwnode, op)) \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ } while (false) -#define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) @@ -207,9 +216,10 @@ static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; } -extern bool fw_devlink_is_strict(void); -int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); +int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, + u8 flags); void fwnode_links_purge(struct fwnode_handle *fwnode); void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); +bool fw_devlink_is_strict(void); #endif diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 107613f7d792..f3512fddf3d7 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -5,7 +5,7 @@ * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to - * PAGE_SIZE. + * GENRADIX_NODE_SIZE. * * A genradix is defined with the type it will store, like so: * @@ -38,18 +38,22 @@ #include <asm/page.h> #include <linux/bug.h> +#include <linux/limits.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/types.h> struct genradix_root; +#define GENRADIX_NODE_SHIFT 9 +#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) + struct __genradix { struct genradix_root *root; }; /* - * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: + * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: */ #define __GENRADIX_INITIALIZER \ @@ -100,14 +104,14 @@ void __genradix_free(struct __genradix *); static inline size_t __idx_to_offset(size_t idx, size_t obj_size) { if (__builtin_constant_p(obj_size)) - BUILD_BUG_ON(obj_size > PAGE_SIZE); + BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE); else - BUG_ON(obj_size > PAGE_SIZE); + BUG_ON(obj_size > GENRADIX_NODE_SIZE); if (!is_power_of_2(obj_size)) { - size_t objs_per_page = PAGE_SIZE / obj_size; + size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size; - return (idx / objs_per_page) * PAGE_SIZE + + return (idx / objs_per_page) * GENRADIX_NODE_SIZE + (idx % objs_per_page) * obj_size; } else { return idx * obj_size; @@ -116,6 +120,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) +#define __genradix_objs_per_page(_radix) \ + (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0])) +#define __genradix_page_remainder(_radix) \ + (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0])) + #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) @@ -179,16 +188,40 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); #define genradix_iter_peek(_iter, _radix) \ (__genradix_cast(_radix) \ __genradix_iter_peek(_iter, &(_radix)->tree, \ - PAGE_SIZE / __genradix_obj_size(_radix))) + __genradix_objs_per_page(_radix))) + +void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *, + size_t, size_t); + +/** + * genradix_iter_peek_prev - get first entry at or below iterator's current + * position + * @_iter: a genradix_iter + * @_radix: genradix being iterated over + * + * If no more entries exist at or below @_iter's current position, returns NULL + */ +#define genradix_iter_peek_prev(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek_prev(_iter, &(_radix)->tree, \ + __genradix_objs_per_page(_radix), \ + __genradix_obj_size(_radix) + \ + __genradix_page_remainder(_radix))) static inline void __genradix_iter_advance(struct genradix_iter *iter, size_t obj_size) { + if (iter->offset + obj_size < iter->offset) { + iter->offset = SIZE_MAX; + iter->pos = SIZE_MAX; + return; + } + iter->offset += obj_size; if (!is_power_of_2(obj_size) && - (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) - iter->offset = round_up(iter->offset, PAGE_SIZE); + (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE) + iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE); iter->pos++; } @@ -196,6 +229,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, #define genradix_iter_advance(_iter, _radix) \ __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) +static inline void __genradix_iter_rewind(struct genradix_iter *iter, + size_t obj_size) +{ + if (iter->offset == 0 || + iter->offset == SIZE_MAX) { + iter->offset = SIZE_MAX; + return; + } + + if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0) + iter->offset -= GENRADIX_NODE_SIZE % obj_size; + + iter->offset -= obj_size; + iter->pos--; +} + +#define genradix_iter_rewind(_iter, _radix) \ + __genradix_iter_rewind(_iter, __genradix_obj_size(_radix)) + #define genradix_for_each_from(_radix, _iter, _p, _start) \ for (_iter = genradix_iter_init(_radix, _start); \ (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ @@ -213,6 +265,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, #define genradix_for_each(_radix, _iter, _p) \ genradix_for_each_from(_radix, _iter, _p, 0) +#define genradix_last_pos(_radix) \ + (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1) + +/** + * genradix_for_each_reverse - iterate over entry in a genradix, reverse order + * @_radix: genradix to iterate over + * @_iter: a genradix_iter to track current position + * @_p: pointer to genradix entry type + * + * On every iteration, @_p will point to the current entry, and @_iter.pos + * will be the current entry's index. + */ +#define genradix_for_each_reverse(_radix, _iter, _p) \ + for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\ + (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\ + genradix_iter_rewind(&_iter, _radix)) + int __genradix_prealloc(struct __genradix *, size_t, gfp_t); /** diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 665f06675c83..c775ea3c6015 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -8,6 +8,7 @@ #include <linux/topology.h> struct vm_area_struct; +struct mempolicy; /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -262,7 +263,9 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); -struct folio *folio_alloc(gfp_t gfp, unsigned order); +struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid); +struct folio *folio_alloc(gfp_t gfp, unsigned int order); struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else @@ -270,6 +273,11 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } +static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid) +{ + return alloc_pages(gfp, order); +} static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); @@ -303,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; +void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); @@ -320,11 +336,13 @@ extern void page_frag_free(void *addr); #define free_page(addr) free_pages((addr), 0) void page_alloc_init_cpuhp(void); +int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); +void setup_pcp_cacheinfo(unsigned int cpu); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what @@ -343,6 +361,15 @@ static inline bool gfp_has_io_fs(gfp_t gfp) return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS); } +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. + */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 6583a58670c5..13becafe41df 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -2,6 +2,8 @@ #ifndef __LINUX_GFP_TYPES_H #define __LINUX_GFP_TYPES_H +#include <linux/bits.h> + /* The typedef is in types.h but we want the documentation here */ #if 0 /** @@ -21,44 +23,78 @@ typedef unsigned int __bitwise gfp_t; * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c */ +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif + ___GFP_LAST_BIT +}; + /* Plain integer GFP bitmasks. Do not use this directly. */ -#define ___GFP_DMA 0x01u -#define ___GFP_HIGHMEM 0x02u -#define ___GFP_DMA32 0x04u -#define ___GFP_MOVABLE 0x08u -#define ___GFP_RECLAIMABLE 0x10u -#define ___GFP_HIGH 0x20u -#define ___GFP_IO 0x40u -#define ___GFP_FS 0x80u -#define ___GFP_ZERO 0x100u +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) /* 0x200u unused */ -#define ___GFP_DIRECT_RECLAIM 0x400u -#define ___GFP_KSWAPD_RECLAIM 0x800u -#define ___GFP_WRITE 0x1000u -#define ___GFP_NOWARN 0x2000u -#define ___GFP_RETRY_MAYFAIL 0x4000u -#define ___GFP_NOFAIL 0x8000u -#define ___GFP_NORETRY 0x10000u -#define ___GFP_MEMALLOC 0x20000u -#define ___GFP_COMP 0x40000u -#define ___GFP_NOMEMALLOC 0x80000u -#define ___GFP_HARDWALL 0x100000u -#define ___GFP_THISNODE 0x200000u -#define ___GFP_ACCOUNT 0x400000u -#define ___GFP_ZEROTAGS 0x800000u +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_ZERO 0x1000000u -#define ___GFP_SKIP_KASAN 0x2000000u +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) #else #define ___GFP_SKIP_ZERO 0 #define ___GFP_SKIP_KASAN 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) #else #define ___GFP_NOLOCKDEP 0 #endif -/* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* * Physical address zone modifiers (see linux/mmzone.h - low four bits) @@ -162,25 +198,25 @@ typedef unsigned int __bitwise gfp_t; * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * * The default allocator behavior depends on the request size. We have a concept - * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). * !costly allocations are too essential to fail so they are implicitly * non-failing by default (with some exceptions like OOM victims might fail so * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these - * implicit rules + * implicit rules. * * %__GFP_NORETRY: The VM implementation will try only very lightweight * memory direct reclaim to get some memory under memory pressure (thus * it can sleep). It will avoid disruptive actions like OOM killer. The * caller must handle the failure which is quite likely to happen under * heavy memory pressure. The flag is suitable when failure can easily be - * handled at small cost, such as reduced throughput + * handled at small cost, such as reduced throughput. * * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim * procedures that have previously failed if there is some indication - * that progress has been made else where. It can wait for other - * tasks to attempt high level approaches to freeing memory such as + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as * compaction (which removes fragmentation) and page-out. * There is still a definite limit to the number of retries, but it is * a larger limit than with %__GFP_NORETRY. @@ -230,7 +266,7 @@ typedef unsigned int __bitwise gfp_t; * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting * memory tags at the same time as zeroing memory has minimal additional - * performace impact. + * performance impact. * * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. * Used for userspace and vmalloc pages; the latter are unpoisoned by @@ -249,7 +285,7 @@ typedef unsigned int __bitwise gfp_t; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** @@ -274,7 +310,8 @@ typedef unsigned int __bitwise gfp_t; * accounted to kmemcg. * * %GFP_NOWAIT is for kernel allocations that should not stall for direct - * reclaim, start physical IO or use any filesystem callback. + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. * * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. @@ -325,7 +362,7 @@ typedef unsigned int __bitwise gfp_t; #define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) -#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 1c4385a00f88..db2dfbae8edb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -159,7 +159,6 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, int gpiod_set_config(struct gpio_desc *desc, unsigned long config); int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); -int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); int gpiod_is_active_low(const struct gpio_desc *desc); @@ -494,13 +493,6 @@ static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int deboun return -ENOSYS; } -static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) -{ - /* GPIO can never have been requested */ - WARN_ON(desc); - return -ENOSYS; -} - static inline void gpiod_toggle_active_low(struct gpio_desc *desc) { /* GPIO can never have been requested */ @@ -614,8 +606,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label); - #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ #include <linux/err.h> @@ -633,12 +623,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, return -ENXIO; } -static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, - char *label) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4f0c5d62c8f3..f8617eaf08ba 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,6 +3,8 @@ #define __LINUX_GPIO_DRIVER_H #include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/err.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/irqhandler.h> @@ -333,10 +335,12 @@ struct gpio_irq_chip { * (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN), * or negative error. It is recommended to always implement this * function, even on input-only or output-only gpio chips. - * @direction_input: configures signal "offset" as input, or returns error - * This can be omitted on input-only or output-only gpio chips. - * @direction_output: configures signal "offset" as output, or returns error - * This can be omitted on input-only or output-only gpio chips. + * @direction_input: configures signal "offset" as input, returns 0 on success + * or a negative error number. This can be omitted on input-only or + * output-only gpio chips. + * @direction_output: configures signal "offset" as output, returns 0 on + * success or a negative error number. This can be omitted on input-only + * or output-only gpio chips. * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error @@ -529,29 +533,64 @@ struct gpio_chip { #endif /* CONFIG_OF_GPIO */ }; -extern const char *gpiochip_is_requested(struct gpio_chip *gc, - unsigned int offset); +char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset); + + +struct _gpiochip_for_each_data { + const char **label; + unsigned int *i; +}; + +DEFINE_CLASS(_gpiochip_for_each_data, + struct _gpiochip_for_each_data, + if (*_T.label) kfree(*_T.label), + ({ + struct _gpiochip_for_each_data _data = { label, i }; + *_data.i = 0; + _data; + }), + const char **label, int *i) + +/** + * for_each_hwgpio - Iterates over all GPIOs for given chip. + * @_chip: Chip to iterate over. + * @_i: Loop counter. + * @_label: Place to store the address of the label if the GPIO is requested. + * Set to NULL for unused GPIOs. + */ +#define for_each_hwgpio(_chip, _i, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + *_data.i < _chip->ngpio; \ + (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ + if (IS_ERR(*_data.label = \ + gpiochip_dup_line_label(_chip, *_data.i))) {} \ + else /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range - * @chip: the chip to query - * @i: loop variable - * @base: first GPIO in the range - * @size: amount of GPIOs to check starting from @base - * @label: label of current GPIO + * @_chip: the chip to query + * @_i: loop variable + * @_base: first GPIO in the range + * @_size: amount of GPIOs to check starting from @base + * @_label: label of current GPIO */ -#define for_each_requested_gpio_in_range(chip, i, base, size, label) \ - for (i = 0; i < size; i++) \ - if ((label = gpiochip_is_requested(chip, base + i)) == NULL) {} else +#define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + *_data.i < _size; \ + (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ + if ((*_data.label = \ + gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \ + else if (IS_ERR(*_data.label)) {} \ + else /* Iterates over all requested GPIO of the given @chip */ #define for_each_requested_gpio(chip, i, label) \ for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ -extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -599,13 +638,22 @@ static inline int gpiochip_add(struct gpio_chip *gc) { return gpiochip_add_data(gc, NULL); } -extern void gpiochip_remove(struct gpio_chip *gc); -extern int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +void gpiochip_remove(struct gpio_chip *gc); +int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, + void *data, struct lock_class_key *lock_key, + struct lock_class_key *request_key); + +struct gpio_device *gpio_device_find(const void *data, + int (*match)(struct gpio_chip *gc, + const void *data)); -extern struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_get(struct gpio_device *gdev); +void gpio_device_put(struct gpio_device *gdev); + +DEFINE_FREE(gpio_device_put, struct gpio_device *, + if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T)) + +struct device *gpio_device_to_device(struct gpio_device *gdev); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); @@ -672,25 +720,12 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) -int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq); -void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); - -int gpiochip_irq_domain_activate(struct irq_domain *domain, - struct irq_data *data, bool reserve); -void gpiochip_irq_domain_deactivate(struct irq_domain *domain, - struct irq_data *data); - -bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, - unsigned int offset); - #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); #else #include <asm/bug.h> -#include <asm/errno.h> static inline int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain) @@ -758,18 +793,29 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); +struct gpio_desc * +gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); + +struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); - struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); +struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); -#else /* CONFIG_GPIOLIB */ +/* struct gpio_device getters */ +int gpio_device_get_base(struct gpio_device *gdev); +const char *gpio_device_get_label(struct gpio_device *gdev); -#include <linux/err.h> +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + +#else /* CONFIG_GPIOLIB */ #include <asm/bug.h> @@ -780,6 +826,36 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h new file mode 100644 index 000000000000..b5a84864650d --- /dev/null +++ b/include/linux/gpio/gpio-nomadik.h @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GPIO_NOMADIK_H +#define __LINUX_GPIO_NOMADIK_H + +struct fwnode_handle; + +/* Package definitions */ +#define PINCTRL_NMK_STN8815 0 +#define PINCTRL_NMK_DB8500 1 + +#define GPIO_BLOCK_SHIFT 5 +#define NMK_GPIO_PER_CHIP BIT(GPIO_BLOCK_SHIFT) +#define NMK_MAX_BANKS DIV_ROUND_UP(512, NMK_GPIO_PER_CHIP) + +/* Register in the logic block */ +#define NMK_GPIO_DAT 0x00 +#define NMK_GPIO_DATS 0x04 +#define NMK_GPIO_DATC 0x08 +#define NMK_GPIO_PDIS 0x0c +#define NMK_GPIO_DIR 0x10 +#define NMK_GPIO_DIRS 0x14 +#define NMK_GPIO_DIRC 0x18 +#define NMK_GPIO_SLPC 0x1c +#define NMK_GPIO_AFSLA 0x20 +#define NMK_GPIO_AFSLB 0x24 +#define NMK_GPIO_LOWEMI 0x28 + +#define NMK_GPIO_RIMSC 0x40 +#define NMK_GPIO_FIMSC 0x44 +#define NMK_GPIO_IS 0x48 +#define NMK_GPIO_IC 0x4c +#define NMK_GPIO_RWIMSC 0x50 +#define NMK_GPIO_FWIMSC 0x54 +#define NMK_GPIO_WKS 0x58 +/* These appear in DB8540 and later ASICs */ +#define NMK_GPIO_EDGELEVEL 0x5C +#define NMK_GPIO_LEVEL 0x60 + +/* Pull up/down values */ +enum nmk_gpio_pull { + NMK_GPIO_PULL_NONE, + NMK_GPIO_PULL_UP, + NMK_GPIO_PULL_DOWN, +}; + +/* Sleep mode */ +enum nmk_gpio_slpm { + NMK_GPIO_SLPM_INPUT, + NMK_GPIO_SLPM_WAKEUP_ENABLE = NMK_GPIO_SLPM_INPUT, + NMK_GPIO_SLPM_NOCHANGE, + NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE, +}; + +struct nmk_gpio_chip { + struct gpio_chip chip; + void __iomem *addr; + struct clk *clk; + unsigned int bank; + void (*set_ioforce)(bool enable); + spinlock_t lock; + bool sleepmode; + bool is_mobileye_soc; + /* Keep track of configured edges */ + u32 edge_rising; + u32 edge_falling; + u32 real_wake; + u32 rwimsc; + u32 fwimsc; + u32 rimsc; + u32 fimsc; + u32 pull_up; + u32 lowemi; +}; + +/* Alternate functions: function C is set in hw by setting both A and B */ +#define NMK_GPIO_ALT_GPIO 0 +#define NMK_GPIO_ALT_A 1 +#define NMK_GPIO_ALT_B 2 +#define NMK_GPIO_ALT_C (NMK_GPIO_ALT_A | NMK_GPIO_ALT_B) + +#define NMK_GPIO_ALT_CX_SHIFT 2 +#define NMK_GPIO_ALT_C1 ((1<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C2 ((2<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C3 ((3<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C4 ((4<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) + +#define PRCM_GPIOCR_ALTCX(pin_num,\ + altc1_used, altc1_ri, altc1_cb,\ + altc2_used, altc2_ri, altc2_cb,\ + altc3_used, altc3_ri, altc3_cb,\ + altc4_used, altc4_ri, altc4_cb)\ +{\ + .pin = pin_num,\ + .altcx[PRCM_IDX_GPIOCR_ALTC1] = {\ + .used = altc1_used,\ + .reg_index = altc1_ri,\ + .control_bit = altc1_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC2] = {\ + .used = altc2_used,\ + .reg_index = altc2_ri,\ + .control_bit = altc2_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC3] = {\ + .used = altc3_used,\ + .reg_index = altc3_ri,\ + .control_bit = altc3_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC4] = {\ + .used = altc4_used,\ + .reg_index = altc4_ri,\ + .control_bit = altc4_cb\ + },\ +} + +/** + * enum prcm_gpiocr_reg_index + * Used to reference an PRCM GPIOCR register address. + */ +enum prcm_gpiocr_reg_index { + PRCM_IDX_GPIOCR1, + PRCM_IDX_GPIOCR2, + PRCM_IDX_GPIOCR3 +}; +/** + * enum prcm_gpiocr_altcx_index + * Used to reference an Other alternate-C function. + */ +enum prcm_gpiocr_altcx_index { + PRCM_IDX_GPIOCR_ALTC1, + PRCM_IDX_GPIOCR_ALTC2, + PRCM_IDX_GPIOCR_ALTC3, + PRCM_IDX_GPIOCR_ALTC4, + PRCM_IDX_GPIOCR_ALTC_MAX, +}; + +/** + * struct prcm_gpio_altcx - Other alternate-C function + * @used: other alternate-C function availability + * @reg_index: PRCM GPIOCR register index used to control the function + * @control_bit: PRCM GPIOCR bit used to control the function + */ +struct prcm_gpiocr_altcx { + bool used:1; + u8 reg_index:2; + u8 control_bit:5; +} __packed; + +/** + * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin + * @pin: The pin number + * @altcx: array of other alternate-C[1-4] functions + */ +struct prcm_gpiocr_altcx_pin_desc { + unsigned short pin; + struct prcm_gpiocr_altcx altcx[PRCM_IDX_GPIOCR_ALTC_MAX]; +}; + +/** + * struct nmk_function - Nomadik pinctrl mux function + * @name: The name of the function, exported to pinctrl core. + * @groups: An array of pin groups that may select this function. + * @ngroups: The number of entries in @groups. + */ +struct nmk_function { + const char *name; + const char * const *groups; + unsigned int ngroups; +}; + +/** + * struct nmk_pingroup - describes a Nomadik pin group + * @grp: Generic data of the pin group (name and pins) + * @altsetting: the altsetting to apply to all pins in this group to + * configure them to be used by a function + */ +struct nmk_pingroup { + struct pingroup grp; + int altsetting; +}; + +#define NMK_PIN_GROUP(a, b) \ + { \ + .grp = PINCTRL_PINGROUP(#a, a##_pins, ARRAY_SIZE(a##_pins)), \ + .altsetting = b, \ + } + +/** + * struct nmk_pinctrl_soc_data - Nomadik pin controller per-SoC configuration + * @pins: An array describing all pins the pin controller affects. + * All pins which are also GPIOs must be listed first within the + * array, and be numbered identically to the GPIO controller's + * numbering. + * @npins: The number of entries in @pins. + * @functions: The functions supported on this SoC. + * @nfunction: The number of entries in @functions. + * @groups: An array describing all pin groups the pin SoC supports. + * @ngroups: The number of entries in @groups. + * @altcx_pins: The pins that support Other alternate-C function on this SoC + * @npins_altcx: The number of Other alternate-C pins + * @prcm_gpiocr_registers: The array of PRCM GPIOCR registers on this SoC + */ +struct nmk_pinctrl_soc_data { + const struct pinctrl_pin_desc *pins; + unsigned int npins; + const struct nmk_function *functions; + unsigned int nfunctions; + const struct nmk_pingroup *groups; + unsigned int ngroups; + const struct prcm_gpiocr_altcx_pin_desc *altcx_pins; + unsigned int npins_altcx; + const u16 *prcm_gpiocr_registers; +}; + +#ifdef CONFIG_PINCTRL_STN8815 + +void nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +#ifdef CONFIG_PINCTRL_DB8500 + +void nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +#ifdef CONFIG_PINCTRL_DB8540 + +void nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +struct platform_device; + +#ifdef CONFIG_DEBUG_FS + +/* + * Symbols declared in gpio-nomadik used by pinctrl-nomadik. If pinctrl-nomadik + * is enabled, then gpio-nomadik is enabled as well; the reverse if not always + * true. + */ +void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, + struct gpio_chip *chip, unsigned int offset, + unsigned int gpio); + +#else + +static inline void nmk_gpio_dbg_show_one(struct seq_file *s, + struct pinctrl_dev *pctldev, + struct gpio_chip *chip, + unsigned int offset, + unsigned int gpio) +{ +} + +#endif + +void __nmk_gpio_make_output(struct nmk_gpio_chip *nmk_chip, + unsigned int offset, int val); +void __nmk_gpio_set_slpm(struct nmk_gpio_chip *nmk_chip, unsigned int offset, + enum nmk_gpio_slpm mode); +struct nmk_gpio_chip *nmk_gpio_populate_chip(struct fwnode_handle *fwnode, + struct platform_device *pdev); + +/* Symbols declared in pinctrl-nomadik used by gpio-nomadik. */ +#ifdef CONFIG_PINCTRL_NOMADIK +extern struct nmk_gpio_chip *nmk_gpio_chips[NMK_MAX_BANKS]; +extern spinlock_t nmk_gpio_slpm_lock; +int __maybe_unused nmk_prcm_gpiocr_get_mode(struct pinctrl_dev *pctldev, + int gpio); +#endif + +#endif /* __LINUX_GPIO_NOMADIK_H */ diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h index 6c75c8bd44a0..1a14e239221f 100644 --- a/include/linux/gpio/property.h +++ b/include/linux/gpio/property.h @@ -2,7 +2,6 @@ #ifndef __LINUX_GPIO_PROPERTY_H #define __LINUX_GPIO_PROPERTY_H -#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */ #include <linux/property.h> #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index 3f84aeb81e48..80fa930b04c6 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -21,6 +21,7 @@ struct device; * disable button via sysfs * @value: axis value for %EV_ABS * @irq: Irq number in case of interrupt keys + * @wakeirq: Optional dedicated wake-up interrupt */ struct gpio_keys_button { unsigned int code; @@ -34,6 +35,7 @@ struct gpio_keys_button { bool can_disable; int value; unsigned int irq; + unsigned int wakeirq; }; /** diff --git a/include/linux/greybus.h b/include/linux/greybus.h index 18c0fb958b74..634c9511cf78 100644 --- a/include/linux/greybus.h +++ b/include/linux/greybus.h @@ -104,44 +104,14 @@ void gb_debugfs_init(void); void gb_debugfs_cleanup(void); struct dentry *gb_debugfs_get(void); -extern struct bus_type greybus_bus_type; +extern const struct bus_type greybus_bus_type; -extern struct device_type greybus_hd_type; -extern struct device_type greybus_module_type; -extern struct device_type greybus_interface_type; -extern struct device_type greybus_control_type; -extern struct device_type greybus_bundle_type; -extern struct device_type greybus_svc_type; - -static inline int is_gb_host_device(const struct device *dev) -{ - return dev->type == &greybus_hd_type; -} - -static inline int is_gb_module(const struct device *dev) -{ - return dev->type == &greybus_module_type; -} - -static inline int is_gb_interface(const struct device *dev) -{ - return dev->type == &greybus_interface_type; -} - -static inline int is_gb_control(const struct device *dev) -{ - return dev->type == &greybus_control_type; -} - -static inline int is_gb_bundle(const struct device *dev) -{ - return dev->type == &greybus_bundle_type; -} - -static inline int is_gb_svc(const struct device *dev) -{ - return dev->type == &greybus_svc_type; -} +extern const struct device_type greybus_hd_type; +extern const struct device_type greybus_module_type; +extern const struct device_type greybus_interface_type; +extern const struct device_type greybus_control_type; +extern const struct device_type greybus_bundle_type; +extern const struct device_type greybus_svc_type; static inline bool cport_id_valid(struct gb_host_device *hd, u16 cport_id) { diff --git a/include/linux/greybus/greybus_protocols.h b/include/linux/greybus/greybus_protocols.h index aeb8f9243545..820134b0105c 100644 --- a/include/linux/greybus/greybus_protocols.h +++ b/include/linux/greybus/greybus_protocols.h @@ -232,9 +232,7 @@ struct gb_fw_download_fetch_firmware_request { __le32 size; } __packed; -struct gb_fw_download_fetch_firmware_response { - __u8 data[0]; -} __packed; +/* gb_fw_download_fetch_firmware_response contains no other data */ /* firmware download release firmware request */ struct gb_fw_download_release_firmware_request { @@ -414,9 +412,7 @@ struct gb_bootrom_get_firmware_request { __le32 size; } __packed; -struct gb_bootrom_get_firmware_response { - __u8 data[0]; -} __packed; +/* gb_bootrom_get_firmware_response contains no other data */ /* Bootrom protocol Ready to boot request */ struct gb_bootrom_ready_to_boot_request { diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 86ea7c63a0d2..f316c8d0f3fc 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -659,6 +659,12 @@ enum pq_init_status { * number (nonce) provided by the host to prevent replay attacks. * public key and certificate also provided as part of the FW response. * + * CPUCP_PACKET_INFO_SIGNED_GET - + * Get the device information signed by the Trusted Platform device. + * device info data is also hashed with some unique number (nonce) provided + * by the host to prevent replay attacks. public key and certificate also + * provided as part of the FW response. + * * CPUCP_PACKET_MONITOR_DUMP_GET - * Get monitors registers dump from the CpuCP kernel. * The CPU will put the registers dump in the a buffer allocated by the driver @@ -733,7 +739,7 @@ enum cpucp_packet_id { CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ CPUCP_PACKET_RESERVED2, /* not used */ CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ - CPUCP_PACKET_RESERVED3, /* not used */ + CPUCP_PACKET_INFO_SIGNED_GET, /* internal */ CPUCP_PACKET_RESERVED4, /* not used */ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ CPUCP_PACKET_RESERVED5, /* not used */ diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 13b1e65fbdcc..6730ee900ee1 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,10 @@ #define HID_USAGE_SENSOR_ALS 0x200041 #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 +#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 diff --git a/include/linux/hid.h b/include/linux/hid.h index 964ca1f15e3f..b12cb1c8e682 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -679,14 +679,17 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + struct kref ref; unsigned int id; /* system unique id */ -#ifdef CONFIG_BPF +#ifdef CONFIG_HID_BPF struct hid_bpf bpf; /* hid-bpf data */ -#endif /* CONFIG_BPF */ +#endif /* CONFIG_HID_BPF */ }; +void hiddev_free(struct kref *ref); + #define to_hid_device(pdev) \ container_of(pdev, struct hid_device, dev) @@ -833,11 +836,11 @@ struct hid_driver { void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -#ifdef CONFIG_PM + int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); int (*reset_resume)(struct hid_device *hdev); -#endif + /* private: */ struct device_driver driver; }; @@ -909,7 +912,7 @@ extern bool hid_ignore(struct hid_device *); extern int hid_add_device(struct hid_device *); extern void hid_destroy_device(struct hid_device *); -extern struct bus_type hid_bus_type; +extern const struct bus_type hid_bus_type; extern int __must_check __hid_register_driver(struct hid_driver *, struct module *, const char *mod_name); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index e9afb61e6ee0..7118ac28d468 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ @@ -115,7 +104,7 @@ struct hid_bpf_ops { size_t len, enum hid_report_type rtype, enum hid_class_request reqtype); struct module *owner; - struct bus_type *bus_type; + const struct bus_type *bus_type; }; extern struct hid_bpf_ops *hid_bpf_ops; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 99c474de800d..00341b56d291 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -439,6 +439,13 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * memcpy_from_folio - Copy a range of bytes from a folio. + * @to: The memory to copy to. + * @folio: The folio to read from. + * @offset: The first byte in the folio to read. + * @len: The number of bytes to copy. + */ static inline void memcpy_from_folio(char *to, struct folio *folio, size_t offset, size_t len) { @@ -454,12 +461,19 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, memcpy(to, from, chunk); kunmap_local(from); - from += chunk; + to += chunk; offset += chunk; len -= chunk; } while (len > 0); } +/** + * memcpy_to_folio - Copy a range of bytes to a folio. + * @folio: The folio to write to. + * @offset: The first byte in the folio to store to. + * @from: The memory to copy from. + * @len: The number of bytes to copy. + */ static inline void memcpy_to_folio(struct folio *folio, size_t offset, const char *from, size_t len) { @@ -484,6 +498,82 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, } /** + * folio_zero_tail - Zero the tail of a folio. + * @folio: The folio to zero. + * @offset: The byte offset in the folio to start zeroing at. + * @kaddr: The address the folio is currently mapped to. + * + * If you have already used kmap_local_folio() to map a folio, written + * some data to it and now need to zero the end of the folio (and flush + * the dcache), you can use this function. If you do not have the + * folio kmapped (eg the folio has been partially populated by DMA), + * use folio_zero_range() or folio_zero_segment() instead. + * + * Return: An address which can be passed to kunmap_local(). + */ +static inline __must_check void *folio_zero_tail(struct folio *folio, + size_t offset, void *kaddr) +{ + size_t len = folio_size(folio) - offset; + + if (folio_test_highmem(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { + memset(kaddr, 0, max); + kunmap_local(kaddr); + len -= max; + offset += max; + max = PAGE_SIZE; + kaddr = kmap_local_folio(folio, offset); + } + } + + memset(kaddr, 0, len); + flush_dcache_folio(folio); + + return kaddr; +} + +/** + * folio_fill_tail - Copy some data to a folio and pad with zeroes. + * @folio: The destination folio. + * @offset: The offset into @folio at which to start copying. + * @from: The data to copy. + * @len: How many bytes of data to copy. + * + * This function is most useful for filesystems which support inline data. + * When they want to copy data from the inode into the page cache, this + * function does everything for them. It supports large folios even on + * HIGHMEM configurations. + */ +static inline void folio_fill_tail(struct folio *folio, size_t offset, + const char *from, size_t len) +{ + char *to = kmap_local_folio(folio, offset); + + VM_BUG_ON(offset + len > folio_size(folio)); + + if (folio_test_highmem(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { + memcpy(to, from, max); + kunmap_local(to); + len -= max; + from += max; + offset += max; + max = PAGE_SIZE; + to = kmap_local_folio(folio, offset); + } + } + + memcpy(to, from, len); + to = folio_zero_tail(folio, offset + len, to + len); + kunmap_local(to); +} + +/** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. * @folio: The folio to copy from. @@ -551,10 +641,24 @@ static inline void folio_zero_range(struct folio *folio, zero_user_segments(&folio->page, start, start + length, 0, 0); } -static inline void unmap_and_put_page(struct page *page, void *addr) +/** + * folio_release_kmap - Unmap a folio and drop a refcount. + * @folio: The folio to release. + * @addr: The address previously returned by a call to kmap_local_folio(). + * + * It is common, eg in directory handling to kmap a folio. This function + * unmaps the folio and drops the refcount that was being held to keep the + * folio alive while we accessed it. + */ +static inline void folio_release_kmap(struct folio *folio, void *addr) { kunmap_local(addr); - put_page(page); + folio_put(folio); +} + +static inline void unmap_and_put_page(struct page *page, void *addr) +{ + folio_release_kmap(page_folio(page), addr); } #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 39fbfb4be944..9d7754ad5e9b 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -43,6 +43,7 @@ #define QM_MB_CMD_CQC_BT 0x5 #define QM_MB_CMD_SQC_VFT_V2 0x6 #define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_FLUSH_QM 0x9 #define QM_MB_CMD_SRC 0xc #define QM_MB_CMD_DST 0xd @@ -108,17 +109,13 @@ enum qm_stop_reason { }; enum qm_state { - QM_INIT = 0, - QM_START, - QM_CLOSE, + QM_WORK = 0, QM_STOP, }; enum qp_state { - QP_INIT = 1, - QP_START, + QP_START = 1, QP_STOP, - QP_CLOSE, }; enum qm_hw_ver { @@ -144,15 +141,33 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_misc_ctl_bits { + QM_DRIVER_REMOVING = 0x0, + QM_RST_SCHED, + QM_RESETTING, + QM_MODULE_PARAM, +}; + enum qm_cap_bits { QM_SUPPORT_DB_ISOLATION = 0x0, QM_SUPPORT_FUNC_QOS, QM_SUPPORT_STOP_QP, + QM_SUPPORT_STOP_FUNC, QM_SUPPORT_MB_COMMAND, QM_SUPPORT_SVA_PREFETCH, QM_SUPPORT_RPM, }; +struct qm_dev_alg { + u64 alg_msk; + const char *alg; +}; + +struct qm_dev_dfx { + u32 dev_state; + u32 dev_timeout; +}; + struct dfx_diff_registers { u32 *regs; u32 reg_offset; @@ -181,6 +196,7 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + struct qm_dev_dfx dev_dfx; unsigned int *qm_last_words; /* ACC engines recoreding last regs */ unsigned int *last_words; @@ -258,6 +274,16 @@ struct hisi_qm_cap_info { u32 v3_val; }; +struct hisi_qm_cap_record { + u32 type; + u32 cap_val; +}; + +struct hisi_qm_cap_tables { + struct hisi_qm_cap_record *qm_cap_table; + struct hisi_qm_cap_record *dev_cap_table; +}; + struct hisi_qm_list { struct mutex lock; struct list_head list; @@ -269,6 +295,7 @@ struct hisi_qm_poll_data { struct hisi_qm *qm; struct work_struct work; u16 *qp_finish_id; + u16 eqe_num; }; /** @@ -285,6 +312,18 @@ struct qm_err_isolate { struct list_head qm_hw_errs; }; +struct qm_rsv_buf { + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqc *eqc; + struct qm_aeqc *aeqc; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqc_dma; + dma_addr_t aeqc_dma; + struct qm_dma qcdma; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -317,6 +356,7 @@ struct hisi_qm { dma_addr_t cqc_dma; dma_addr_t eqe_dma; dma_addr_t aeqe_dma; + struct qm_rsv_buf xqc_buf; struct hisi_qm_status status; const struct hisi_qm_err_ini *err_ini; @@ -344,7 +384,6 @@ struct hisi_qm { struct work_struct rst_work; struct work_struct cmd_process; - const char *algs; bool use_sva; resource_size_t phys_base; @@ -355,6 +394,8 @@ struct hisi_qm { u32 mb_qos; u32 type_rate; struct qm_err_isolate isolate_data; + + struct hisi_qm_cap_tables cap_tables; }; struct hisi_qp_status { @@ -471,12 +512,26 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) mutex_init(&qm_list->lock); } +static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); -int hisi_qm_stop_qp(struct hisi_qp *qp); +void hisi_qm_stop_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); void hisi_qm_debug_init(struct hisi_qm *qm); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); @@ -516,8 +571,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); int hisi_qm_resume(struct device *dev); int hisi_qm_suspend(struct device *dev); void hisi_qm_pm_uninit(struct hisi_qm *qm); @@ -528,6 +583,8 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); u32 hisi_qm_get_hw_info(struct hisi_qm *qm, const struct hisi_qm_cap_info *info_table, u32 index, bool is_read); +int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs, + u32 dev_algs_size); /* Used by VFIO ACC live migration driver */ struct pci_driver *hisi_sec_get_pf_driver(void); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0ee140176f10..aa1e65ccb615 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -13,16 +13,12 @@ #define _LINUX_HRTIMER_H #include <linux/hrtimer_defs.h> -#include <linux/rbtree.h> +#include <linux/hrtimer_types.h> #include <linux/init.h> #include <linux/list.h> -#include <linux/percpu.h> -#include <linux/seqlock.h> +#include <linux/percpu-defs.h> +#include <linux/rbtree.h> #include <linux/timer.h> -#include <linux/timerqueue.h> - -struct hrtimer_clock_base; -struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: @@ -60,14 +56,6 @@ enum hrtimer_mode { }; /* - * Return values for the callback function - */ -enum hrtimer_restart { - HRTIMER_NORESTART, /* Timer is not restarted */ - HRTIMER_RESTART, /* Timer must be restarted */ -}; - -/* * Values to track state of the timer * * Possible states: @@ -95,38 +83,6 @@ enum hrtimer_restart { #define HRTIMER_STATE_ENQUEUED 0x01 /** - * struct hrtimer - the basic hrtimer structure - * @node: timerqueue node, which also manages node.expires, - * the absolute expiry time in the hrtimers internal - * representation. The time is related to the clock on - * which the timer is based. Is setup by adding - * slack to the _softexpires value. For non range timers - * identical to _softexpires. - * @_softexpires: the absolute earliest expiry time of the hrtimer. - * The time which was given as expiry time when the timer - * was armed. - * @function: timer expiry callback function - * @base: pointer to the timer base (per cpu and per clock) - * @state: state information (See bit values above) - * @is_rel: Set if the timer was armed relative - * @is_soft: Set if hrtimer will be expired in soft interrupt context. - * @is_hard: Set if hrtimer will be expired in hard interrupt context - * even on RT. - * - * The hrtimer structure must be initialized by hrtimer_init() - */ -struct hrtimer { - struct timerqueue_node node; - ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); - struct hrtimer_clock_base *base; - u8 state; - u8 is_rel; - u8 is_soft; - u8 is_hard; -}; - -/** * struct hrtimer_sleeper - simple sleeper structure * @timer: embedded timer structure * @task: task to wake up @@ -138,105 +94,6 @@ struct hrtimer_sleeper { struct task_struct *task; }; -#ifdef CONFIG_64BIT -# define __hrtimer_clock_base_align ____cacheline_aligned -#else -# define __hrtimer_clock_base_align -#endif - -/** - * struct hrtimer_clock_base - the timer base for a specific clock - * @cpu_base: per cpu clock base - * @index: clock type index for per_cpu support when moving a - * timer to a base on another cpu. - * @clockid: clock id for per_cpu support - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer - * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock - * @offset: offset of this clock to the monotonic base - */ -struct hrtimer_clock_base { - struct hrtimer_cpu_base *cpu_base; - unsigned int index; - clockid_t clockid; - seqcount_raw_spinlock_t seq; - struct hrtimer *running; - struct timerqueue_head active; - ktime_t (*get_time)(void); - ktime_t offset; -} __hrtimer_clock_base_align; - -enum hrtimer_base_type { - HRTIMER_BASE_MONOTONIC, - HRTIMER_BASE_REALTIME, - HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_TAI, - HRTIMER_BASE_MONOTONIC_SOFT, - HRTIMER_BASE_REALTIME_SOFT, - HRTIMER_BASE_BOOTTIME_SOFT, - HRTIMER_BASE_TAI_SOFT, - HRTIMER_MAX_CLOCK_BASES, -}; - -/** - * struct hrtimer_cpu_base - the per cpu clock bases - * @lock: lock protecting the base and associated clock bases - * and timers - * @cpu: cpu number - * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set_seq: Sequence counter of clock was set events - * @hres_active: State of high resolution mode - * @in_hrtirq: hrtimer_interrupt() is currently executing - * @hang_detected: The last hrtimer interrupt detected a hang - * @softirq_activated: displays, if the softirq is raised - update of softirq - * related settings is not required then. - * @nr_events: Total number of hrtimer interrupt events - * @nr_retries: Total number of hrtimer interrupt retries - * @nr_hangs: Total number of hrtimer interrupt hangs - * @max_hang_time: Maximum time spent in hrtimer_interrupt - * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are - * expired - * @timer_waiters: A hrtimer_cancel() invocation waits for the timer - * callback to finish. - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue; it is the total first expiry time (hard - * and soft hrtimer are taken into account) - * @next_timer: Pointer to the first expiring timer - * @softirq_expires_next: Time to check, if soft queues needs also to be expired - * @softirq_next_timer: Pointer to the first expiring softirq based timer - * @clock_base: array of clock bases for this cpu - * - * Note: next_timer is just an optimization for __remove_hrtimer(). - * Do not dereference the pointer because it is not reliable on - * cross cpu removals. - */ -struct hrtimer_cpu_base { - raw_spinlock_t lock; - unsigned int cpu; - unsigned int active_bases; - unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1, - softirq_activated : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int nr_events; - unsigned short nr_retries; - unsigned short nr_hangs; - unsigned int max_hang_time; -#endif -#ifdef CONFIG_PREEMPT_RT - spinlock_t softirq_expiry_lock; - atomic_t timer_waiters; -#endif - ktime_t expires_next; - struct hrtimer *next_timer; - ktime_t softirq_expires_next; - struct hrtimer *softirq_next_timer; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -} ____cacheline_aligned; - static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; @@ -485,20 +342,12 @@ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** - * hrtimer_forward_now - forward the timer expiry so it expires after now + * hrtimer_forward_now() - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * - * Forward the timer expiry so it will expire after the current time - * of the hrtimer clock base. Returns the number of overruns. - * - * Can be safely called from the callback function of @timer. If - * called from other contexts @timer must neither be enqueued nor - * running the callback and the caller needs to take care of - * serialization. - * - * Note: This only updates the timer expiry value and does not requeue - * the timer. + * It is a variant of hrtimer_forward(). The timer will expire after the current + * time of the hrtimer clock base. See hrtimer_forward() for details. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) @@ -531,9 +380,9 @@ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif #endif diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 2d3e3c5fb946..c3b4b7ed7c16 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -3,6 +3,8 @@ #define _LINUX_HRTIMER_DEFS_H #include <linux/ktime.h> +#include <linux/timerqueue.h> +#include <linux/seqlock.h> #ifdef CONFIG_HIGH_RES_TIMERS @@ -24,4 +26,106 @@ #endif +#ifdef CONFIG_64BIT +# define __hrtimer_clock_base_align ____cacheline_aligned +#else +# define __hrtimer_clock_base_align +#endif + +/** + * struct hrtimer_clock_base - the timer base for a specific clock + * @cpu_base: per cpu clock base + * @index: clock type index for per_cpu support when moving a + * timer to a base on another cpu. + * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer + * @active: red black tree root node for the active timers + * @get_time: function to retrieve the current time of the clock + * @offset: offset of this clock to the monotonic base + */ +struct hrtimer_clock_base { + struct hrtimer_cpu_base *cpu_base; + unsigned int index; + clockid_t clockid; + seqcount_raw_spinlock_t seq; + struct hrtimer *running; + struct timerqueue_head active; + ktime_t (*get_time)(void); + ktime_t offset; +} __hrtimer_clock_base_align; + +enum hrtimer_base_type { + HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, + HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, + HRTIMER_MAX_CLOCK_BASES, +}; + +/** + * struct hrtimer_cpu_base - the per cpu clock bases + * @lock: lock protecting the base and associated clock bases + * and timers + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired + * @online: CPU is online from an hrtimers point of view + * @timer_waiters: A hrtimer_cancel() invocation waits for the timer + * callback to finish. + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) + * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. + */ +struct hrtimer_cpu_base { + raw_spinlock_t lock; + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1, + online : 1; +#ifdef CONFIG_HIGH_RES_TIMERS + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; + unsigned int max_hang_time; +#endif +#ifdef CONFIG_PREEMPT_RT + spinlock_t softirq_expiry_lock; + atomic_t timer_waiters; +#endif + ktime_t expires_next; + struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; +} ____cacheline_aligned; + + #endif diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h new file mode 100644 index 000000000000..ad66a3081735 --- /dev/null +++ b/include/linux/hrtimer_types.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HRTIMER_TYPES_H +#define _LINUX_HRTIMER_TYPES_H + +#include <linux/types.h> +#include <linux/timerqueue_types.h> + +struct hrtimer_clock_base; + +/* + * Return values for the callback function + */ +enum hrtimer_restart { + HRTIMER_NORESTART, /* Timer is not restarted */ + HRTIMER_RESTART, /* Timer must be restarted */ +}; + +/** + * struct hrtimer - the basic hrtimer structure + * @node: timerqueue node, which also manages node.expires, + * the absolute expiry time in the hrtimers internal + * representation. The time is related to the clock on + * which the timer is based. Is setup by adding + * slack to the _softexpires value. For non range timers + * identical to _softexpires. + * @_softexpires: the absolute earliest expiry time of the hrtimer. + * The time which was given as expiry time when the timer + * was armed. + * @function: timer expiry callback function + * @base: pointer to the timer base (per cpu and per clock) + * @state: state information (See bit values above) + * @is_rel: Set if the timer was armed relative + * @is_soft: Set if hrtimer will be expired in soft interrupt context. + * @is_hard: Set if hrtimer will be expired in hard interrupt context + * even on RT. + * + * The hrtimer structure must be initialized by hrtimer_init() + */ +struct hrtimer { + struct timerqueue_node node; + ktime_t _softexpires; + enum hrtimer_restart (*function)(struct hrtimer *); + struct hrtimer_clock_base *base; + u8 state; + u8 is_rel; + u8 is_soft; + u8 is_hard; +}; + +#endif /* _LINUX_HRTIMER_TYPES_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index fa0350b0812a..de0c89105076 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -67,6 +67,26 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) +/* + * Mask of all large folio orders supported for anonymous THP; all orders up to + * and including PMD_ORDER, except order-0 (which is not "huge") and order-1 + * (which is a limitation of the THP implementation). + */ +#define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) + +/* + * Mask of all large folio orders supported for file THP. + */ +#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) + +/* + * Mask of all large folio orders supported for THP. + */ +#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) + +#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order))) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) @@ -77,45 +97,105 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) extern unsigned long transparent_hugepage_flags; +extern unsigned long huge_anon_orders_always; +extern unsigned long huge_anon_orders_madvise; +extern unsigned long huge_anon_orders_inherit; -#define hugepage_flags_enabled() \ - (transparent_hugepage_flags & \ - ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \ - (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))) -#define hugepage_flags_always() \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_FLAG)) +static inline bool hugepage_global_enabled(void) +{ + return transparent_hugepage_flags & + ((1<<TRANSPARENT_HUGEPAGE_FLAG) | + (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)); +} + +static inline bool hugepage_global_always(void) +{ + return transparent_hugepage_flags & + (1<<TRANSPARENT_HUGEPAGE_FLAG); +} + +static inline bool hugepage_flags_enabled(void) +{ + /* + * We cover both the anon and the file-backed case here; we must return + * true if globally enabled, even when all anon sizes are set to never. + * So we don't need to look at huge_anon_orders_inherit. + */ + return hugepage_global_enabled() || + huge_anon_orders_always || + huge_anon_orders_madvise; +} + +static inline int highest_order(unsigned long orders) +{ + return fls_long(orders) - 1; +} + +static inline int next_order(unsigned long *orders, int prev) +{ + *orders &= ~BIT(prev); + return highest_order(*orders); +} /* * Do the below checks: * - For file vma, check if the linear page offset of vma is - * HPAGE_PMD_NR aligned within the file. The hugepage is - * guaranteed to be hugepage-aligned within the file, but we must - * check that the PMD-aligned addresses in the VMA map to - * PMD-aligned offsets within the file, else the hugepage will - * not be PMD-mappable. - * - For all vmas, check if the haddr is in an aligned HPAGE_PMD_SIZE + * order-aligned within the file. The hugepage is + * guaranteed to be order-aligned within the file, but we must + * check that the order-aligned addresses in the VMA map to + * order-aligned offsets within the file, else the hugepage will + * not be mappable. + * - For all vmas, check if the haddr is in an aligned hugepage * area. */ -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long addr) +static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, + unsigned long addr, int order) { + unsigned long hpage_size = PAGE_SIZE << order; unsigned long haddr; /* Don't have to check pgoff for anonymous vma */ if (!vma_is_anonymous(vma)) { if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR)) + hpage_size >> PAGE_SHIFT)) return false; } - haddr = addr & HPAGE_PMD_MASK; + haddr = ALIGN_DOWN(addr, hpage_size); - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end) return false; return true; } +/* + * Filter the bitfield of input orders to the ones suitable for use in the vma. + * See thp_vma_suitable_order(). + * All orders that pass the checks are returned as a bitfield. + */ +static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, + unsigned long addr, unsigned long orders) +{ + int order; + + /* + * Iterate over orders, highest to lowest, removing orders that don't + * meet alignment requirements from the set. Exit loop at first order + * that meets requirements, since all lower orders must also meet + * requirements. + */ + + order = highest_order(orders); + + while (orders) { + if (thp_vma_suitable_order(vma, addr, order)) + break; + order = next_order(&orders, order); + } + + return orders; +} + static inline bool file_thp_enabled(struct vm_area_struct *vma) { struct inode *inode; @@ -126,12 +206,55 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) inode = vma->vm_file->f_inode; return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && - (vma->vm_flags & VM_EXEC) && !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } -bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, - bool smaps, bool in_pf, bool enforce_sysfs); +unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders); + +/** + * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma + * @vma: the vm area to check + * @vm_flags: use these vm_flags instead of vma->vm_flags + * @smaps: whether answer will be used for smaps file + * @in_pf: whether answer will be used by page fault handler + * @enforce_sysfs: whether sysfs config should be taken into account + * @orders: bitfield of all orders to consider + * + * Calculates the intersection of the requested hugepage orders and the allowed + * hugepage orders for the provided vma. Permitted orders are encoded as a set + * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3 + * corresponds to order-3, etc). Order-0 is never considered a hugepage order. + * + * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage + * orders are allowed. + */ +static inline +unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders) +{ + /* Optimization to check if required orders are enabled early. */ + if (enforce_sysfs && vma_is_anonymous(vma)) { + unsigned long mask = READ_ONCE(huge_anon_orders_always); + + if (vm_flags & VM_HUGEPAGE) + mask |= READ_ONCE(huge_anon_orders_madvise); + if (hugepage_global_always() || + ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled())) + mask |= READ_ONCE(huge_anon_orders_inherit); + + orders &= mask; + if (!orders) + return 0; + } + + return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, + enforce_sysfs, orders); +} #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -142,10 +265,11 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); -int split_huge_page_to_list(struct page *page, struct list_head *list); +int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order); static inline int split_huge_page(struct page *page) { - return split_huge_page_to_list(page, NULL); + return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio); @@ -267,17 +391,24 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) return false; } -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long addr) +static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, + unsigned long addr, int order) { return false; } -static inline bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs) +static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, + unsigned long addr, unsigned long orders) { - return false; + return 0; +} + +static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders) +{ + return 0; } static inline void folio_prep_large_rmappable(struct folio *folio) {} @@ -292,7 +423,8 @@ can_split_folio(struct folio *folio, int *pextra_pins) return false; } static inline int -split_huge_page_to_list(struct page *page, struct list_head *list) +split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) { return 0; } @@ -389,17 +521,20 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline int split_folio_to_list(struct folio *folio, - struct list_head *list) +static inline int split_folio_to_list_to_order(struct folio *folio, + struct list_head *list, int new_order) { - return split_huge_page_to_list(&folio->page, list); + return split_huge_page_to_list_to_order(&folio->page, list, new_order); } -static inline int split_folio(struct folio *folio) +static inline int split_folio_to_order(struct folio *folio, int new_order) { - return split_folio_to_list(folio, NULL); + return split_folio_to_list_to_order(folio, NULL, new_order); } +#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) +#define split_folio(f) split_folio_to_order(f, 0) + /* * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to * limitations in the implementation like arm64 MTE can override this to diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a30686e649f7..77b30a8c6076 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -30,7 +30,7 @@ void free_huge_folio(struct folio *folio); #ifdef CONFIG_HUGETLB_PAGE -#include <linux/mempolicy.h> +#include <linux/pagemap.h> #include <linux/shm.h> #include <asm/tlbflush.h> @@ -60,6 +60,7 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; + struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored @@ -138,7 +139,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -void __unmap_hugepage_range_final(struct mmu_gather *tlb, +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags); @@ -177,7 +178,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); extern int sysctl_hugetlb_shm_group; -extern struct list_head huge_boot_pages; +extern struct list_head huge_boot_pages[MAX_NUMNODES]; /* arch callbacks */ @@ -245,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); +extern void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *begin, unsigned long *end); +extern void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details); + +static inline void hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_begin(vma, start, end); +} + +static inline void hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_end(vma, details); +} + void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); @@ -260,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long cp_flags); bool is_hugetlb_entry_migration(pte_t pte); +bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -296,6 +317,18 @@ static inline void adjust_range_if_pmd_sharing_possible( { } +static inline void hugetlb_zap_begin( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + +static inline void hugetlb_zap_end( + struct vm_area_struct *vma, + struct zap_details *details) +{ +} + static inline struct page *hugetlb_follow_page_mask( struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned int *page_mask) @@ -441,7 +474,7 @@ static inline long hugetlb_change_protection( return 0; } -static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) @@ -512,7 +545,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) } struct hugetlbfs_inode_info { - struct shared_policy policy; struct inode vfs_inode; unsigned int seals; }; @@ -716,8 +748,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); -struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, - unsigned long address); int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -799,7 +829,7 @@ static inline unsigned huge_page_shift(struct hstate *h) static inline bool hstate_is_gigantic(struct hstate *h) { - return huge_page_order(h) > MAX_ORDER; + return huge_page_order(h) > MAX_PAGE_ORDER; } static inline unsigned int pages_per_huge_page(const struct hstate *h) @@ -812,6 +842,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) return huge_page_size(h) / 512; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return filemap_lock_folio(mapping, idx << huge_page_order(h)); +} + #include <asm/hugetlb.h> #ifndef is_hugepage_only_range @@ -1008,6 +1044,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio return NULL; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return NULL; +} + static inline int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { @@ -1028,13 +1070,6 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, return NULL; } -static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, - struct vm_area_struct *vma, - unsigned long address) -{ - return NULL; -} - static inline int __alloc_bootmem_huge_page(struct hstate *h) { return 0; @@ -1233,6 +1268,8 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma) return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } +bool __vma_private_lock(struct vm_area_struct *vma); + /* * Safe version of huge_pte_offset() to check the locks. See comments * above huge_pte_offset(). diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 3d82d91f49ac..e5d64b8b59c2 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -22,13 +22,6 @@ struct resv_map; struct file_region; #ifdef CONFIG_CGROUP_HUGETLB -/* - * Minimum page order trackable by hugetlb cgroup. - * At least 3 pages are necessary for all the tracking information. - * The second tail page contains all of the hugetlb-specific fields. - */ -#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE) - enum hugetlb_memory_event { HUGETLB_MAX, HUGETLB_NR_MEMORY_EVENTS, @@ -68,8 +61,6 @@ static inline struct hugetlb_cgroup * __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) - return NULL; if (rsvd) return folio->_hugetlb_cgroup_rsvd; else @@ -91,8 +82,6 @@ static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) - return; if (rsvd) folio->_hugetlb_cgroup_rsvd = h_cg; else diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8a3115516a1b..136e9842120e 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -63,5 +63,6 @@ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); +extern long hwrng_yield(struct hwrng *rng); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 8cd6a6b33593..edf96f249eb5 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -141,6 +141,7 @@ enum hwmon_in_attributes { hwmon_in_rated_min, hwmon_in_rated_max, hwmon_in_beep, + hwmon_in_fault, }; #define HWMON_I_ENABLE BIT(hwmon_in_enable) @@ -162,6 +163,7 @@ enum hwmon_in_attributes { #define HWMON_I_RATED_MIN BIT(hwmon_in_rated_min) #define HWMON_I_RATED_MAX BIT(hwmon_in_rated_max) #define HWMON_I_BEEP BIT(hwmon_in_beep) +#define HWMON_I_FAULT BIT(hwmon_in_fault) enum hwmon_curr_attributes { hwmon_curr_enable, @@ -293,6 +295,8 @@ enum hwmon_humidity_attributes { hwmon_humidity_fault, hwmon_humidity_rated_min, hwmon_humidity_rated_max, + hwmon_humidity_min_alarm, + hwmon_humidity_max_alarm, }; #define HWMON_H_ENABLE BIT(hwmon_humidity_enable) @@ -306,6 +310,8 @@ enum hwmon_humidity_attributes { #define HWMON_H_FAULT BIT(hwmon_humidity_fault) #define HWMON_H_RATED_MIN BIT(hwmon_humidity_rated_min) #define HWMON_H_RATED_MAX BIT(hwmon_humidity_rated_max) +#define HWMON_H_MIN_ALARM BIT(hwmon_humidity_min_alarm) +#define HWMON_H_MAX_ALARM BIT(hwmon_humidity_max_alarm) enum hwmon_fan_attributes { hwmon_fan_enable, @@ -425,12 +431,12 @@ struct hwmon_channel_info { const u32 *config; }; -#define HWMON_CHANNEL_INFO(stype, ...) \ - (&(struct hwmon_channel_info) { \ - .type = hwmon_##stype, \ - .config = (u32 []) { \ - __VA_ARGS__, 0 \ - } \ +#define HWMON_CHANNEL_INFO(stype, ...) \ + (&(const struct hwmon_channel_info) { \ + .type = hwmon_##stype, \ + .config = (const u32 []) { \ + __VA_ARGS__, 0 \ + } \ }) /** diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2b00faf98017..96ceb4095425 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { @@ -812,6 +832,7 @@ struct vmbus_gpadl { u32 gpadl_handle; u32 size; void *buffer; + bool decrypted; }; struct vmbus_channel { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 0dae9db27538..5e6cd43a6dbd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -23,9 +23,9 @@ #include <linux/swab.h> /* for swab16 */ #include <uapi/linux/i2c.h> -extern struct bus_type i2c_bus_type; -extern struct device_type i2c_adapter_type; -extern struct device_type i2c_client_type; +extern const struct bus_type i2c_bus_type; +extern const struct device_type i2c_adapter_type; +extern const struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ @@ -746,6 +746,8 @@ struct i2c_adapter { struct irq_domain *host_notify_domain; struct regulator *bus_regulator; + + struct dentry *debugfs; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) @@ -850,7 +852,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ -#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Warn users that the adapter doesn't support classes anymore */ #define I2C_CLASS_DEPRECATED (1<<8) @@ -930,7 +931,7 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap) static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) { - return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); + return (msg->addr << 1) | (msg->flags & I2C_M_RD); } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 90fa83464f00..e119f11948ef 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -54,6 +54,7 @@ enum i3c_hdr_mode { * struct i3c_priv_xfer - I3C SDR private transfer * @rnw: encodes the transfer direction. true for a read, false for a write * @len: transfer length in bytes of the transfer + * @actual_len: actual length in bytes are transferred by the controller * @data: input/output buffer * @data.in: input buffer. Must point to a DMA-able buffer * @data.out: output buffer. Must point to a DMA-able buffer @@ -62,6 +63,7 @@ enum i3c_hdr_mode { struct i3c_priv_xfer { u8 rnw; u16 len; + u16 actual_len; union { void *in; const void *out; @@ -96,7 +98,7 @@ enum i3c_dcr { /** * struct i3c_device_info - I3C device information - * @pid: Provisional ID + * @pid: Provisioned ID * @bcr: Bus Characteristic Register * @dcr: Device Characteristic Register * @static_addr: static/I2C address diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0b52da4f2346..0ca27dd86956 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -24,6 +24,12 @@ struct i2c_client; +/* notifier actions. notifier call data is the struct i3c_bus */ +enum { + I3C_NOTIFY_BUS_ADD, + I3C_NOTIFY_BUS_REMOVE, +}; + struct i3c_master_controller; struct i3c_bus; struct i3c_device; @@ -70,7 +76,6 @@ struct i2c_dev_boardinfo { /** * struct i2c_dev_desc - I2C device descriptor * @common: common part of the I2C device descriptor - * @boardinfo: pointer to the boardinfo attached to this I2C device * @dev: I2C device object registered to the I2C framework * @addr: I2C device address * @lvr: LVR (Legacy Virtual Register) needed by the I3C core to know about @@ -129,6 +134,7 @@ struct i3c_ibi_slot { * rejected by the master * @num_slots: number of IBI slots reserved for this device * @enabled: reflect the IBI status + * @wq: workqueue used to execute IBI handlers. * @handler: IBI handler specified at i3c_device_request_ibi() call time. This * handler will be called from the controller workqueue, and as such * is allowed to sleep (though it is recommended to process the IBI @@ -151,6 +157,7 @@ struct i3c_device_ibi_info { unsigned int max_payload_len; unsigned int num_slots; unsigned int enabled; + struct workqueue_struct *wq; void (*handler)(struct i3c_device *dev, const struct i3c_ibi_payload *payload); }; @@ -166,7 +173,7 @@ struct i3c_device_ibi_info { * assigned a dynamic address by the master. Will be used during * bus initialization to assign it a specific dynamic address * before starting DAA (Dynamic Address Assignment) - * @pid: I3C Provisional ID exposed by the device. This is a unique identifier + * @pid: I3C Provisioned ID exposed by the device. This is a unique identifier * that may be used to attach boardinfo to i3c_dev_desc when the device * does not have a static address * @of_node: optional DT node in case the device has been described in the DT @@ -426,6 +433,8 @@ struct i3c_bus { * for a future IBI * This method is mandatory only if ->request_ibi is not * NULL. + * @enable_hotjoin: enable hot join event detect. + * @disable_hotjoin: disable hot join event detect. */ struct i3c_master_controller_ops { int (*bus_init)(struct i3c_master_controller *master); @@ -452,6 +461,8 @@ struct i3c_master_controller_ops { int (*disable_ibi)(struct i3c_dev_desc *dev); void (*recycle_ibi_slot)(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); + int (*enable_hotjoin)(struct i3c_master_controller *master); + int (*disable_hotjoin)(struct i3c_master_controller *master); }; /** @@ -465,11 +476,12 @@ struct i3c_master_controller_ops { * @ops: master operations. See &struct i3c_master_controller_ops * @secondary: true if the master is a secondary master * @init_done: true when the bus initialization is done + * @hotjoin: true if the master support hotjoin * @boardinfo.i3c: list of I3C boardinfo objects * @boardinfo.i2c: list of I2C boardinfo objects * @boardinfo: board-level information attached to devices connected on the bus * @bus: I3C bus exposed by this master - * @wq: workqueue used to execute IBI handlers. Can also be used by master + * @wq: workqueue which can be used by master * drivers if they need to postpone operations that need to take place * in a thread context. Typical examples are Hot Join processing which * requires taking the bus lock in maintenance, which in turn, can only @@ -487,6 +499,7 @@ struct i3c_master_controller { const struct i3c_master_controller_ops *ops; unsigned int secondary : 1; unsigned int init_done : 1; + unsigned int hotjoin: 1; struct { struct list_head i3c; struct list_head i2c; @@ -543,6 +556,8 @@ int i3c_master_register(struct i3c_master_controller *master, const struct i3c_master_controller_ops *ops, bool secondary); void i3c_master_unregister(struct i3c_master_controller *master); +int i3c_master_enable_hotjoin(struct i3c_master_controller *master); +int i3c_master_disable_hotjoin(struct i3c_master_controller *master); /** * i3c_dev_get_master_data() - get master private data attached to an I3C @@ -652,4 +667,9 @@ void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); struct i3c_ibi_slot *i3c_master_get_free_ibi_slot(struct i3c_dev_desc *dev); +void i3c_for_each_bus_locked(int (*fn)(struct i3c_bus *bus, void *data), + void *data); +int i3c_register_notifier(struct notifier_block *nb); +int i3c_unregister_notifier(struct notifier_block *nb); + #endif /* I3C_MASTER_H */ diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bd2f6e19c357..3385a2cc5b09 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2023 Intel Corporation + * Copyright (c) 2018 - 2024 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -172,11 +172,11 @@ #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) -/* PV1 Layout 11ah 9.8.3.1 */ +/* PV1 Layout IEEE 802.11-2020 9.8.3.1 */ #define IEEE80211_PV1_FCTL_VERS 0x0003 #define IEEE80211_PV1_FCTL_FTYPE 0x001c #define IEEE80211_PV1_FCTL_STYPE 0x00e0 -#define IEEE80211_PV1_FCTL_TODS 0x0100 +#define IEEE80211_PV1_FCTL_FROMDS 0x0100 #define IEEE80211_PV1_FCTL_MOREFRAGS 0x0200 #define IEEE80211_PV1_FCTL_PM 0x0400 #define IEEE80211_PV1_FCTL_MOREDATA 0x0800 @@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } +static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) +{ + return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); +} + static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; @@ -307,6 +312,13 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_TRIGGER_TYPE_BQRP 0x6 #define IEEE80211_TRIGGER_TYPE_NFRP 0x7 +/* UL-bandwidth within common_info of trigger frame */ +#define IEEE80211_TRIGGER_ULBW_MASK 0xc0000 +#define IEEE80211_TRIGGER_ULBW_20MHZ 0x0 +#define IEEE80211_TRIGGER_ULBW_40MHZ 0x1 +#define IEEE80211_TRIGGER_ULBW_80MHZ 0x2 +#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; @@ -801,6 +813,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); } +static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr) +{ + return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; @@ -951,17 +968,24 @@ struct ieee80211_wide_bw_chansw_ie { * @dtim_count: DTIM Count * @dtim_period: DTIM Period * @bitmap_ctrl: Bitmap Control + * @required_octet: "Syntatic sugar" to force the struct size to the + * minimum valid size when carried in a non-S1G PPDU * @virtual_map: Partial Virtual Bitmap * * This structure represents the payload of the "TIM element" as - * described in IEEE Std 802.11-2020 section 9.4.2.5. + * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this + * definition is only applicable when the element is carried in a + * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap + * Control and Partial Virtual Bitmap may not be present. */ struct ieee80211_tim_ie { u8 dtim_count; u8 dtim_period; u8 bitmap_ctrl; - /* variable size: 1 - 251 bytes */ - u8 virtual_map[1]; + union { + u8 required_octet; + DECLARE_FLEX_ARRAY(u8, virtual_map); + }; } __packed; /** @@ -1239,6 +1263,30 @@ struct ieee80211_twt_setup { u8 params[]; } __packed; +#define IEEE80211_TTLM_MAX_CNT 2 +#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 +#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 +#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 +#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 +#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 + +#define IEEE80211_TTLM_DIRECTION_DOWN 0 +#define IEEE80211_TTLM_DIRECTION_UP 1 +#define IEEE80211_TTLM_DIRECTION_BOTH 2 + +/** + * struct ieee80211_ttlm_elem - TID-To-Link Mapping element + * + * Defined in section 9.4.2.314 in P802.11be_D4 + * + * @control: the first part of control field + * @optional: the second part of control field + */ +struct ieee80211_ttlm_elem { + u8 control; + u8 optional[]; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -1416,6 +1464,20 @@ struct ieee80211_mgmt { u8 max_tod_error; u8 max_toa_error; } __packed wnm_timing_msr; + struct { + u8 action_code; + u8 dialog_token; + u8 variable[]; + } __packed ttlm_req; + struct { + u8 action_code; + u8 dialog_token; + u8 status_code; + u8 variable[]; + } __packed ttlm_res; + struct { + u8 action_code; + } __packed ttlm_tear_down; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ @@ -1674,6 +1736,8 @@ struct ieee80211_mcs_info { #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 +#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 1 : (1 + ((mcs) >> 3))) + /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream @@ -2680,6 +2744,7 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) #define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 #define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 +#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 /** * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field @@ -2790,12 +2855,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { - const u8 *ret = (const void *)&he_oper->optional; + const u8 *ret; u32 he_oper_params; if (!he_oper) return NULL; + ret = (const void *)&he_oper->optional; + he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) @@ -2993,6 +3060,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 @@ -3132,6 +3202,44 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +/* must validate ieee80211_eht_oper_size_ok() first */ +static inline u16 +ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) +{ + const struct ieee80211_eht_operation_info *info = + (const void *)eht_oper->optional; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) + return 0; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) + return 0; + + return get_unaligned_le16(info->optional); +} + +#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) + +struct ieee80211_bandwidth_indication { + u8 params; + struct ieee80211_eht_operation_info info; +} __packed; + +static inline bool +ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_bandwidth_indication *bwi = (const void *)data; + + if (len < sizeof(*bwi)) + return false; + + if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && + len < sizeof(*bwi) + 2) + return false; + + return true; +} + #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3292,6 +3400,8 @@ enum ieee80211_statuscode { WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126, WLAN_STATUS_SAE_PK = 127, + WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, + WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, }; @@ -3589,6 +3699,8 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, + WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ @@ -3615,6 +3727,7 @@ enum ieee80211_category { WLAN_CATEGORY_UNPROT_DMG = 20, WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_S1G = 22, + WLAN_CATEGORY_PROTECTED_EHT = 37, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -3678,6 +3791,13 @@ enum ieee80211_unprotected_wnm_actioncode { WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, }; +/* Protected EHT action codes */ +enum ieee80211_protected_eht_actioncode { + WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, + WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, + WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, @@ -4356,6 +4476,36 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, } /** + * ieee80211_is_protected_dual_of_public_action - check if skb contains a + * protected dual of public action management frame + * @skb: the skb containing the frame, length will be checked + * + * Return: true if the skb contains a protected dual of public action + * management frame, false otherwise. + */ +static inline bool +ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) +{ + u8 action; + + if (!ieee80211_is_public_action((void *)skb->data, skb->len) || + skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + return false; + + action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + + return action != WLAN_PUB_ACTION_20_40_BSS_COEX && + action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && + action != WLAN_PUB_ACTION_MSMT_PILOT && + action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && + action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && + action != WLAN_PUB_ACTION_FTM_REQUEST && + action != WLAN_PUB_ACTION_FTM_RESPONSE && + action != WLAN_PUB_ACTION_FILS_DISCOVERY && + action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; +} + +/** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @hdr: the frame @@ -4426,12 +4576,11 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, /** * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) * @skb: the skb containing the frame, length will not be checked - * @hdr_size: the size of the ieee80211_hdr that starts at skb->data * * This function assumes the frame is a data frame, and that the network header * is in the correct place. */ -static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) +static inline int ieee80211_get_tdls_action(struct sk_buff *skb) { if (!skb_is_nonlinear(skb) && skb->len > (skb_network_offset(skb) + 2)) { @@ -4749,6 +4898,10 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f #define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 @@ -4812,18 +4965,43 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) } /** + * ieee80211_mle_get_link_id - returns the link ID + * @data: the basic multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the BSS link ID can't be found, -1 will be returned + */ +static inline int ieee80211_mle_get_link_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* common points now at the beginning of ieee80211_mle_basic_common_info */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) + return -1; + + return *common; +} + +/** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count - * @mle: the basic multi link element + * @data: pointer to the basic multi link element * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). * * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), 0 will be returned. + * for it is clear), -1 will be returned. */ -static inline u8 -ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) +static inline int +ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { + const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; @@ -4831,7 +5009,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) - return 0; + return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; @@ -4901,6 +5079,81 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) } /** + * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. + * @data: pointer to the multi link EHT IE + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD capabilities and operations field is not present, 0 will be + * returned. + */ +static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + + return get_unaligned_le16(common); +} + +/** + * ieee80211_mle_get_mld_id - returns the MLD ID + * @data: pointer to the multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD ID is not present, 0 will be returned. + */ +static inline u8 ieee80211_mle_get_mld_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + + return *common; +} + +/** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element @@ -5125,6 +5378,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_ttlm_elem *t2l = (const void *)data; + u8 control, fixed = sizeof(*t2l), elem_len = 0; + + if (len < fixed) + return false; + + control = t2l->control; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) + elem_len += 2; + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) + elem_len += 3; + + if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { + u8 bm_size; + + elem_len += 1; + if (len < fixed + elem_len) + return false; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + bm_size = 1; + else + bm_size = 2; + + elem_len += hweight8(t2l->optional[0]) * bm_size; + } + + return len >= fixed + elem_len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 2a7660843444..043d442994b0 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -27,44 +27,54 @@ struct tun_xdp_hdr { #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); + static inline bool tun_is_xdp_frame(void *ptr) { - return (unsigned long)ptr & TUN_XDP_FLAG; + return (unsigned long)ptr & TUN_XDP_FLAG; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { - return (void *)((unsigned long)xdp | TUN_XDP_FLAG); + return (void *)((unsigned long)xdp | TUN_XDP_FLAG); } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { - return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); } + void tun_ptr_free(void *ptr); #else #include <linux/err.h> #include <linux/errno.h> struct file; struct socket; + static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } + static inline struct ptr_ring *tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); } + static inline bool tun_is_xdp_frame(void *ptr) { return false; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { return NULL; } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { return NULL; } + static inline void tun_ptr_free(void *ptr) { } diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3028af87716e..c1645c86eed9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -540,7 +540,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); if (!eth_type_vlan(veth->h_vlan_proto)) - return -EINVAL; + return -ENODATA; *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; @@ -561,7 +561,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, return 0; } else { *vlan_tci = 0; - return -EINVAL; + return -ENODATA; } } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index ebf4349a53af..5171231f70a8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,7 +39,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[]; + __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include <linux/iio/iio.h> + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; diff --git a/include/linux/iio/adc/adi-axi-adc.h b/include/linux/iio/adc/adi-axi-adc.h deleted file mode 100644 index 52620e5b8052..000000000000 --- a/include/linux/iio/adc/adi-axi-adc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Analog Devices Generic AXI ADC IP core driver/library - * Link: https://wiki.analog.com/resources/fpga/docs/axi_adc_ip - * - * Copyright 2012-2020 Analog Devices Inc. - */ -#ifndef __ADI_AXI_ADC_H__ -#define __ADI_AXI_ADC_H__ - -struct device; -struct iio_chan_spec; - -/** - * struct adi_axi_adc_chip_info - Chip specific information - * @name Chip name - * @id Chip ID (usually product ID) - * @channels Channel specifications of type @struct iio_chan_spec - * @num_channels Number of @channels - * @scale_table Supported scales by the chip; tuples of 2 ints - * @num_scales Number of scales in the table - * @max_rate Maximum sampling rate supported by the device - */ -struct adi_axi_adc_chip_info { - const char *name; - unsigned int id; - - const struct iio_chan_spec *channels; - unsigned int num_channels; - - const unsigned int (*scale_table)[2]; - int num_scales; - - unsigned long max_rate; -}; - -/** - * struct adi_axi_adc_conv - data of the ADC attached to the AXI ADC - * @chip_info chip info details for the client ADC - * @preenable_setup op to run in the client before enabling the AXI ADC - * @reg_access IIO debugfs_reg_access hook for the client ADC - * @read_raw IIO read_raw hook for the client ADC - * @write_raw IIO write_raw hook for the client ADC - */ -struct adi_axi_adc_conv { - const struct adi_axi_adc_chip_info *chip_info; - - int (*preenable_setup)(struct adi_axi_adc_conv *conv); - int (*reg_access)(struct adi_axi_adc_conv *conv, unsigned int reg, - unsigned int writeval, unsigned int *readval); - int (*read_raw)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - int *val, int *val2, long mask); - int (*write_raw)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - int val, int val2, long mask); -}; - -struct adi_axi_adc_conv *devm_adi_axi_adc_conv_register(struct device *dev, - size_t sizeof_priv); - -void *adi_axi_adc_conv_priv(struct adi_axi_adc_conv *conv); - -#endif diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h new file mode 100644 index 000000000000..a6d79381866e --- /dev/null +++ b/include/linux/iio/backend.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _IIO_BACKEND_H_ +#define _IIO_BACKEND_H_ + +#include <linux/types.h> + +struct fwnode_handle; +struct iio_backend; +struct device; +struct iio_dev; + +enum iio_backend_data_type { + IIO_BACKEND_TWOS_COMPLEMENT, + IIO_BACKEND_OFFSET_BINARY, + IIO_BACKEND_DATA_TYPE_MAX +}; + +/** + * struct iio_backend_data_fmt - Backend data format + * @type: Data type. + * @sign_extend: Bool to tell if the data is sign extended. + * @enable: Enable/Disable the data format module. If disabled, + * not formatting will happen. + */ +struct iio_backend_data_fmt { + enum iio_backend_data_type type; + bool sign_extend; + bool enable; +}; + +/** + * struct iio_backend_ops - operations structure for an iio_backend + * @enable: Enable backend. + * @disable: Disable backend. + * @chan_enable: Enable one channel. + * @chan_disable: Disable one channel. + * @data_format_set: Configure the data format for a specific channel. + * @request_buffer: Request an IIO buffer. + * @free_buffer: Free an IIO buffer. + **/ +struct iio_backend_ops { + int (*enable)(struct iio_backend *back); + void (*disable)(struct iio_backend *back); + int (*chan_enable)(struct iio_backend *back, unsigned int chan); + int (*chan_disable)(struct iio_backend *back, unsigned int chan); + int (*data_format_set)(struct iio_backend *back, unsigned int chan, + const struct iio_backend_data_fmt *data); + struct iio_buffer *(*request_buffer)(struct iio_backend *back, + struct iio_dev *indio_dev); + void (*free_buffer)(struct iio_backend *back, + struct iio_buffer *buffer); +}; + +int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); +int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); +int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); +int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, + const struct iio_backend_data_fmt *data); +int devm_iio_backend_request_buffer(struct device *dev, + struct iio_backend *back, + struct iio_dev *indio_dev); + +void *iio_backend_get_priv(const struct iio_backend *conv); +struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); +struct iio_backend * +__devm_iio_backend_get_from_fwnode_lookup(struct device *dev, + struct fwnode_handle *fwnode); + +int devm_iio_backend_register(struct device *dev, + const struct iio_backend_ops *ops, void *priv); + +#endif diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 6564bdcdac66..18d3702fa95d 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -19,14 +19,12 @@ struct device; /** * enum iio_block_state - State of a struct iio_dma_buffer_block - * @IIO_BLOCK_STATE_DEQUEUED: Block is not queued * @IIO_BLOCK_STATE_QUEUED: Block is on the incoming queue * @IIO_BLOCK_STATE_ACTIVE: Block is currently being processed by the DMA * @IIO_BLOCK_STATE_DONE: Block is on the outgoing queue * @IIO_BLOCK_STATE_DEAD: Block has been marked as to be freed */ enum iio_block_state { - IIO_BLOCK_STATE_DEQUEUED, IIO_BLOCK_STATE_QUEUED, IIO_BLOCK_STATE_ACTIVE, IIO_BLOCK_STATE_DONE, @@ -73,12 +71,15 @@ struct iio_dma_buffer_block { * @active_block: Block being used in read() * @pos: Read offset in the active block * @block_size: Size of each block + * @next_dequeue: index of next block that will be dequeued */ struct iio_dma_buffer_queue_fileio { struct iio_dma_buffer_block *blocks[2]; struct iio_dma_buffer_block *active_block; size_t pos; size_t block_size; + + unsigned int next_dequeue; }; /** @@ -93,7 +94,6 @@ struct iio_dma_buffer_queue_fileio { * list and typically also a list of active blocks in the part that handles * the DMA controller * @incoming: List of buffers on the incoming queue - * @outgoing: List of buffers on the outgoing queue * @active: Whether the buffer is currently active * @fileio: FileIO state */ @@ -105,7 +105,6 @@ struct iio_dma_buffer_queue { struct mutex lock; spinlock_t list_lock; struct list_head incoming; - struct list_head outgoing; bool active; diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 5c355be89814..cbb8ba957fad 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -10,6 +10,9 @@ struct iio_dev; struct device; +struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, + const char *channel); +void iio_dmaengine_buffer_free(struct iio_buffer *buffer); int devm_iio_dmaengine_buffer_setup(struct device *dev, struct iio_dev *indio_dev, const char *channel); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a647..f9ae5cdd884f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 202e55b0a28b..e370a7bb3300 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/cdev.h> +#include <linux/cleanup.h> #include <linux/slab.h> #include <linux/iio/types.h> /* IIO TODO LIST */ @@ -427,18 +428,14 @@ struct iio_trigger; /* forward declaration */ * @write_event_config: set if the event is enabled. * @read_event_value: read a configuration value associated with the event. * @write_event_value: write a configuration value for the event. + * @read_event_label: function to request label name for a specified label, + * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. * @update_scan_mode: function to configure device and scan buffer when * channels have changed * @debugfs_reg_access: function to read or write register value of device - * @of_xlate: function pointer to obtain channel specifier index. - * When #iio-cells is greater than '0', the driver could - * provide a custom of_xlate function that reads the - * *args* and returns the appropriate index in registered - * IIO channels array. * @fwnode_xlate: fwnode based function pointer to obtain channel specifier index. - * Functionally the same as @of_xlate. * @hwfifo_set_watermark: function pointer to set the current hardware * fifo watermark level; see hwfifo_* entries in * Documentation/ABI/testing/sysfs-bus-iio for details on @@ -511,6 +508,12 @@ struct iio_info { enum iio_event_direction dir, enum iio_event_info info, int val, int val2); + int (*read_event_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + enum iio_event_type type, + enum iio_event_direction dir, + char *label); + int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); int (*update_scan_mode)(struct iio_dev *indio_dev, @@ -556,7 +559,9 @@ struct iio_buffer_setup_ops { * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux - * @available_scan_masks: [DRIVER] optional array of allowed bitmasks + * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the + * array in order of preference, the most preferred + * masks first. * @masklength: [INTERN] the length of the mask established from * channels * @active_scan_mask: [INTERN] union of all scan masks requested by buffers @@ -634,10 +639,37 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); + +/* + * This autocleanup logic is normally used via + * iio_device_claim_direct_scoped(). + */ +DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), + iio_device_release_direct_mode(_T)) + +DEFINE_GUARD_COND(iio_claim_direct, _try, ({ + struct iio_dev *dev; + int d = iio_device_claim_direct_mode(_T); + + if (d < 0) + dev = NULL; + else + dev = _T; + dev; + })) + +/** + * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. + * @fail: What to do on failure to claim device. + * @iio_dev: Pointer to the IIO devices structure + */ +#define iio_device_claim_direct_scoped(fail, iio_dev) \ + scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) + int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); -extern struct bus_type iio_bus_type; +extern const struct bus_type iio_bus_type; /** * iio_device_put() - reference counted deallocation of struct device diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e088..8898966bc0f0 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include <linux/spi/spi.h> #include <linux/interrupt.h> +#include <linux/iio/iio.h> #include <linux/iio/types.h> #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index eff1e6b2595c..0f7fe7b522e3 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -51,9 +51,6 @@ void iio_unregister_sw_device_type(struct iio_sw_device_type *dt); struct iio_sw_device *iio_sw_device_create(const char *, const char *); void iio_sw_device_destroy(struct iio_sw_device *); -int iio_sw_device_type_configfs_register(struct iio_sw_device_type *dt); -void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt); - static inline void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 47de2443e984..bc77f88df303 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -51,9 +51,6 @@ void iio_unregister_sw_trigger_type(struct iio_sw_trigger_type *tt); struct iio_sw_trigger *iio_sw_trigger_create(const char *, const char *); void iio_sw_trigger_destroy(struct iio_sw_trigger *); -int iio_sw_trigger_type_configfs_register(struct iio_sw_trigger_type *tt); -void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt); - static inline void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 117bde7d6ad7..d89982c98368 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -68,6 +68,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_THERMOCOUPLE_TYPE, IIO_CHAN_INFO_CALIBAMBIENT, IIO_CHAN_INFO_ZEROPOINT, + IIO_CHAN_INFO_TROUGH, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/linux/ima.h b/include/linux/ima.h index 86b57757c7b1..0bae61a15b60 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -16,23 +16,6 @@ struct linux_binprm; #ifdef CONFIG_IMA extern enum hash_algo ima_get_current_hash_algo(void); -extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); -extern void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode); -extern void ima_file_free(struct file *file); -extern int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); -extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); -extern int ima_load_data(enum kernel_load_data_id id, bool contents); -extern int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, char *description); -extern int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents); -extern int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id); -extern void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); @@ -57,68 +40,6 @@ static inline enum hash_algo ima_get_current_hash_algo(void) return HASH_ALGO__LAST; } -static inline int ima_bprm_check(struct linux_binprm *bprm) -{ - return 0; -} - -static inline int ima_file_check(struct file *file, int mask) -{ - return 0; -} - -static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode) -{ -} - -static inline void ima_file_free(struct file *file) -{ - return; -} - -static inline int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) -{ - return 0; -} - -static inline int ima_file_mprotect(struct vm_area_struct *vma, - unsigned long prot) -{ - return 0; -} - -static inline int ima_load_data(enum kernel_load_data_id id, bool contents) -{ - return 0; -} - -static inline int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, - char *description) -{ - return 0; -} - -static inline int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents) -{ - return 0; -} - -static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id) -{ - return 0; -} - -static inline void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) { return -EOPNOTSUPP; @@ -169,76 +90,13 @@ static inline void ima_add_kexec_buffer(struct kimage *image) {} #endif -#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS -extern void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, size_t plen, - unsigned long flags, bool create); -#else -static inline void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, - size_t plen, - unsigned long flags, - bool create) {} -#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */ - #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); -extern void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry); -extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, - const void *xattr_value, size_t xattr_value_len); -extern int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return ima_inode_set_acl(idmap, dentry, acl_name, NULL); -} -extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else static inline bool is_ima_appraise_enabled(void) { return 0; } - -static inline void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - -static inline int ima_inode_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return 0; -} - -static inline int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - - return 0; -} - -static inline int ima_inode_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} #endif /* CONFIG_IMA_APPRAISE */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index c1c76a70a6ce..35227d47cfc9 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -2,7 +2,7 @@ #ifndef _LINUX_INDIRECT_CALL_WRAPPER_H #define _LINUX_INDIRECT_CALL_WRAPPER_H -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin @@ -11,7 +11,7 @@ * @__VA_ARGS__: arguments for @f * * Avoid retpoline overhead for known builtin, checking @f vs each of them and - * eventually invoking directly the builtin function. The functions are check + * eventually invoking directly the builtin function. The functions are checked * in the given order. Fallback to the indirect call. */ #define INDIRECT_CALL_1(f, f1, ...) \ diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 84abb30a3fbb..a9033696b0aa 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -8,6 +8,7 @@ struct inet_hashinfo; struct inet_diag_handler { + struct module *owner; void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ddb27fc0ee8c..cb5280e6cc21 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -53,13 +53,15 @@ struct in_device { }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) +#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) +#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) -static inline int ipv4_devconf_get(struct in_device *in_dev, int index) +static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; - return in_dev->cnf.data[index]; + return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, @@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, { index--; set_bit(index, in_dev->cnf.state); - in_dev->cnf.data[index] = val; + WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) @@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ - (IPV4_DEVCONF_ALL(net, attr) || \ + (IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ + (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) diff --git a/include/linux/init.h b/include/linux/init.h index 266c3e1640d4..58cef4c2e59a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -89,9 +89,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" @@ -171,17 +168,20 @@ extern initcall_entry_t __initcall_end[]; extern struct file_system_type rootfs_fs_type; -#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) extern bool rodata_enabled; -#endif -#ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void); -#endif extern void (*late_time_init)(void); extern bool initcall_debug; +#ifdef MODULE +extern struct module __this_module; +#define THIS_MODULE (&__this_module) +#else +#define THIS_MODULE ((struct module *)0) +#endif + #endif #ifndef MODULE diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 40fc5813cf93..bccb3f1f6262 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -37,13 +37,6 @@ extern struct cred init_cred; #define INIT_TASK_COMM "swapper" -/* Attach to the init_task data structure for proper alignment */ -#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -#define __init_task_data __section(".data..init_task") -#else -#define __init_task_data /**/ -#endif - /* Attach to the thread_info data structure for proper alignment */ #define __init_thread_info __section(".data..init_thread_info") diff --git a/include/linux/input.h b/include/linux/input.h index 49790c1bd2c4..c22ac465254b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -514,7 +514,7 @@ void input_enable_softrepeat(struct input_dev *dev, int delay, int period); bool input_device_enabled(struct input_dev *dev); -extern struct class input_class; +extern const struct class input_class; /** * struct ff_device - force-feedback part of an input device @@ -562,7 +562,7 @@ struct ff_device { int max_effects; struct ff_effect *effects; - struct file *effect_owners[]; + struct file *effect_owners[] __counted_by(max_effects); }; int input_ff_create(struct input_dev *dev, unsigned int max_effects); diff --git a/include/linux/input/as5011.h b/include/linux/input/as5011.h index 5fba52a56cd6..5705d5de3aea 100644 --- a/include/linux/input/as5011.h +++ b/include/linux/input/as5011.h @@ -7,7 +7,6 @@ */ struct as5011_platform_data { - unsigned int button_gpio; unsigned int axis_irq; /* irq number */ unsigned long axis_irqflags; char xp, xn; /* threshold for x axis */ diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 3b8580bd33c1..2cf89a538b18 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -47,7 +47,7 @@ struct input_mt { unsigned int flags; unsigned int frame; int *red; - struct input_mt_slot slots[]; + struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, diff --git a/include/linux/input/navpoint.h b/include/linux/input/navpoint.h deleted file mode 100644 index d464ffb4db52..000000000000 --- a/include/linux/input/navpoint.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 Paul Parsons <lost.distance@yahoo.com> - */ - -struct navpoint_platform_data { - int port; /* PXA SSP port for pxa_ssp_request() */ - int gpio; /* GPIO for power on/off */ -}; diff --git a/include/linux/integrity.h b/include/linux/integrity.h index 2ea0f2f65ab6..459b79683783 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -19,40 +19,13 @@ enum integrity_status { INTEGRITY_UNKNOWN, }; -/* List of EVM protected security xattrs */ #ifdef CONFIG_INTEGRITY -extern struct integrity_iint_cache *integrity_inode_get(struct inode *inode); -extern void integrity_inode_free(struct inode *inode); extern void __init integrity_load_keys(void); #else -static inline struct integrity_iint_cache * - integrity_inode_get(struct inode *inode) -{ - return NULL; -} - -static inline void integrity_inode_free(struct inode *inode) -{ - return; -} - static inline void integrity_load_keys(void) { } #endif /* CONFIG_INTEGRITY */ -#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS - -extern int integrity_kernel_module_request(char *kmod_name); - -#else - -static inline int integrity_kernel_module_request(char *kmod_name) -{ - return 0; -} - -#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ - #endif /* _LINUX_INTEGRITY_H */ diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index f45f13304add..771622650247 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -94,6 +94,9 @@ int ishtp_cl_link(struct ishtp_cl *cl); void ishtp_cl_unlink(struct ishtp_cl *cl); int ishtp_cl_disconnect(struct ishtp_cl *cl); int ishtp_cl_connect(struct ishtp_cl *cl); +int ishtp_cl_establish_connection(struct ishtp_cl *cl, const guid_t *uuid, + int tx_size, int rx_size, bool reset); +void ishtp_cl_destroy_connection(struct ishtp_cl *cl, bool reset); int ishtp_cl_send(struct ishtp_cl *cl, uint8_t *buf, size_t length); int ishtp_cl_flush_queues(struct ishtp_cl *cl); int ishtp_cl_io_rb_recycle(struct ishtp_cl_rb *rb); diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 33f21bd85dbf..f3196f82fd8a 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -178,6 +178,12 @@ struct rapl_package { struct rapl_if_priv *priv; }; +struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +void rapl_remove_package_cpuslocked(struct rapl_package *rp); + struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu); struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu); void rapl_remove_package(struct rapl_package *rp); diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h index f422612c28d6..8ff8eabb4a98 100644 --- a/include/linux/intel_tcc.h +++ b/include/linux/intel_tcc.h @@ -13,6 +13,6 @@ int intel_tcc_get_tjmax(int cpu); int intel_tcc_get_offset(int cpu); int intel_tcc_set_offset(int cpu, int offset); -int intel_tcc_get_temp(int cpu, bool pkg); +int intel_tcc_get_temp(int cpu, int *temp, bool pkg); #endif /* __INTEL_TCC_H__ */ diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 04d937ad4dc4..a3529b962be6 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -6,6 +6,25 @@ #ifndef _INTEL_TPMI_H_ #define _INTEL_TPMI_H_ +#include <linux/bitfield.h> + +#define TPMI_VERSION_INVALID 0xff +#define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val) +#define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val) + +/* + * List of supported TMPI IDs. + * Some TMPI IDs are not used by Linux, so the numbers are not consecutive. + */ +enum intel_tpmi_id { + TPMI_ID_RAPL = 0, /* Running Average Power Limit */ + TPMI_ID_PEM = 1, /* Power and Perf excursion Monitor */ + TPMI_ID_UNCORE = 2, /* Uncore Frequency Scaling */ + TPMI_ID_SST = 5, /* Speed Select Technology */ + TPMI_CONTROL_ID = 0x80, /* Special ID for getting feature status */ + TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ +}; + /** * struct intel_tpmi_plat_info - Platform information for a TPMI device instance * @package_id: CPU Package id @@ -26,7 +45,6 @@ struct intel_tpmi_plat_info { struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); int tpmi_get_resource_count(struct auxiliary_device *auxdev); - -int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, int *locked, - int *disabled); +int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked, + bool *write_blocked); #endif diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 7ba183f221f1..f5aef8784692 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -36,7 +36,7 @@ struct icc_onecell_data { struct icc_node *nodes[] __counted_by(num_nodes); }; -struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, +struct icc_node *of_icc_xlate_onecell(const struct of_phandle_args *spec, void *data); /** @@ -65,8 +65,9 @@ struct icc_provider { u32 peak_bw, u32 *agg_avg, u32 *agg_peak); void (*pre_aggregate)(struct icc_node *node); int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak); - struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); - struct icc_node_data* (*xlate_extended)(struct of_phandle_args *spec, void *data); + struct icc_node* (*xlate)(const struct of_phandle_args *spec, void *data); + struct icc_node_data* (*xlate_extended)(const struct of_phandle_args *spec, + void *data); struct device *dev; int users; bool inter_set; @@ -124,7 +125,7 @@ int icc_nodes_remove(struct icc_provider *provider); void icc_provider_init(struct icc_provider *provider); int icc_provider_register(struct icc_provider *provider); void icc_provider_deregister(struct icc_provider *provider); -struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); +struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec); void icc_sync_state(struct device *dev); #else @@ -171,7 +172,7 @@ static inline int icc_provider_register(struct icc_provider *provider) static inline void icc_provider_deregister(struct icc_provider *provider) { } -static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec) +static inline struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec) { return ERR_PTR(-ENOTSUPP); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 4a1dc88ddbff..5c9bdd3ffccc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -67,6 +67,8 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared + * interrupt. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +84,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -566,7 +569,7 @@ enum * * _ RCU: * 1) rcutree_migrate_callbacks() migrates the queue. - * 2) rcu_report_dead() reports the final quiescent states. + * 2) rcutree_report_cpu_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue * diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1b7a44b35616..86cf1f7ae389 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -100,6 +100,30 @@ struct io_pgtable_cfg { const struct iommu_flush_ops *tlb; struct device *iommu_dev; + /** + * @alloc: Custom page allocator. + * + * Optional hook used to allocate page tables. If this function is NULL, + * @free must be NULL too. + * + * Memory returned should be zeroed and suitable for dma_map_single() and + * virt_to_phys(). + * + * Not all formats support custom page allocators. Before considering + * passing a non-NULL value, make sure the chosen page format supports + * this feature. + */ + void *(*alloc)(void *cookie, size_t size, gfp_t gfp); + + /** + * @free: Custom page de-allocator. + * + * Optional hook used to free page tables allocated with the @alloc + * hook. Must be non-NULL if @alloc is not NULL, must be NULL + * otherwise. + */ + void (*free)(void *cookie, void *pages, size_t size); + /* Low-level data specific to the table format */ union { struct { @@ -166,6 +190,10 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); }; /** @@ -238,15 +266,25 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, } /** + * enum io_pgtable_caps - IO page table backend capabilities. + */ +enum io_pgtable_caps { + /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */ + IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0), +}; + +/** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. * * @alloc: Allocate a set of page tables described by cfg. * @free: Free the page tables associated with iop. + * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities. */ struct io_pgtable_init_fns { struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); void (*free)(struct io_pgtable *iop); + u32 caps; }; extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; diff --git a/include/linux/io.h b/include/linux/io.h index 7304f2a69960..235ba7d80a8f 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -23,12 +23,19 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot); +int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot); #else static inline int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { return 0; } +static inline int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) +{ + return 0; +} #endif /* diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 106cdc55ff3b..68ed6697fece 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -6,63 +6,13 @@ #include <linux/xarray.h> #include <uapi/linux/io_uring.h> -enum io_uring_cmd_flags { - IO_URING_F_COMPLETE_DEFER = 1, - IO_URING_F_UNLOCKED = 2, - /* the request is executed from poll, it should not be freed */ - IO_URING_F_MULTISHOT = 4, - /* executed by io-wq */ - IO_URING_F_IOWQ = 8, - /* int's last bit, sign checks are usually faster than a bit test */ - IO_URING_F_NONBLOCK = INT_MIN, - - /* ctx state flags, for URING_CMD */ - IO_URING_F_SQE128 = (1 << 8), - IO_URING_F_CQE32 = (1 << 9), - IO_URING_F_IOPOLL = (1 << 10), -}; - -struct io_uring_cmd { - struct file *file; - const struct io_uring_sqe *sqe; - union { - /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); - /* used for polled completion */ - void *cookie; - }; - u32 cmd_op; - u32 flags; - u8 pdu[32]; /* available inline for free use */ -}; - -static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) -{ - return sqe->cmd; -} - #if defined(CONFIG_IO_URING) -int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd); -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, - unsigned issue_flags); -struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); -void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned), - unsigned flags); -/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */ -void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)); - -static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) -{ - __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0); -} +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); +bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) { @@ -81,29 +31,7 @@ static inline void io_uring_free(struct task_struct *tsk) if (tsk->io_uring) __io_uring_free(tsk); } -int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); #else -static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd) -{ - return -EOPNOTSUPP; -} -static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2, unsigned issue_flags) -{ -} -static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) -{ -} -static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) -{ -} -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} static inline void io_uring_task_cancel(void) { } @@ -122,6 +50,10 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, { return -EOPNOTSUPP; } +static inline bool io_is_uring_fops(struct file *file) +{ + return false; +} #endif #endif diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h new file mode 100644 index 000000000000..e453a997c060 --- /dev/null +++ b/include/linux/io_uring/cmd.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IO_URING_CMD_H +#define _LINUX_IO_URING_CMD_H + +#include <uapi/linux/io_uring.h> +#include <linux/io_uring_types.h> + +/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_CANCELABLE (1U << 30) + +struct io_uring_cmd { + struct file *file; + const struct io_uring_sqe *sqe; + /* callback to defer completions to task context */ + void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); + u32 cmd_op; + u32 flags; + u8 pdu[32]; /* available inline for free use */ +}; + +static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return sqe->cmd; +} + +#if defined(CONFIG_IO_URING) +int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, void *ioucmd); +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); +void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned), + unsigned flags); + +void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags); + +#else +static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, void *ioucmd) +{ + return -EOPNOTSUPP; +} +static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, + ssize_t ret2, unsigned issue_flags) +{ +} +static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned), + unsigned flags) +{ +} +static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ +} +#endif + +/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ +static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) +{ + __io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE); +} + +static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) +{ + __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0); +} + +static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->task; +} + +#endif /* _LINUX_IO_URING_CMD_H */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 13d19b9be9f4..ac333ea81d31 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -2,11 +2,43 @@ #define IO_URING_TYPES_H #include <linux/blkdev.h> +#include <linux/hashtable.h> #include <linux/task_work.h> #include <linux/bitmap.h> #include <linux/llist.h> #include <uapi/linux/io_uring.h> +enum { + /* + * A hint to not wake right away but delay until there are enough of + * tw's queued to match the number of CQEs the task is waiting for. + * + * Must not be used with requests generating more than one CQE. + * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set. + */ + IOU_F_TWQ_LAZY_WAKE = 1, +}; + +enum io_uring_cmd_flags { + IO_URING_F_COMPLETE_DEFER = 1, + IO_URING_F_UNLOCKED = 2, + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 4, + /* executed by io-wq */ + IO_URING_F_IOWQ = 8, + /* int's last bit, sign checks are usually faster than a bit test */ + IO_URING_F_NONBLOCK = INT_MIN, + + /* ctx state flags, for URING_CMD */ + IO_URING_F_SQE128 = (1 << 8), + IO_URING_F_CQE32 = (1 << 9), + IO_URING_F_IOPOLL = (1 << 10), + + /* set when uring wants to cancel a previously issued command */ + IO_URING_F_CANCEL = (1 << 11), + IO_URING_F_COMPAT = (1 << 12), +}; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -209,12 +241,14 @@ struct io_ring_ctx { unsigned int poll_activated: 1; unsigned int drain_disabled: 1; unsigned int compat: 1; + unsigned int iowq_limits_set : 1; struct task_struct *submitter_task; struct io_rings *rings; struct percpu_ref refs; enum task_work_notify_mode notify_method; + unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; /* submission data */ @@ -243,14 +277,23 @@ struct io_ring_ctx { */ struct io_rsrc_node *rsrc_node; atomic_t cancel_seq; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + bool poll_multi_queue; + struct io_wq_work_list iopoll_list; + struct io_file_table file_table; + struct io_mapped_ubuf **user_bufs; unsigned nr_user_files; unsigned nr_user_bufs; - struct io_mapped_ubuf **user_bufs; struct io_submit_state submit_state; - struct io_buffer_list *io_bl; struct xarray io_bl_xa; struct io_hash_table cancel_table_locked; @@ -258,13 +301,10 @@ struct io_ring_ctx { struct io_alloc_cache netmsg_cache; /* - * ->iopoll_list is protected by the ctx->uring_lock for - * io_uring instances that don't use IORING_SETUP_SQPOLL. - * For SQPOLL, only the single threaded io_sq_thread() will - * manipulate the list, hence no extra locking is needed there. + * Any cancelable uring_cmd is added to this list in + * ->uring_cmd() by io_uring_cmd_insert_cancelable() */ - struct io_wq_work_list iopoll_list; - bool poll_multi_queue; + struct hlist_head cancelable_uring_cmd; } ____cacheline_aligned_in_smp; struct { @@ -306,13 +346,20 @@ struct io_ring_ctx { spinlock_t completion_lock; /* IRQ completion list, under ->completion_lock */ - struct io_wq_work_list locked_free_list; unsigned int locked_free_nr; + struct io_wq_work_list locked_free_list; struct list_head io_buffers_comp; struct list_head cq_overflow_list; struct io_hash_table cancel_table; + struct hlist_head waitid_list; + +#ifdef CONFIG_FUTEX + struct hlist_head futex_list; + struct io_alloc_cache futex_cache; +#endif + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ @@ -322,11 +369,11 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; - struct xarray personalities; - u32 pers_next; - struct list_head io_buffers_cache; + /* deferred free list, protected by ->uring_lock */ + struct hlist_head io_buf_list; + /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; @@ -342,11 +389,9 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; - struct list_head io_buffers_pages; + u32 pers_next; + struct xarray personalities; - #if defined(CONFIG_UNIX) - struct socket *ring_sock; - #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; @@ -363,11 +408,22 @@ struct io_ring_ctx { /* io-wq management, e.g. thread count */ u32 iowq_limits[2]; - bool iowq_limits_set; struct callback_head poll_wq_task_work; struct list_head defer_list; - unsigned sq_thread_idle; + +#ifdef CONFIG_NET_RX_BUSY_POLL + struct list_head napi_list; /* track busy poll napi_id */ + spinlock_t napi_lock; /* napi_list lock */ + + /* napi busy poll default timeout */ + unsigned int napi_busy_poll_to; + bool napi_prefer_busy_poll; + bool napi_enabled; + + DECLARE_HASHTABLE(napi_ht, 4); +#endif + /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; @@ -413,80 +469,95 @@ enum { REQ_F_SKIP_LINK_CQES_BIT, REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, - REQ_F_PARTIAL_IO_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, REQ_F_HASH_LOCKED_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, + REQ_F_POLL_NO_LAZY_BIT, + REQ_F_CANCEL_SEQ_BIT, + REQ_F_CAN_POLL_BIT, + REQ_F_BL_EMPTY_BIT, + REQ_F_BL_NO_RECYCLE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, }; +typedef u64 __bitwise io_req_flags_t; +#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno))) + enum { /* ctx owns file */ - REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT), /* drain existing IO first */ - REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT), /* linked sqes */ - REQ_F_LINK = BIT(REQ_F_LINK_BIT), + REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT), /* doesn't sever on completion < 0 */ - REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT), /* IOSQE_ASYNC */ - REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT), /* IOSQE_BUFFER_SELECT */ - REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT), /* IOSQE_CQE_SKIP_SUCCESS */ - REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT), /* fail rest of links */ - REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT), /* on inflight list, should be cancelled and waited on exit reliably */ - REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT), /* read/write uses file position */ - REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT), /* must not punt to workers */ - REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT), /* has or had linked timeout */ - REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT), /* needs cleanup */ - REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ - REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), /* buffer already selected */ - REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), /* buffer selected from ring, needs commit */ - REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT), /* caller should reissue async */ - REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT), /* supports async reads/writes */ - REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT), /* regular file */ - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT), /* has creds assigned */ - REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT), /* skip refcounting if not set */ - REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT), /* there is a linked timeout that has to be armed */ - REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT), /* ->async_data allocated */ - REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT), /* don't post CQEs while failing linked requests */ - REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT), /* single poll may be active */ - REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), /* double poll may active */ - REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), - /* request has already done partial IO */ - REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), /* fast poll multishot mode */ - REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ - REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ - REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), + /* don't use lazy poll wake for this request */ + REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), + /* cancel sequence is set and valid */ + REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), + /* file is pollable */ + REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), + /* buffer list was empty after selection of buffer */ + REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), + /* don't recycle provided buffers for this request */ + REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -547,15 +618,17 @@ struct io_kiocb { * and after selection it points to the buffer ID itself. */ u16 buf_index; - unsigned int flags; + + unsigned nr_tw; + + /* REQ_F_* flags */ + io_req_flags_t flags; struct io_cqe cqe; struct io_ring_ctx *ctx; struct task_struct *task; - struct io_rsrc_node *rsrc_node; - union { /* store used ubuf, so we can prevent reloading */ struct io_mapped_ubuf *imu; @@ -576,10 +649,12 @@ struct io_kiocb { /* cache ->apoll->events */ __poll_t apoll_events; }; + + struct io_rsrc_node *rsrc_node; + atomic_t refs; atomic_t poll_refs; struct io_task_work io_task_work; - unsigned nr_tw; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; /* internal polling, see IORING_FEAT_FAST_POLL */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 96dd0acbba44..6fc1c858013d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -293,22 +293,32 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ - u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ - struct bio *io_bio; /* bio being built */ - struct bio io_inline_bio; /* MUST BE LAST! */ + struct bio io_bio; /* MUST BE LAST! */ }; +static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) +{ + return container_of(bio, struct iomap_ioend, io_bio); +} + struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset); + loff_t offset, unsigned len); /* * Optional, allows the file systems to perform actions just before @@ -329,6 +339,7 @@ struct iomap_writepage_ctx { struct iomap iomap; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; + u32 nr_folios; /* folios added to the ioend */ }; void iomap_finish_ioends(struct iomap_ioend *ioend, int error); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c50a769d569a..2e925b5eba53 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,7 +13,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/of.h> -#include <uapi/linux/iommu.h> +#include <linux/iova_bitmap.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -37,10 +37,113 @@ struct bus_type; struct device; struct iommu_domain; struct iommu_domain_ops; +struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; -struct iommu_fault_event; struct iommu_dma_cookie; +struct iommu_fault_param; + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_PAGE_REQ = 1, /* page request fault */ +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). + * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response + * must have the same PASID value as the page request. When it is clear, + * the page response should not have a PASID. + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) +#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) + u32 flags; + u32 pasid; + u32 grpid; + u32 perm; + u64 addr; + u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + */ +struct iommu_fault { + u32 type; + struct iommu_fault_page_request prm; +}; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { + u32 pasid; + u32 grpid; + u32 code; +}; + +struct iopf_fault { + struct iommu_fault fault; + /* node for pending lists */ + struct list_head list; +}; + +struct iopf_group { + struct iopf_fault last_fault; + struct list_head faults; + /* list node for iommu_fault_param::faults */ + struct list_head pending_node; + struct work_struct work; + struct iommu_domain *domain; + /* The device's fault data parameter. */ + struct iommu_fault_param *fault_param; +}; + +/** + * struct iopf_queue - IO Page Fault queue + * @wq: the fault workqueue + * @devices: devices attached to this queue + * @lock: protects the device list + */ +struct iopf_queue { + struct workqueue_struct *wq; + struct list_head devices; + struct mutex lock; +}; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -48,7 +151,6 @@ struct iommu_dma_cookie; typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); -typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -64,6 +166,10 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) + +#define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested + on a stage-2 translation */ #define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* @@ -81,6 +187,8 @@ struct iommu_domain_geometry { * invalidation. * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -91,15 +199,18 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) +#define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED) struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; + const struct iommu_dirty_ops *dirty_ops; + const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; - enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, - void *data); + int (*iopf_handler)(struct iopf_group *group); void *fault_data; union { struct { @@ -109,6 +220,11 @@ struct iommu_domain { struct { /* IOMMU_DOMAIN_SVA */ struct mm_struct *mm; int users; + /* + * Next iommu_domain in mm->iommu_mm->sva-domains list + * protected by iommu_sva_lock. + */ + struct list_head next; }; }; }; @@ -133,6 +249,7 @@ enum iommu_cap { * usefully support the non-strict DMA flush queue. */ IOMMU_CAP_DEFERRED_FLUSH, + IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */ }; /* These are the possible reserved region types */ @@ -228,20 +345,183 @@ struct iommu_iotlb_gather { }; /** + * struct iommu_dirty_bitmap - Dirty IOVA bitmap state + * @bitmap: IOVA bitmap + * @gather: Range information for a pending IOTLB flush + */ +struct iommu_dirty_bitmap { + struct iova_bitmap *bitmap; + struct iommu_iotlb_gather *gather; +}; + +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + +/** + * struct iommu_dirty_ops - domain specific dirty tracking operations + * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain + * @read_and_clear_dirty: Walk IOMMU page tables for dirtied PTEs marshalled + * into a bitmap, with a bit represented as a page. + * Reads the dirty PTE bits and clears it from IO + * pagetables. + */ +struct iommu_dirty_ops { + int (*set_dirty_tracking)(struct iommu_domain *domain, bool enabled); + int (*read_and_clear_dirty)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); +}; + +/** + * struct iommu_user_data - iommu driver specific user space data info + * @type: The data type of the user buffer + * @uptr: Pointer to the user buffer for copy_from_user() + * @len: The length of the user buffer in bytes + * + * A user space data is an uAPI that is defined in include/uapi/linux/iommufd.h + * @type, @uptr and @len should be just copied from an iommufd core uAPI struct. + */ +struct iommu_user_data { + unsigned int type; + void __user *uptr; + size_t len; +}; + +/** + * struct iommu_user_data_array - iommu driver specific user space data array + * @type: The data type of all the entries in the user buffer array + * @uptr: Pointer to the user buffer array + * @entry_len: The fixed-width length of an entry in the array, in bytes + * @entry_num: The number of total entries in the array + * + * The user buffer includes an array of requests with format defined in + * include/uapi/linux/iommufd.h + */ +struct iommu_user_data_array { + unsigned int type; + void __user *uptr; + size_t entry_len; + u32 entry_num; +}; + +/** + * __iommu_copy_struct_from_user - Copy iommu driver specific user space data + * @dst_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @src_data: Pointer to a struct iommu_user_data for user space data info + * @data_type: The data type of the @dst_data. Must match with @src_data.type + * @data_len: Length of current user data structure, i.e. sizeof(struct _dst) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int __iommu_copy_struct_from_user( + void *dst_data, const struct iommu_user_data *src_data, + unsigned int data_type, size_t data_len, size_t min_len) +{ + if (src_data->type != data_type) + return -EINVAL; + if (WARN_ON(!dst_data || !src_data)) + return -EINVAL; + if (src_data->len < min_len || data_len < src_data->len) + return -EINVAL; + return copy_struct_from_user(dst_data, data_len, src_data->uptr, + src_data->len); +} + +/** + * iommu_copy_struct_from_user - Copy iommu driver specific user space data + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @user_data: Pointer to a struct iommu_user_data for user space data info + * @data_type: The data type of the @kdst. Must match with @user_data->type + * @min_last: The last memember of the data structure @kdst points in the + * initial version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ + __iommu_copy_struct_from_user(kdst, user_data, data_type, \ + sizeof(*kdst), \ + offsetofend(typeof(*kdst), min_last)) + +/** + * __iommu_copy_struct_from_user_array - Copy iommu driver specific user space + * data from an iommu_user_data_array + * @dst_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @src_array: Pointer to a struct iommu_user_data_array for a user space array + * @data_type: The data type of the @dst_data. Must match with @src_array.type + * @index: Index to the location in the array to copy user data from + * @data_len: Length of current user data structure, i.e. sizeof(struct _dst) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int __iommu_copy_struct_from_user_array( + void *dst_data, const struct iommu_user_data_array *src_array, + unsigned int data_type, unsigned int index, size_t data_len, + size_t min_len) +{ + struct iommu_user_data src_data; + + if (WARN_ON(!src_array || index >= src_array->entry_num)) + return -EINVAL; + if (!src_array->entry_num) + return -EINVAL; + src_data.uptr = src_array->uptr + src_array->entry_len * index; + src_data.len = src_array->entry_len; + src_data.type = src_array->type; + + return __iommu_copy_struct_from_user(dst_data, &src_data, data_type, + data_len, min_len); +} + +/** + * iommu_copy_struct_from_user_array - Copy iommu driver specific user space + * data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * @index: Index to the location in the array to copy user data from + * @min_last: The last member of the data structure @kdst points in the + * initial version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ + min_last) \ + __iommu_copy_struct_from_user_array( \ + kdst, user_array, data_type, index, sizeof(*(kdst)), \ + offsetofend(typeof(*(kdst)), min_last)) + +/** * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this * op is allocated in the iommu driver and freed by the caller after * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. - * @domain_alloc: allocate iommu domain + * @domain_alloc: allocate and return an iommu domain if success. Otherwise + * NULL is returned. The domain is not fully initialized until + * the caller iommu_domain_alloc() returns. + * @domain_alloc_user: Allocate an iommu domain corresponding to the input + * parameters as defined in include/uapi/linux/iommufd.h. + * Unlike @domain_alloc, it is called only by IOMMUFD and + * must fully initialize the new domain before return. + * Upon success, if the @user_data is valid and the @parent + * points to a kernel-managed domain, the new domain must be + * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be + * NULL while the @user_data can be optionally provided, the + * new domain must support __IOMMU_DOMAIN_PAGING. + * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -260,6 +540,13 @@ struct iommu_iotlb_gather { * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. + * @blocked_domain: An always available, always attachable blocking + * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -267,26 +554,28 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_user)( + struct device *dev, u32 flags, struct iommu_domain *parent, + const struct iommu_user_data *user_data); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + int (*of_xlate)(struct device *dev, const struct of_phandle_args *args); bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - int (*page_response)(struct device *dev, - struct iommu_fault_event *evt, - struct iommu_page_response *msg); + void (*page_response)(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); @@ -294,6 +583,10 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; + struct iommu_domain *blocked_domain; + struct iommu_domain *release_domain; + struct iommu_domain *default_domain; }; /** @@ -312,15 +605,20 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. - * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue + * @cache_invalidate_user: Flush hardware cache for user space IO page table. + * The @domain must be IOMMU_DOMAIN_NESTED. The @array + * passes in the cache invalidation requests, in form + * of a driver data structure. The driver must update + * array->entry_num to report the number of handled + * invalidation requests. The driver data structure + * must be defined in include/uapi/linux/iommufd.h * @iova_to_phys: translate iova to physical address * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform @@ -334,22 +632,20 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); + int (*cache_invalidate_user)(struct iommu_domain *domain, + struct iommu_user_data_array *array); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); @@ -368,6 +664,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -375,42 +672,39 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; /** - * struct iommu_fault_event - Generic fault event - * - * Can represent recoverable faults such as a page requests or - * unrecoverable faults such as DMA or IRQ remapping faults. - * - * @fault: fault descriptor - * @list: pending fault event list, used for tracking responses - */ -struct iommu_fault_event { - struct iommu_fault fault; - struct list_head list; -}; - -/** * struct iommu_fault_param - per-device IOMMU fault data - * @handler: Callback function to handle IOMMU faults at device level - * @data: handler private data - * @faults: holds the pending faults which needs response * @lock: protect pending faults list + * @users: user counter to manage the lifetime of the data + * @rcu: rcu head for kfree_rcu() + * @dev: the device that owns this param + * @queue: IOPF queue + * @queue_list: index into queue->devices + * @partial: faults that are part of a Page Request Group for which the last + * request hasn't been submitted yet. + * @faults: holds the pending faults which need response */ struct iommu_fault_param { - iommu_dev_fault_handler_t handler; - void *data; - struct list_head faults; struct mutex lock; + refcount_t users; + struct rcu_head rcu; + + struct device *dev; + struct iopf_queue *queue; + struct list_head queue_list; + + struct list_head partial; + struct list_head faults; }; /** * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data - * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data @@ -418,14 +712,14 @@ struct iommu_fault_param { * @attach_deferred: the dma domain attachment is deferred * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; */ struct dev_iommu { struct mutex lock; - struct iommu_fault_param *fault_param; - struct iopf_device_param *iopf_param; + struct iommu_fault_param __rcu *fault_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; @@ -433,6 +727,7 @@ struct dev_iommu { u32 attach_deferred:1; u32 pci_32bit_workaround:1; u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, @@ -453,6 +748,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +/** + * iommu_get_iommu_dev - Get iommu_device for a device + * @dev: an end-point device + * + * Note that this function must be called from the iommu_ops + * to retrieve the iommu_device for a device, which the core code + * guarentees it will not invoke the op without an attached iommu. + */ +static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev) +{ + return dev->iommu->iommu_dev; +} + +#define iommu_get_iommu_dev(dev, type, member) \ + container_of(__iommu_get_iommu_dev(dev), type, member) + static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { @@ -518,16 +829,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data); - -extern int iommu_unregister_device_fault_handler(struct device *dev); - -extern int iommu_report_device_fault(struct device *dev, - struct iommu_fault_event *evt); -extern int iommu_page_response(struct device *dev, - struct iommu_page_response *msg); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -632,12 +933,35 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return gather && gather->queued; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ + if (gather) + iommu_iotlb_gather_init(gather); + + dirty->bitmap = bitmap; + dirty->gather = gather; +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ + if (dirty->bitmap) + iova_bitmap_set(dirty->bitmap, iova, length); + + if (dirty->gather) + iommu_iotlb_gather_add_range(dirty->gather, iova, length); +} + /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data @@ -668,13 +992,21 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; +}; + +struct iommu_mm_data { + u32 pasid; + struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); -int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -698,11 +1030,9 @@ static inline void *dev_iommu_priv_get(struct device *dev) return NULL; } -static inline void dev_iommu_priv_set(struct device *dev, void *priv) -{ - dev->iommu->priv = priv; -} +void dev_iommu_priv_set(struct device *dev, void *priv); +extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); @@ -718,8 +1048,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group); int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, @@ -737,6 +1065,8 @@ struct iommu_fwspec {}; struct iommu_device {}; struct iommu_fault_param {}; struct iommu_iotlb_gather {}; +struct iommu_dirty_bitmap {}; +struct iommu_dirty_ops {}; static inline bool iommu_present(const struct bus_type *bus) { @@ -906,31 +1236,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline -int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data) -{ - return -ENODEV; -} - -static inline int iommu_unregister_device_fault_handler(struct device *dev) -{ - return 0; -} - -static inline -int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) -{ - return -ENODEV; -} - -static inline int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) -{ - return -ENODEV; -} - static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; @@ -969,6 +1274,18 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return false; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } @@ -1012,7 +1329,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, } static inline -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { return NULL; } @@ -1067,12 +1384,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) return -ENODEV; } -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} - static inline int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { @@ -1099,6 +1410,14 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) +void iommu_group_mutex_assert(struct device *dev); +#else +static inline void iommu_group_mutex_assert(struct device *dev) +{ +} +#endif + /** * iommu_map_sgtable - Map the given buffer to the IOMMU domain * @domain: The IOMMU domain to perform the mapping @@ -1109,7 +1428,7 @@ static inline void iommu_free_global_pasid(ioasid_t pasid) {} * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. */ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, @@ -1180,20 +1499,40 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream return false; } -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_IOMMU_MM_DATA static inline void mm_pasid_init(struct mm_struct *mm) { - mm->pasid = IOMMU_PASID_INVALID; + /* + * During dup_mm(), a new mm will be memcpy'd from an old one and that makes + * the new mm and the old one point to a same iommu_mm instance. When either + * one of the two mms gets released, the iommu_mm instance is freed, leaving + * the other mm running into a use-after-free/double-free problem. To avoid + * the problem, zeroing the iommu_mm pointer of a new mm is needed here. + */ + mm->iommu_mm = NULL; } + static inline bool mm_valid_pasid(struct mm_struct *mm) { - return mm->pasid != IOMMU_PASID_INVALID; + return READ_ONCE(mm->iommu_mm); +} + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + struct iommu_mm_data *iommu_mm = READ_ONCE(mm->iommu_mm); + + if (!iommu_mm) + return IOMMU_PASID_INVALID; + return iommu_mm->pasid; } + void mm_pasid_drop(struct mm_struct *mm); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1211,7 +1550,75 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) } static inline void mm_pasid_init(struct mm_struct *mm) {} static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; } + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + return IOMMU_PASID_INVALID; +} + static inline void mm_pasid_drop(struct mm_struct *mm) {} + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} #endif /* CONFIG_IOMMU_SVA */ +#ifdef CONFIG_IOMMU_IOPF +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); +void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_flush_dev(struct device *dev); +struct iopf_queue *iopf_queue_alloc(const char *name); +void iopf_queue_free(struct iopf_queue *queue); +int iopf_queue_discard_partial(struct iopf_queue *queue); +void iopf_free_group(struct iopf_group *group); +void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status); +#else +static inline int +iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) +{ + return -ENODEV; +} + +static inline void +iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +{ +} + +static inline int iopf_queue_flush_dev(struct device *dev) +{ + return -ENODEV; +} + +static inline struct iopf_queue *iopf_queue_alloc(const char *name) +{ + return NULL; +} + +static inline void iopf_queue_free(struct iopf_queue *queue) +{ +} + +static inline int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + return -ENODEV; +} + +static inline void iopf_free_group(struct iopf_group *group) +{ +} + +static inline void +iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +{ +} + +static inline void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) +{ +} +#endif /* CONFIG_IOMMU_IOPF */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 25d768d48970..db7fe25f3370 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,7 +229,7 @@ static inline unsigned long resource_ext_type(const struct resource *res) return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ -static inline bool resource_contains(struct resource *r1, struct resource *r2) +static inline bool resource_contains(const struct resource *r1, const struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; @@ -239,13 +239,13 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) } /* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +static inline bool resource_overlaps(const struct resource *r1, const struct resource *r2) { return r1->start <= r2->end && r1->end >= r2->start; } -static inline bool -resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_intersection(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -254,8 +254,8 @@ resource_intersection(struct resource *r1, struct resource *r2, struct resource return true; } -static inline bool -resource_union(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_union(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -331,6 +331,9 @@ extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int +walk_system_ram_res_rev(u64 start, u64 end, void *arg, + int (*func)(struct resource *, void *)); +extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 7578d4f6a969..db1249cd9692 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -47,7 +47,30 @@ static inline int task_nice_ioclass(struct task_struct *task) } #ifdef CONFIG_BLOCK -int __get_task_ioprio(struct task_struct *p); +/* + * If the task has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + * + * Expected to be called for current task or with task_lock() held to keep + * io_context stable. + */ +static inline int __get_task_ioprio(struct task_struct *p) +{ + struct io_context *ioc = p->io_context; + int prio; + + if (!ioc) + return IOPRIO_DEFAULT; + + if (p != current) + lockdep_assert_held(&p->alloc_lock); + + prio = ioc->ioprio; + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); + return prio; +} #else static inline int __get_task_ioprio(struct task_struct *p) { diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 1b06d074ade0..4696abfd311c 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -34,7 +34,7 @@ * the same driver for allocation, read and write operations. * * Open-coding access to :c:type:`struct iosys_map <iosys_map>` is considered - * bad style. Rather then accessing its fields directly, use one of the provided + * bad style. Rather than accessing its fields directly, use one of the provided * helper functions, or implement your own. For example, instances of * :c:type:`struct iosys_map <iosys_map>` can be initialized statically with * IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These @@ -168,9 +168,9 @@ struct iosys_map { * about the use of uninitialized variable. */ #define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({ \ - struct iosys_map copy = *map_; \ - iosys_map_incr(©, offset_); \ - copy; \ + struct iosys_map copy_ = *map_; \ + iosys_map_incr(©_, offset_); \ + copy_; \ }) /** @@ -391,14 +391,14 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * Returns: * The value read from the mapping. */ -#define iosys_map_rd(map__, offset__, type__) ({ \ - type__ val; \ - if ((map__)->is_iomem) { \ - __iosys_map_rd_io(val, (map__)->vaddr_iomem + (offset__), type__);\ - } else { \ - __iosys_map_rd_sys(val, (map__)->vaddr + (offset__), type__); \ - } \ - val; \ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val_; \ + if ((map__)->is_iomem) { \ + __iosys_map_rd_io(val_, (map__)->vaddr_iomem + (offset__), type__); \ + } else { \ + __iosys_map_rd_sys(val_, (map__)->vaddr + (offset__), type__); \ + } \ + val_; \ }) /** @@ -413,13 +413,13 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * or if pointer may be unaligned (and problematic for the architecture * supported), use iosys_map_memcpy_to() */ -#define iosys_map_wr(map__, offset__, type__, val__) ({ \ - type__ val = (val__); \ - if ((map__)->is_iomem) { \ - __iosys_map_wr_io(val, (map__)->vaddr_iomem + (offset__), type__);\ - } else { \ - __iosys_map_wr_sys(val, (map__)->vaddr + (offset__), type__); \ - } \ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val_ = (val__); \ + if ((map__)->is_iomem) { \ + __iosys_map_wr_io(val_, (map__)->vaddr_iomem + (offset__), type__); \ + } else { \ + __iosys_map_wr_sys(val_, (map__)->vaddr + (offset__), type__); \ + } \ }) /** @@ -485,9 +485,9 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * The value read from the mapping. */ #define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({ \ - struct_type__ *s; \ + struct_type__ *s_; \ iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__), \ - typeof(s->field__)); \ + typeof(s_->field__)); \ }) /** @@ -508,9 +508,9 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * usage and memory layout. */ #define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ - struct_type__ *s; \ + struct_type__ *s_; \ iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__), \ - typeof(s->field__), val__); \ + typeof(s_->field__), val__); \ }) #endif /* __IOSYS_MAP_H__ */ diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h new file mode 100644 index 000000000000..270454a6703d --- /dev/null +++ b/include/linux/iov_iter.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* I/O iterator iteration building functions. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_IOV_ITER_H +#define _LINUX_IOV_ITER_H + +#include <linux/uio.h> +#include <linux/bvec.h> + +typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2); +typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len, + void *priv, void *priv2); + +/* + * Handle ITER_UBUF. + */ +static __always_inline +size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + void __user *base = iter->ubuf; + size_t progress = 0, remain; + + remain = step(base + iter->iov_offset, 0, len, priv, priv2); + progress = len - remain; + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_IOVEC. + */ +static __always_inline +size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + const struct iovec *p = iter->__iov; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->__iov; + iter->__iov = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_KVEC. + */ +static __always_inline +size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct kvec *p = iter->kvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->kvec; + iter->kvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_BVEC. + */ +static __always_inline +size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct bio_vec *p = iter->bvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t offset = p->bv_offset + skip, part; + void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); + + part = min3(len, + (size_t)(p->bv_len - skip), + (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); + remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); + kunmap_local(kaddr); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= p->bv_len) { + skip = 0; + p++; + } + if (remain) + break; + } while (len); + + iter->nr_segs -= p - iter->bvec; + iter->bvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_XARRAY. + */ +static __always_inline +size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + struct folio *folio; + size_t progress = 0; + loff_t start = iter->xarray_start + iter->iov_offset; + pgoff_t index = start / PAGE_SIZE; + XA_STATE(xas, iter->xarray, index); + + rcu_read_lock(); + xas_for_each(&xas, folio, ULONG_MAX) { + size_t remain, consumed, offset, part, flen; + + if (xas_retry(&xas, folio)) + continue; + if (WARN_ON(xa_is_value(folio))) + break; + if (WARN_ON(folio_test_hugetlb(folio))) + break; + + offset = offset_in_folio(folio, start + progress); + flen = min(folio_size(folio) - offset, len); + + while (flen) { + void *base = kmap_local_folio(folio, offset); + + part = min_t(size_t, flen, + PAGE_SIZE - offset_in_page(offset)); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + + consumed = part - remain; + progress += consumed; + len -= consumed; + + if (remain || len == 0) + goto out; + flen -= consumed; + offset += consumed; + } + } + +out: + rcu_read_unlock(); + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_DISCARD. + */ +static __always_inline +size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + size_t progress = len; + + iter->count -= progress; + return progress; +} + +/** + * iterate_and_advance2 - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * Two step functions, @step and @ustep, must be provided, one for handling + * mapped kernel addresses and the other is given user addresses which have the + * potential to fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_ustep_f ustep, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + + if (likely(iter_is_ubuf(iter))) + return iterate_ubuf(iter, len, priv, priv2, ustep); + if (likely(iter_is_iovec(iter))) + return iterate_iovec(iter, len, priv, priv2, ustep); + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + +/** + * iterate_and_advance - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * As iterate_and_advance2(), but priv2 is always NULL. + */ +static __always_inline +size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, + iov_ustep_f ustep, iov_step_f step) +{ + return iterate_and_advance2(iter, len, priv, NULL, ustep, step); +} + +#endif /* _LINUX_IOV_ITER_H */ diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h index c006cf0a25f3..1c338f5e5b7a 100644 --- a/include/linux/iova_bitmap.h +++ b/include/linux/iova_bitmap.h @@ -7,6 +7,7 @@ #define _IOVA_BITMAP_H_ #include <linux/types.h> +#include <linux/errno.h> struct iova_bitmap; @@ -14,6 +15,7 @@ typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, unsigned long iova, size_t length, void *opaque); +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, unsigned long page_size, u64 __user *data); @@ -22,5 +24,29 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, iova_bitmap_fn_t fn); void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length); +#else +static inline struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, + size_t length, + unsigned long page_size, + u64 __user *data) +{ + return NULL; +} + +static inline void iova_bitmap_free(struct iova_bitmap *bitmap) +{ +} + +static inline int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn) +{ + return -EOPNOTSUPP; +} + +static inline void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length) +{ +} +#endif #endif diff --git a/include/linux/ipc.h b/include/linux/ipc.h index e1c9eea6015b..9b1434247aab 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -2,7 +2,7 @@ #ifndef _LINUX_IPC_H #define _LINUX_IPC_H -#include <linux/spinlock.h> +#include <linux/spinlock_types.h> #include <linux/uidgid.h> #include <linux/rhashtable-types.h> #include <uapi/linux/ipc.h> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index af8a771a053c..383a0ea2ab91 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -3,6 +3,7 @@ #define _IPV6_H #include <uapi/linux/ipv6.h> +#include <linux/cache.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) @@ -10,9 +11,16 @@ * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { - __s32 forwarding; + /* RX & TX fastpath fields. */ + __cacheline_group_begin(ipv6_devconf_read_txrx); + __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; + __s32 forwarding; + __s32 disable_policy; + __s32 proxy_ndp; + __cacheline_group_end(ipv6_devconf_read_txrx); + __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; @@ -27,6 +35,7 @@ struct ipv6_devconf { __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; + __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; @@ -44,7 +53,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -54,7 +62,6 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif - __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; @@ -75,13 +82,13 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; - __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; + __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; @@ -213,28 +220,9 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. - */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits. */ - __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; -#else - __u16 mc_loop:1, - __unused_2:6; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; @@ -262,21 +250,11 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, - repflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, - mc_all:1, - recverr_rfc4884:1, - rtalert_isolate:1; + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; @@ -293,6 +271,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. + */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ diff --git a/include/linux/irq.h b/include/linux/irq.h index d8a6fdce9373..97baa937ab5b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -179,7 +179,7 @@ struct irq_common_data { struct irq_data { u32 mask; unsigned int irq; - unsigned long hwirq; + irq_hw_number_t hwirq; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; @@ -215,8 +215,6 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode - * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change - * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call @@ -247,11 +245,10 @@ enum { IRQD_SINGLE_TARGET = BIT(24), IRQD_DEFAULT_TRIGGER_SET = BIT(25), IRQD_CAN_RESERVE = BIT(26), - IRQD_MSI_NOMASK_QUIRK = BIT(27), - IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), - IRQD_AFFINITY_ON_ACTIVATE = BIT(29), - IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), - IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), + IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27), + IRQD_AFFINITY_ON_ACTIVATE = BIT(28), + IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29), + IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } -static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; -} - -static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; -} - -static inline bool irqd_msi_nomask_quirk(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; -} - static inline void irqd_set_affinity_on_activate(struct irq_data *d) { __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 8cd11a223260..136f2980cba3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -66,6 +66,9 @@ void irq_work_sync(struct irq_work *work); void irq_work_run(void); bool irq_work_needs_cpu(void); void irq_work_single(void *arg); + +void arch_irq_work_raise(void); + #else static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 51c254b7fec2..21ecf582a0fe 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -174,7 +174,7 @@ struct irq_domain { irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; - struct irq_data __rcu *revmap[]; + struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ @@ -619,6 +619,23 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#ifdef CONFIG_GENERIC_MSI_IRQ +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type); +void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); +#else +static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} +static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) +{ + WARN_ON_ONCE(1); +} +#endif + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index c29921fd8cd1..5c1fe6f1fcde 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -26,6 +26,8 @@ enum irq_domain_bus_token { DOMAIN_BUS_DMAR, DOMAIN_BUS_AMDVI, DOMAIN_BUS_PCI_DEVICE_IMS, + DOMAIN_BUS_DEVICE_MSI, + DOMAIN_BUS_WIRED_TO_MSI, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 2b665c32f5fe..3f003d5fde53 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,6 +12,7 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H +#include <linux/irqflags_types.h> #include <linux/typecheck.h> #include <linux/cleanup.h> #include <asm/irqflags.h> @@ -34,19 +35,6 @@ #ifdef CONFIG_TRACE_IRQFLAGS -/* Per-task IRQ trace events information. */ -struct irqtrace_events { - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; -}; - DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); @@ -126,7 +114,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) diff --git a/include/linux/irqflags_types.h b/include/linux/irqflags_types.h new file mode 100644 index 000000000000..c13f0d915097 --- /dev/null +++ b/include/linux/irqflags_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IRQFLAGS_TYPES_H +#define _LINUX_IRQFLAGS_TYPES_H + +#ifdef CONFIG_TRACE_IRQFLAGS + +/* Per-task IRQ trace events information. */ +struct irqtrace_events { + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; +}; + +#endif + +#endif /* _LINUX_IRQFLAGS_TYPES_H */ diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index c30f454a9518..72dd1eb3a0e7 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ */ struct irq_desc; -struct irq_data; + typedef void (*irq_flow_handler_t)(struct irq_desc *desc); #endif diff --git a/include/linux/ism.h b/include/linux/ism.h index 9a4c204df3da..5428edd90982 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -86,7 +86,6 @@ int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb, int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb); int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); -u8 *ism_get_seid(void); const struct smcd_ops *ism_get_smcd_ops(void); diff --git a/include/linux/iversion.h b/include/linux/iversion.h index f174ff1b59ee..8f972eaca2ed 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -256,7 +256,7 @@ inode_peek_iversion(const struct inode *inode) * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: */ -static inline u64 time_to_chattr(struct timespec64 *t) +static inline u64 time_to_chattr(const struct timespec64 *t) { u64 chattr = t->tv_sec; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 52772c826c86..971f3e826e15 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -756,11 +756,6 @@ struct journal_s unsigned long j_flags; /** - * @j_atomic_flags: Atomic journaling state flags. - */ - unsigned long j_atomic_flags; - - /** * @j_errno: * * Is there an outstanding uncleared error on the journal (from a prior @@ -886,7 +881,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: @@ -999,6 +994,13 @@ struct journal_s struct block_device *j_fs_dev; /** + * @j_fs_dev_wb_err: + * + * Records the errseq of the client fs's backing block device. + */ + errseq_t j_fs_dev_wb_err; + + /** * @j_total_len: Total maximum capacity of the journal region on disk. */ unsigned int j_total_len; @@ -1374,6 +1376,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) +/* Journal high priority write IO operation flags */ +#define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE) + /* * Journal flag definitions */ @@ -1397,12 +1402,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) JBD2_JOURNAL_FLUSH_ZEROOUT) /* - * Journal atomic flag definitions - */ -#define JBD2_CHECKPOINT_IO_ERROR 0x001 /* Detect io error while writing - * buffer back to disk */ - -/* * Function declarations for the journaling transaction and buffer * management */ @@ -1695,6 +1694,25 @@ static inline void jbd2_journal_abort_handle(handle_t *handle) handle->h_aborted = 1; } +static inline void jbd2_init_fs_dev_write_error(journal_t *journal) +{ + struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + + /* + * Save the original wb_err value of client fs's bdev mapping which + * could be used to detect the client fs's metadata async write error. + */ + errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err); +} + +static inline int jbd2_check_fs_dev_write_error(journal_t *journal) +{ + struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + + return errseq_check(&mapping->wb_err, + READ_ONCE(journal->j_fs_dev_wb_err)); +} + #endif /* __KERNEL__ */ /* Comparison functions for transaction IDs: perform comparisons using diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index e0ae2a43e0eb..d9f1435a5a13 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -102,12 +102,15 @@ static inline u64 get_jiffies_64(void) } #endif -/* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them: - * 1. Because people otherwise forget - * 2. Because if the timer wrap changes in future you won't have to - * alter your driver code. +/** + * DOC: General information about time_* inlines + * + * These inlines deal with timer wrapping correctly. You are strongly encouraged + * to use them: + * + * #. Because people otherwise forget + * #. Because if the timer wrap changes in future you won't have to alter your + * driver code. */ /** diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 842623d708c2..70d6a8f6e25d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -4,6 +4,7 @@ #include <linux/bug.h> #include <linux/kasan-enabled.h> +#include <linux/kasan-tags.h> #include <linux/kernel.h> #include <linux/static_key.h> #include <linux/types.h> @@ -129,20 +130,39 @@ static __always_inline void kasan_poison_slab(struct slab *slab) __kasan_poison_slab(slab); } -void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); -static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache, +void __kasan_unpoison_new_object(struct kmem_cache *cache, void *object); +/** + * kasan_unpoison_new_object - Temporarily unpoison a new slab object. + * @cache: Cache the object belong to. + * @object: Pointer to the object. + * + * This function is intended for the slab allocator's internal use. It + * temporarily unpoisons an object from a newly allocated slab without doing + * anything else. The object must later be repoisoned by + * kasan_poison_new_object(). + */ +static __always_inline void kasan_unpoison_new_object(struct kmem_cache *cache, void *object) { if (kasan_enabled()) - __kasan_unpoison_object_data(cache, object); + __kasan_unpoison_new_object(cache, object); } -void __kasan_poison_object_data(struct kmem_cache *cache, void *object); -static __always_inline void kasan_poison_object_data(struct kmem_cache *cache, +void __kasan_poison_new_object(struct kmem_cache *cache, void *object); +/** + * kasan_unpoison_new_object - Repoison a new slab object. + * @cache: Cache the object belong to. + * @object: Pointer to the object. + * + * This function is intended for the slab allocator's internal use. It + * repoisons an object that was previously unpoisoned by + * kasan_unpoison_new_object() without doing anything else. + */ +static __always_inline void kasan_poison_new_object(struct kmem_cache *cache, void *object) { if (kasan_enabled()) - __kasan_poison_object_data(cache, object); + __kasan_poison_new_object(cache, object); } void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, @@ -172,13 +192,6 @@ static __always_inline void kasan_kfree_large(void *ptr) __kasan_kfree_large(ptr, _RET_IP_); } -void __kasan_slab_free_mempool(void *ptr, unsigned long ip); -static __always_inline void kasan_slab_free_mempool(void *ptr) -{ - if (kasan_enabled()) - __kasan_slab_free_mempool(ptr, _RET_IP_); -} - void * __must_check __kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags, bool init); static __always_inline void * __must_check kasan_slab_alloc( @@ -219,6 +232,113 @@ static __always_inline void * __must_check kasan_krealloc(const void *object, return (void *)object; } +bool __kasan_mempool_poison_pages(struct page *page, unsigned int order, + unsigned long ip); +/** + * kasan_mempool_poison_pages - Check and poison a mempool page allocation. + * @page: Pointer to the page allocation. + * @order: Order of the allocation. + * + * This function is intended for kernel subsystems that cache page allocations + * to reuse them instead of freeing them back to page_alloc (e.g. mempool). + * + * This function is similar to kasan_mempool_poison_object() but operates on + * page allocations. + * + * Before the poisoned allocation can be reused, it must be unpoisoned via + * kasan_mempool_unpoison_pages(). + * + * Return: true if the allocation can be safely reused; false otherwise. + */ +static __always_inline bool kasan_mempool_poison_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + return __kasan_mempool_poison_pages(page, order, _RET_IP_); + return true; +} + +void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order, + unsigned long ip); +/** + * kasan_mempool_unpoison_pages - Unpoison a mempool page allocation. + * @page: Pointer to the page allocation. + * @order: Order of the allocation. + * + * This function is intended for kernel subsystems that cache page allocations + * to reuse them instead of freeing them back to page_alloc (e.g. mempool). + * + * This function unpoisons a page allocation that was previously poisoned by + * kasan_mempool_poison_pages() without zeroing the allocation's memory. For + * the tag-based modes, this function assigns a new tag to the allocation. + */ +static __always_inline void kasan_mempool_unpoison_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_mempool_unpoison_pages(page, order, _RET_IP_); +} + +bool __kasan_mempool_poison_object(void *ptr, unsigned long ip); +/** + * kasan_mempool_poison_object - Check and poison a mempool slab allocation. + * @ptr: Pointer to the slab allocation. + * + * This function is intended for kernel subsystems that cache slab allocations + * to reuse them instead of freeing them back to the slab allocator (e.g. + * mempool). + * + * This function poisons a slab allocation and saves a free stack trace for it + * without initializing the allocation's memory and without putting it into the + * quarantine (for the Generic mode). + * + * This function also performs checks to detect double-free and invalid-free + * bugs and reports them. The caller can use the return value of this function + * to find out if the allocation is buggy. + * + * Before the poisoned allocation can be reused, it must be unpoisoned via + * kasan_mempool_unpoison_object(). + * + * This function operates on all slab allocations including large kmalloc + * allocations (the ones returned by kmalloc_large() or by kmalloc() with the + * size > KMALLOC_MAX_SIZE). + * + * Return: true if the allocation can be safely reused; false otherwise. + */ +static __always_inline bool kasan_mempool_poison_object(void *ptr) +{ + if (kasan_enabled()) + return __kasan_mempool_poison_object(ptr, _RET_IP_); + return true; +} + +void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip); +/** + * kasan_mempool_unpoison_object - Unpoison a mempool slab allocation. + * @ptr: Pointer to the slab allocation. + * @size: Size to be unpoisoned. + * + * This function is intended for kernel subsystems that cache slab allocations + * to reuse them instead of freeing them back to the slab allocator (e.g. + * mempool). + * + * This function unpoisons a slab allocation that was previously poisoned via + * kasan_mempool_poison_object() and saves an alloc stack trace for it without + * initializing the allocation's memory. For the tag-based modes, this function + * does not assign a new tag to the allocation and instead restores the + * original tags based on the pointer value. + * + * This function operates on all slab allocations including large kmalloc + * allocations (the ones returned by kmalloc_large() or by kmalloc() with the + * size > KMALLOC_MAX_SIZE). + */ +static __always_inline void kasan_mempool_unpoison_object(void *ptr, + size_t size) +{ + if (kasan_enabled()) + __kasan_mempool_unpoison_object(ptr, size, _RET_IP_); +} + /* * Unlike kasan_check_read/write(), kasan_check_byte() is performed even for * the hardware tag-based mode that doesn't rely on compiler instrumentation. @@ -242,9 +362,9 @@ static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, return false; } static inline void kasan_poison_slab(struct slab *slab) {} -static inline void kasan_unpoison_object_data(struct kmem_cache *cache, +static inline void kasan_unpoison_new_object(struct kmem_cache *cache, void *object) {} -static inline void kasan_poison_object_data(struct kmem_cache *cache, +static inline void kasan_poison_new_object(struct kmem_cache *cache, void *object) {} static inline void *kasan_init_slab_obj(struct kmem_cache *cache, const void *object) @@ -256,7 +376,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init return false; } static inline void kasan_kfree_large(void *ptr) {} -static inline void kasan_slab_free_mempool(void *ptr) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags, bool init) { @@ -276,6 +395,17 @@ static inline void *kasan_krealloc(const void *object, size_t new_size, { return (void *)object; } +static inline bool kasan_mempool_poison_pages(struct page *page, unsigned int order) +{ + return true; +} +static inline void kasan_mempool_unpoison_pages(struct page *page, unsigned int order) {} +static inline bool kasan_mempool_poison_object(void *ptr) +{ + return true; +} +static inline void kasan_mempool_unpoison_object(void *ptr, size_t size) {} + static inline bool kasan_check_byte(const void *address) { return true; @@ -285,8 +415,10 @@ static inline bool kasan_check_byte(const void *address) #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC @@ -297,7 +429,6 @@ struct kasan_cache { }; size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); -slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); @@ -314,11 +445,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache, { return 0; } -/* And thus nothing prevents cache merging. */ -static inline slab_flags_t kasan_never_merge(void) -{ - return 0; -} /* And no cache-related metadata initialization is required. */ static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, @@ -466,10 +592,10 @@ static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -#ifdef CONFIG_KASAN_INLINE +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_non_canonical_hook(unsigned long addr); -#else /* CONFIG_KASAN_INLINE */ +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline void kasan_non_canonical_hook(unsigned long addr) { } -#endif /* CONFIG_KASAN_INLINE */ +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #endif /* LINUX_KASAN_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cee8fe87e9f4..be2e8c0a187e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -13,6 +13,7 @@ #include <linux/stdarg.h> #include <linux/align.h> +#include <linux/array_size.h> #include <linux/limits.h> #include <linux/linkage.h> #include <linux/stddef.h> @@ -32,30 +33,18 @@ #include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> +#include <linux/wordpart.h> + #include <asm/byteorder.h> #include <uapi/linux/kernel.h> #define STACK_MAGIC 0xdeadbeef -/** - * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value - * @x: value to repeat - * - * NOTE: @x is not checked for > 0xff; larger values produce odd results. - */ -#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) - /* generic data direction definitions */ #define READ 0 #define WRITE 1 -/** - * ARRAY_SIZE - get the number of elements in array @arr - * @arr: array to be sized - */ -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) #define u64_to_user_ptr(x) ( \ @@ -65,34 +54,6 @@ } \ ) -/** - * upper_32_bits - return bits 32-63 of a number - * @n: the number we're accessing - * - * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress - * the "right shift count >= width of type" warning when that quantity is - * 32-bits. - */ -#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) - -/** - * lower_32_bits - return bits 0-31 of a number - * @n: the number we're accessing - */ -#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) - -/** - * upper_16_bits - return bits 16-31 of a number - * @n: the number we're accessing - */ -#define upper_16_bits(n) ((u16)((n) >> 16)) - -/** - * lower_16_bits - return bits 0-15 of a number - * @n: the number we're accessing - */ -#define lower_16_bits(n) ((u16)((n) & 0xffff)) - struct completion; struct user; @@ -204,12 +165,6 @@ static inline void might_fault(void) { } void do_exit(long error_code) __noreturn; -extern int get_option(char **str, int *pint); -extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(const char *ptr, char **retptr); -extern bool parse_option_str(const char *str, const char *option); -extern char *next_arg(char *args, char **param, char **val); - extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); @@ -260,6 +215,7 @@ enum ftrace_dump_mode { DUMP_NONE, DUMP_ALL, DUMP_ORIG, + DUMP_PARAM, }; #ifdef CONFIG_TRACING diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 2a36f3218b51..87c79d076d6d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -206,23 +206,25 @@ struct kernfs_node { const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ + unsigned short flags; + umode_t mode; + union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; - void *priv; - /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; - unsigned short flags; - umode_t mode; + void *priv; struct kernfs_iattrs *iattr; + + struct rcu_head rcu; }; /* @@ -316,6 +318,7 @@ struct kernfs_ops { struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); + loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 32c78078552c..060835bb82d5 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -15,17 +15,14 @@ #if !defined(__ASSEMBLY__) -#include <linux/crash_core.h> +#include <linux/vmcore_info.h> +#include <linux/crash_reserve.h> #include <asm/io.h> #include <linux/range.h> #include <uapi/linux/kexec.h> #include <linux/verification.h> -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; extern note_buf_t __percpu *crash_notes; #ifdef CONFIG_KEXEC_CORE @@ -35,6 +32,7 @@ extern note_buf_t __percpu *crash_notes; #include <linux/module.h> #include <linux/highmem.h> #include <asm/kexec.h> +#include <linux/crash_core.h> /* Verify architecture specific macros are defined */ @@ -382,13 +380,6 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image, static inline int machine_kexec_post_load(struct kimage *image) { return 0; } #endif -extern void __crash_kexec(struct pt_regs *); -extern void crash_kexec(struct pt_regs *); -int kexec_should_crash(struct task_struct *); -int kexec_crash_loaded(void); -void crash_save_cpu(struct pt_regs *regs, int cpu); -extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); - extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; @@ -407,29 +398,11 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -int crash_shrink_memory(unsigned long new_size); -ssize_t crash_get_memory_size(void); - -#ifndef arch_kexec_protect_crashkres -/* - * Protection mechanism for crashkernel reserved memory after - * the kdump kernel is loaded. - * - * Provide an empty default implementation here -- architecture - * code may override this - */ -static inline void arch_kexec_protect_crashkres(void) { } -#endif - -#ifndef arch_kexec_unprotect_crashkres -static inline void arch_kexec_unprotect_crashkres(void) { } -#endif - #ifndef page_to_boot_pfn static inline unsigned long page_to_boot_pfn(struct page *page) { @@ -486,23 +459,12 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } #endif -#ifndef arch_crash_handle_hotplug_event -static inline void arch_crash_handle_hotplug_event(struct kimage *image) { } -#endif +extern bool kexec_file_dbg_print; -int crash_check_update_elfcorehdr(void); - -#ifndef crash_hotplug_cpu_support -static inline int crash_hotplug_cpu_support(void) { return 0; } -#endif - -#ifndef crash_hotplug_memory_support -static inline int crash_hotplug_memory_support(void) { return 0; } -#endif - -#ifndef crash_get_elfcorehdr_size -static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; } -#endif +#define kexec_dprintk(fmt, ...) \ + printk("%s" fmt, \ + kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \ + ##__VA_ARGS__) #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 7d985a1dfe4a..5caf3ce82373 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -73,6 +73,7 @@ struct key_type { unsigned int flags; #define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */ +#define KEY_TYPE_INSTANT_REAP 0x00000002 /* Keys of this type don't have a delay after expiring */ /* vet a description */ int (*vet_description)(const char *description); diff --git a/include/linux/key.h b/include/linux/key.h index 938d7ecfb495..943a432da3ae 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -515,6 +515,7 @@ extern void key_init(void); #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) +#define key_lookup(k) NULL #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 401af4757514..88100cc9caba 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -223,6 +223,8 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla #else /* CONFIG_KFENCE */ +#define kfence_sample_interval (0) + static inline bool is_kfence_address(const void *addr) { return false; } static inline void kfence_alloc_pool_and_metadata(void) { } static inline void kfence_init(void) { } diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h index 8bfa6c98176d..929287981afe 100644 --- a/include/linux/kmsan_types.h +++ b/include/linux/kmsan_types.h @@ -9,6 +9,8 @@ #ifndef _LINUX_KMSAN_TYPES_H #define _LINUX_KMSAN_TYPES_H +#include <linux/types.h> + /* These constants are defined in the MSan LLVM instrumentation pass. */ #define KMSAN_RETVAL_SIZE 800 #define KMSAN_PARAM_SIZE 800 diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c30affcc43b4..c8219505a79f 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -38,7 +38,7 @@ extern char uevent_helper[]; #endif /* counter to tag the uevent, read only except for the kobject core */ -extern u64 uevent_seqnum; +extern atomic64_t uevent_seqnum; /* * The actions here must match the index to the string array diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 85a64cb95d75..0ff44d6633e3 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -26,8 +26,7 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/ftrace.h> -#include <linux/refcount.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/rethook.h> #include <asm/kprobes.h> @@ -140,8 +139,8 @@ static inline bool kprobe_ftrace(struct kprobe *p) * */ struct kretprobe_holder { - struct kretprobe *rp; - refcount_t ref; + struct kretprobe __rcu *rp; + struct objpool_head pool; }; struct kretprobe { @@ -154,7 +153,6 @@ struct kretprobe { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook *rh; #else - struct freelist_head freelist; struct kretprobe_holder *rph; #endif }; @@ -165,10 +163,7 @@ struct kretprobe_instance { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook_node node; #else - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; @@ -202,10 +197,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p); #ifdef CONFIG_KRETPROBE_ON_RETHOOK static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); + /* rethook::data is non-changed field, so that you can access it freely. */ + return (struct kretprobe *)ri->node.rethook->data; } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) { @@ -250,10 +243,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return READ_ONCE(ri->rph->rp); + return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held()); } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) @@ -450,6 +440,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + #else /* !CONFIG_KPROBES: */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index c2dd786a30e1..401348e9f92b 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -76,8 +76,8 @@ static inline void ksm_exit(struct mm_struct *mm) * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? */ -struct page *ksm_might_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); +struct folio *ksm_might_need_to_copy(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr); void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); @@ -129,10 +129,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address) +static inline struct folio *ksm_might_need_to_copy(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr) { - return page; + return folio; } static inline void rmap_walk_ksm(struct folio *folio, diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 529974e22ea7..7fcf29a4e0de 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -147,9 +147,4 @@ extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -static inline int strtobool(const char *s, bool *res) -{ - return kstrtobool(s, res); -} - #endif /* _LINUX_KSTRTOX_H */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2c30ade43bc8..b11f53c1ba2e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -86,6 +86,7 @@ void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); +int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_should_stop_or_park(void); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 73f20deb497d..3a4e723eae0f 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -21,12 +21,10 @@ #ifndef _LINUX_KTIME_H #define _LINUX_KTIME_H -#include <linux/time.h> -#include <linux/jiffies.h> #include <asm/bug.h> - -/* Nanosecond scalar representation for kernel time values */ -typedef s64 ktime_t; +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/types.h> /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb6c6109fdca..48f31dcd318a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -80,8 +80,8 @@ /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 -#ifndef KVM_ADDRESS_SPACE_NUM -#define KVM_ADDRESS_SPACE_NUM 1 +#ifndef KVM_MAX_NR_ADDRESS_SPACES +#define KVM_MAX_NR_ADDRESS_SPACES 1 #endif /* @@ -148,6 +148,11 @@ static inline bool kvm_is_error_hva(unsigned long addr) #endif +static inline bool kvm_is_error_gpa(gpa_t gpa) +{ + return gpa == INVALID_GPA; +} + #define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) static inline bool is_error_page(struct page *page) @@ -238,7 +243,6 @@ struct kvm_async_pf { struct list_head link; struct list_head queue; struct kvm_vcpu *vcpu; - struct mm_struct *mm; gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; @@ -253,9 +257,10 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { pte_t pte; + unsigned long attributes; }; struct kvm_gfn_range { @@ -588,8 +593,20 @@ struct kvm_memory_slot { u32 flags; short id; u16 as_id; + +#ifdef CONFIG_KVM_PRIVATE_MEM + struct { + struct file __rcu *file; + pgoff_t pgoff; + } gmem; +#endif }; +static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) +{ + return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); +} + static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -664,7 +681,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[]; + struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif @@ -677,13 +694,29 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #define KVM_MEM_SLOTS_NUM SHRT_MAX #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) -#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +#if KVM_MAX_NR_ADDRESS_SPACES == 1 +static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm) +{ + return KVM_MAX_NR_ADDRESS_SPACES; +} + static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) { return 0; } #endif +/* + * Arch code must define kvm_arch_has_private_mem if support for private memory + * is enabled. + */ +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) +static inline bool kvm_arch_has_private_mem(struct kvm *kvm) +{ + return false; +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -721,9 +754,9 @@ struct kvm { struct mm_struct *mm; /* userspace tied to this vm */ unsigned long nr_memslot_pages; /* The two memslot sets - active and inactive (per address space) */ - struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2]; + struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2]; /* The current active memslot set for each address space */ - struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; + struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES]; struct xarray vcpu_array; /* * Protected by slots_lock, but can be read outside if an @@ -753,7 +786,7 @@ struct kvm { struct list_head vm_list; struct mutex lock; struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; -#ifdef CONFIG_HAVE_KVM_EVENTFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP struct { spinlock_t lock; struct list_head items; @@ -761,8 +794,8 @@ struct kvm { struct list_head resampler_list; struct mutex resampler_lock; } irqfds; - struct list_head ioeventfds; #endif + struct list_head ioeventfds; struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; @@ -778,17 +811,16 @@ struct kvm { * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; -#endif -#ifdef CONFIG_HAVE_KVM_IRQFD + struct hlist_head irq_ack_notifier_list; #endif -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; - unsigned long mmu_invalidate_range_start; - unsigned long mmu_invalidate_range_end; + gfn_t mmu_invalidate_range_start; + gfn_t mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -807,6 +839,10 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; #endif +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + /* Protected by slots_locks (for writes) and RCU (for reads) */ + struct xarray mem_attr_array; +#endif char stats_id[KVM_STATS_NAME_SIZE]; }; @@ -965,7 +1001,7 @@ static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) } #endif -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -989,7 +1025,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); + as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); @@ -1146,9 +1182,9 @@ enum kvm_mr_change { }; int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region2 *mem); int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -1287,21 +1323,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * * @gpc: struct gfn_to_pfn_cache object. * @kvm: pointer to kvm instance. - * @vcpu: vCPU to be used for marking pages dirty and to be woken on - * invalidation. - * @usage: indicates if the resulting host physical PFN is used while - * the @vcpu is IN_GUEST_MODE (in which case invalidation of - * the cache from MMU notifiers---but not for KVM memslot - * changes!---will also force @vcpu to exit the guest and - * refresh the cache); and/or if the PFN used directly - * by KVM (and thus needs a kernel virtual mapping). * * This sets up a gfn_to_pfn_cache by initializing locks and assigning the * immutable attributes. Note, the cache must be zero-allocated (or zeroed by * the caller before init). */ -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage); +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm); /** * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest @@ -1322,6 +1349,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** + * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA. + * + * @gpc: struct gfn_to_pfn_cache object. + * @hva: userspace virtual address to map. + * @len: sanity check; the range being access must fit a single page. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * The semantics of this function are the same as those of kvm_gpc_activate(). It + * merely bypasses a layer of address translation. + */ +int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len); + +/** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. @@ -1367,6 +1410,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); */ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); +static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && !kvm_is_error_gpa(gpc->gpa); +} + +static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && kvm_is_error_gpa(gpc->gpa); +} + void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); @@ -1392,10 +1445,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm); +void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end); +void kvm_mmu_invalidate_end(struct kvm *kvm); +bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -1473,9 +1526,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); -int kvm_arch_create_vm_debugfs(struct kvm *kvm); +void kvm_arch_create_vm_debugfs(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* @@ -1756,11 +1810,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } -static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) +static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - return kvm_is_error_hva(hva); + return !kvm_is_error_hva(hva); +} + +static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc) +{ + lockdep_assert_held(&gpc->lock); + + if (!gpc->memslot) + return; + + mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa)); } enum kvm_stat_kind { @@ -1947,7 +2011,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) @@ -1970,9 +2034,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) return 0; } -static inline int mmu_invalidate_retry_hva(struct kvm *kvm, +static inline int mmu_invalidate_retry_gfn(struct kvm *kvm, unsigned long mmu_seq, - unsigned long hva) + gfn_t gfn) { lockdep_assert_held(&kvm->mmu_lock); /* @@ -1981,14 +2045,50 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm, * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_invalidate_in_progress) && - hva >= kvm->mmu_invalidate_range_start && - hva < kvm->mmu_invalidate_range_end) - return 1; + if (unlikely(kvm->mmu_invalidate_in_progress)) { + /* + * Dropping mmu_lock after bumping mmu_invalidate_in_progress + * but before updating the range is a KVM bug. + */ + if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA || + kvm->mmu_invalidate_range_end == INVALID_GPA)) + return 1; + + if (gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return 1; + } + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } + +/* + * This lockless version of the range-based retry check *must* be paired with a + * call to the locked version after acquiring mmu_lock, i.e. this is safe to + * use only as a pre-check to avoid contending mmu_lock. This version *will* + * get false negatives and false positives. + */ +static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, + unsigned long mmu_seq, + gfn_t gfn) +{ + /* + * Use READ_ONCE() to ensure the in-progress flag and sequence counter + * are always read from memory, e.g. so that checking for retry in a + * loop won't result in an infinite retry loop. Don't force loads for + * start+end, as the key to avoiding infinite retry loops is observing + * the 1=>0 transition of in-progress, i.e. getting false negatives + * due to stale start+end values is acceptable. + */ + if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) && + gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return true; + + return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; +} #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -2013,12 +2113,10 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); -#ifdef CONFIG_HAVE_KVM_EVENTFD - void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); bool kvm_notify_irqfd_resampler(struct kvm *kvm, @@ -2039,31 +2137,7 @@ static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, { return false; } -#endif - -#else - -static inline void kvm_eventfd_init(struct kvm *kvm) {} - -static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) -{ - return -EINVAL; -} - -static inline void kvm_irqfd_release(struct kvm *kvm) {} - -#ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm) -{ -} -#endif - -static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) -{ - return -ENOSYS; -} - -#endif /* CONFIG_HAVE_KVM_EVENTFD */ +#endif /* CONFIG_HAVE_KVM_IRQCHIP */ void kvm_arch_irq_routing_update(struct kvm *kvm); @@ -2318,4 +2392,57 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 +static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + gpa_t gpa, gpa_t size, + bool is_write, bool is_exec, + bool is_private) +{ + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; + vcpu->run->memory_fault.gpa = gpa; + vcpu->run->memory_fault.size = size; + + /* RWX flags are not (yet) defined or communicated to userspace. */ + vcpu->run->memory_fault.flags = 0; + if (is_private) + vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; +} + +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) +{ + return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)); +} + +bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, + unsigned long attrs); +bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); +bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); + +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +{ + return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && + kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; +} +#else +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +{ + return false; +} +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + +#ifdef CONFIG_KVM_PRIVATE_MEM +int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, int *max_order); +#else +static inline int kvm_gmem_get_pfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + kvm_pfn_t *pfn, int *max_order) +{ + KVM_BUG_ON(1, kvm); + return -EIO; +} +#endif /* CONFIG_KVM_PRIVATE_MEM */ + #endif diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 6f4737d5046a..d93f6522b2c3 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -6,6 +6,7 @@ struct kvm; struct kvm_async_pf; struct kvm_device_ops; +struct kvm_gfn_range; struct kvm_interrupt; struct kvm_irq_routing_table; struct kvm_memory_slot; @@ -48,12 +49,6 @@ typedef u64 hfn_t; typedef hfn_t kvm_pfn_t; -enum pfn_cache_usage { - KVM_GUEST_USES_PFN = BIT(0), - KVM_HOST_USES_PFN = BIT(1), - KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN, -}; - struct gfn_to_hva_cache { u64 generation; gpa_t gpa; @@ -68,13 +63,11 @@ struct gfn_to_pfn_cache { unsigned long uhva; struct kvm_memory_slot *memslot; struct kvm *kvm; - struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; struct mutex refresh_lock; void *khva; kvm_pfn_t pfn; - enum pfn_cache_usage usage; bool active; bool valid; }; diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 612b4cab3819..36df927ec4b7 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -85,7 +85,6 @@ static inline struct led_classdev_flash *lcdev_to_flcdev( return container_of(lcdev, struct led_classdev_flash, led_cdev); } -#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) /** * led_classdev_flash_register_ext - register a new object of LED class with * init data and with support for flash LEDs @@ -116,29 +115,6 @@ int devm_led_classdev_flash_register_ext(struct device *parent, void devm_led_classdev_flash_unregister(struct device *parent, struct led_classdev_flash *fled_cdev); -#else - -static inline int led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {}; -static inline int devm_led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void devm_led_classdev_flash_unregister(struct device *parent, - struct led_classdev_flash *fled_cdev) -{}; - -#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */ - static inline int led_classdev_flash_register(struct device *parent, struct led_classdev_flash *fled_cdev) { diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h index 210d57bcd767..db9f34c6736e 100644 --- a/include/linux/led-class-multicolor.h +++ b/include/linux/led-class-multicolor.h @@ -30,7 +30,6 @@ static inline struct led_classdev_mc *lcdev_to_mccdev( return container_of(led_cdev, struct led_classdev_mc, led_cdev); } -#if IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) /** * led_classdev_multicolor_register_ext - register a new object of led_classdev * class with support for multicolor LEDs @@ -64,34 +63,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent, void devm_led_classdev_multicolor_unregister(struct device *parent, struct led_classdev_mc *mcled_cdev); -#else - -static inline int led_classdev_multicolor_register_ext(struct device *parent, - struct led_classdev_mc *mcled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {}; -static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev, - enum led_brightness brightness) -{ - return 0; -} - -static inline int devm_led_classdev_multicolor_register_ext(struct device *parent, - struct led_classdev_mc *mcled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void devm_led_classdev_multicolor_unregister(struct device *parent, - struct led_classdev_mc *mcled_cdev) -{}; - -#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */ static inline int led_classdev_multicolor_register(struct device *parent, struct led_classdev_mc *mcled_cdev) diff --git a/include/linux/leds-expresswire.h b/include/linux/leds-expresswire.h new file mode 100644 index 000000000000..a422921f4159 --- /dev/null +++ b/include/linux/leds-expresswire.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Shared library for Kinetic's ExpressWire protocol. + * This protocol works by pulsing the ExpressWire IC's control GPIO. + * ktd2692 and ktd2801 are known to use this protocol. + */ + +#ifndef _LEDS_EXPRESSWIRE_H +#define _LEDS_EXPRESSWIRE_H + +#include <linux/types.h> + +struct gpio_desc; + +struct expresswire_timing { + unsigned long poweroff_us; + unsigned long detect_delay_us; + unsigned long detect_us; + unsigned long data_start_us; + unsigned long end_of_data_low_us; + unsigned long end_of_data_high_us; + unsigned long short_bitset_us; + unsigned long long_bitset_us; +}; + +struct expresswire_common_props { + struct gpio_desc *ctrl_gpio; + struct expresswire_timing timing; +}; + +void expresswire_power_off(struct expresswire_common_props *props); +void expresswire_enable(struct expresswire_common_props *props); +void expresswire_start(struct expresswire_common_props *props); +void expresswire_end(struct expresswire_common_props *props); +void expresswire_set_bit(struct expresswire_common_props *props, bool bit); +void expresswire_write_u8(struct expresswire_common_props *props, u8 val); + +#endif /* _LEDS_EXPRESSWIRE_H */ diff --git a/include/linux/leds.h b/include/linux/leds.h index aa16dc2a8230..db6b114bb3d9 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -82,15 +82,7 @@ struct led_init_data { bool devname_mandatory; }; -#if IS_ENABLED(CONFIG_NEW_LEDS) enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode); -#else -static inline enum led_default_state -led_init_default_state_get(struct fwnode_handle *fwnode) -{ - return LEDS_DEFSTATE_OFF; -} -#endif struct led_hw_trigger_type { int dummy; @@ -279,20 +271,9 @@ static inline int led_classdev_register(struct device *parent, return led_classdev_register_ext(parent, led_cdev, NULL); } -#if IS_ENABLED(CONFIG_LEDS_CLASS) int devm_led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data); -#else -static inline int -devm_led_classdev_register_ext(struct device *parent, - struct led_classdev *led_cdev, - struct led_init_data *init_data) -{ - return 0; -} -#endif - static inline int devm_led_classdev_register(struct device *parent, struct led_classdev *led_cdev) { @@ -527,23 +508,6 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return led_cdev->trigger_data; } -/** - * led_trigger_rename_static - rename a trigger - * @name: the new trigger name - * @trig: the LED trigger to rename - * - * Change a LED trigger name by copying the string passed in - * name into current trigger name, which MUST be large - * enough for the new string. - * - * Note that name must NOT point to the same string used - * during LED registration, as that could lead to races. - * - * This is meant to be used on triggers with statically - * allocated name. - */ -void led_trigger_rename_static(const char *name, struct led_trigger *trig); - #define module_led_trigger(__led_trigger) \ module_driver(__led_trigger, led_trigger_register, \ led_trigger_unregister) @@ -588,6 +552,9 @@ enum led_trigger_netdev_modes { TRIGGER_NETDEV_LINK_10, TRIGGER_NETDEV_LINK_100, TRIGGER_NETDEV_LINK_1000, + TRIGGER_NETDEV_LINK_2500, + TRIGGER_NETDEV_LINK_5000, + TRIGGER_NETDEV_LINK_10000, TRIGGER_NETDEV_HALF_DUPLEX, TRIGGER_NETDEV_FULL_DUPLEX, TRIGGER_NETDEV_TX, @@ -672,7 +639,7 @@ struct gpio_led_platform_data { gpio_blink_set_t gpio_blink_set; }; -#ifdef CONFIG_NEW_LEDS +#ifdef CONFIG_LEDS_GPIO_REGISTER struct platform_device *gpio_led_register_device( int id, const struct gpio_led_platform_data *pdata); #else diff --git a/include/linux/libata.h b/include/linux/libata.h index 2a7d2af0ed80..324d792e7c78 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ @@ -471,7 +472,7 @@ enum ata_completion_errors { /* * Link power management policy: If you alter this, you also need to - * alter libata-scsi.c (for the ascii descriptions) + * alter libata-sata.c (for the ascii descriptions) */ enum ata_lpm_policy { ATA_LPM_UNKNOWN, @@ -656,7 +657,7 @@ struct ata_cpr { struct ata_cpr_log { u8 nr_cpr; - struct ata_cpr cpr[]; + struct ata_cpr cpr[] __counted_by(nr_cpr); }; struct ata_device { @@ -1561,6 +1562,11 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...); extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); #endif +static inline void ata_port_desc_misc(struct ata_port *ap, int irq) +{ + ata_port_desc(ap, "irq %d", irq); + ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); +} static inline bool ata_tag_internal(unsigned int tag) { @@ -1881,23 +1887,21 @@ static inline unsigned long ata_deadline(unsigned long from_jiffies, change in future hardware and specs, secondly 0xFF means 'no DMA' but is > UDMA_0. Dyma ddreigiau */ -static inline int ata_using_mwdma(struct ata_device *adev) +static inline bool ata_using_mwdma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4) - return 1; - return 0; + return adev->dma_mode >= XFER_MW_DMA_0 && + adev->dma_mode <= XFER_MW_DMA_4; } -static inline int ata_using_udma(struct ata_device *adev) +static inline bool ata_using_udma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7) - return 1; - return 0; + return adev->dma_mode >= XFER_UDMA_0 && + adev->dma_mode <= XFER_UDMA_7; } -static inline int ata_dma_enabled(struct ata_device *adev) +static inline bool ata_dma_enabled(struct ata_device *adev) { - return (adev->dma_mode == 0xFF ? 0 : 1); + return adev->dma_mode != 0xFF; } /************************************************************************** diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 15e0e0209da4..287f590ed56b 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -10,6 +10,11 @@ static inline void linkmode_zero(unsigned long *dst) bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline void linkmode_fill(unsigned long *dst) +{ + bitmap_fill(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + static inline void linkmode_copy(unsigned long *dst, const unsigned long *src) { bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -43,15 +48,6 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) __set_bit(nr, addr); } -static inline void linkmode_set_bit_array(const int *array, int array_size, - unsigned long *addr) -{ - int i; - - for (i = 0; i < array_size; i++) - linkmode_set_bit(array[i], addr); -} - static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) { __clear_bit(nr, addr); @@ -71,6 +67,15 @@ static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) return test_bit(nr, addr); } +static inline void linkmode_set_bit_array(const int *array, int array_size, + unsigned long *addr) +{ + int i; + + for (i = 0; i < array_size; i++) + linkmode_set_bit(array[i], addr); +} + static inline int linkmode_equal(const unsigned long *src1, const unsigned long *src2) { diff --git a/include/linux/list.h b/include/linux/list.h index 164b4d0e9d2a..5f4b0a39cf46 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -687,6 +687,14 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** + * list_for_each_reverse - iterate backwards over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_reverse(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + +/** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. @@ -758,7 +766,7 @@ static inline size_t list_count_nodes(struct list_head *head) * @member: the name of the list_head within the struct. */ #define list_entry_is_head(pos, head, member) \ - (&pos->member == (head)) + list_is_head(&pos->member, (head)) /** * list_for_each_entry - iterate over list of given type @@ -1111,6 +1119,26 @@ static inline void hlist_move_list(struct hlist_head *old, old->first = NULL; } +/** + * hlist_splice_init() - move all entries from one list to another + * @from: hlist_head from which entries will be moved + * @last: last entry on the @from list + * @to: hlist_head to which entries will be moved + * + * @to can be empty, @from must contain at least @last. + */ +static inline void hlist_splice_init(struct hlist_head *from, + struct hlist_node *last, + struct hlist_head *to) +{ + if (to->first) + to->first->pprev = &last->next; + last->next = to->first; + to->first = from->first; + from->first->pprev = &to->first; + from->first = NULL; +} + #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -1167,4 +1195,19 @@ static inline void hlist_move_list(struct hlist_head *old, pos && ({ n = pos->member.next; 1; }); \ pos = hlist_entry_safe(n, typeof(*pos), member)) +/** + * hlist_count_nodes - count nodes in the hlist + * @head: the head for your hlist. + */ +static inline size_t hlist_count_nodes(struct hlist_head *head) +{ + struct hlist_node *pos; + size_t count = 0; + + hlist_for_each(pos, head) + count++; + + return count; +} + #endif diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index b35968ee9fb5..792b67ceb631 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -24,6 +24,8 @@ enum lru_status { LRU_SKIP, /* item cannot be locked, skip */ LRU_RETRY, /* item not freeable. May drop the lock internally, but has to return locked. */ + LRU_STOP, /* stop lru list walking. May drop the lock + internally, but has to return locked. */ }; struct list_lru_one { @@ -62,8 +64,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, #define list_lru_init(lru) \ __list_lru_init((lru), false, NULL, NULL) -#define list_lru_init_key(lru, key) \ - __list_lru_init((lru), false, (key), NULL) #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, NULL, shrinker) @@ -73,8 +73,10 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren /** * list_lru_add: add an element to the lru list's tail - * @list_lru: the lru pointer + * @lru: the lru pointer * @item: the item to be added. + * @nid: the node id of the sublist to add the item to. + * @memcg: the cgroup of the sublist to add the item to. * * If the element is already part of a list, this function returns doing * nothing. Therefore the caller does not need to keep state about whether or @@ -83,24 +85,54 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren * the caller organize itself in a way that elements can be in more than * one type of list, it is up to the caller to fully remove the item from * the previous list (with list_lru_del() for instance) before moving it - * to @list_lru + * to @lru. + * + * Return: true if the list was updated, false otherwise + */ +bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, + struct mem_cgroup *memcg); + +/** + * list_lru_add_obj: add an element to the lru list's tail + * @lru: the lru pointer + * @item: the item to be added. + * + * This function is similar to list_lru_add(), but the NUMA node and the + * memcg of the sublist is determined by @item list_head. This assumption is + * valid for slab objects LRU such as dentries, inodes, etc. * * Return value: true if the list was updated, false otherwise */ -bool list_lru_add(struct list_lru *lru, struct list_head *item); +bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); /** - * list_lru_del: delete an element to the lru list - * @list_lru: the lru pointer + * list_lru_del: delete an element from the lru list + * @lru: the lru pointer * @item: the item to be deleted. + * @nid: the node id of the sublist to delete the item from. + * @memcg: the cgroup of the sublist to delete the item from. * - * This function works analogously as list_lru_add in terms of list + * This function works analogously as list_lru_add() in terms of list * manipulation. The comments about an element already pertaining to - * a list are also valid for list_lru_del. + * a list are also valid for list_lru_del(). * - * Return value: true if the list was updated, false otherwise + * Return: true if the list was updated, false otherwise */ -bool list_lru_del(struct list_lru *lru, struct list_head *item); +bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, + struct mem_cgroup *memcg); + +/** + * list_lru_del_obj: delete an element from the lru list + * @lru: the lru pointer + * @item: the item to be deleted. + * + * This function is similar to list_lru_del(), but the NUMA node and the + * memcg of the sublist is determined by @item list_head. This assumption is + * valid for slab objects LRU such as dentries, inodes, etc. + * + * Return value: true if the list was updated, false otherwise. + */ +bool list_lru_del_obj(struct list_lru *lru, struct list_head *item); /** * list_lru_count_one: return the number of objects currently held by @lru @@ -108,9 +140,11 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item); * @nid: the node id to count from. * @memcg: the cgroup to count from. * - * Always return a non-negative number, 0 for empty lists. There is no - * guarantee that the list is not updated while the count is being computed. - * Callers that want such a guarantee need to provide an outer lock. + * There is no guarantee that the list is not updated while the count is being + * computed. Callers that want such a guarantee need to provide an outer lock. + * + * Return: 0 for empty lists, otherwise the number of objects + * currently held by @lru. */ unsigned long list_lru_count_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg); @@ -141,7 +175,7 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, struct list_lru_one *list, spinlock_t *lock, void *cb_arg); /** - * list_lru_walk_one: walk a list_lru, isolating and disposing freeable items. + * list_lru_walk_one: walk a @lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. @@ -150,24 +184,24 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * - * This function will scan all elements in a particular list_lru, calling the + * This function will scan all elements in a particular @lru, calling the * @isolate callback for each of those items, along with the current list * spinlock and a caller-provided opaque. The @isolate callback can choose to * drop the lock internally, but *must* return with the lock held. The callback - * will return an enum lru_status telling the list_lru infrastructure what to + * will return an enum lru_status telling the @lru infrastructure what to * do with the object being scanned. * - * Please note that nr_to_walk does not mean how many objects will be freed, + * Please note that @nr_to_walk does not mean how many objects will be freed, * just how many objects will be scanned. * - * Return value: the number of objects effectively removed from the LRU. + * Return: the number of objects effectively removed from the LRU. */ unsigned long list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); /** - * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items. + * list_lru_walk_one_irq: walk a @lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. @@ -176,7 +210,7 @@ unsigned long list_lru_walk_one(struct list_lru *lru, * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * - * Same as @list_lru_walk_one except that the spinlock is acquired with + * Same as list_lru_walk_one() except that the spinlock is acquired with * spin_lock_irq(). */ unsigned long list_lru_walk_one_irq(struct list_lru *lru, diff --git a/include/linux/llist.h b/include/linux/llist.h index 85bda2d02d65..2c982ff7475a 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -74,6 +74,33 @@ static inline void init_llist_head(struct llist_head *list) } /** + * init_llist_node - initialize lock-less list node + * @node: the node to be initialised + * + * In cases where there is a need to test if a node is on + * a list or not, this initialises the node to clearly + * not be on any list. + */ +static inline void init_llist_node(struct llist_node *node) +{ + node->next = node; +} + +/** + * llist_on_list - test if a lock-list list node is on a list + * @node: the node to test + * + * When a node is on a list the ->next pointer will be NULL or + * some other node. It can never point to itself. We use that + * in init_llist_node() to record that a node is not on any list, + * and here to test whether it is on any list. + */ +static inline bool llist_on_list(const struct llist_node *node) +{ + return node->next != node; +} + +/** * llist_entry - get the struct of this entry * @ptr: the &struct llist_node pointer. * @type: the type of the struct this is embedded in. @@ -249,6 +276,25 @@ static inline struct llist_node *__llist_del_all(struct llist_head *head) extern struct llist_node *llist_del_first(struct llist_head *head); +/** + * llist_del_first_init - delete first entry from lock-list and mark is as being off-list + * @head: the head of lock-less list to delete from. + * + * This behave the same as llist_del_first() except that llist_init_node() is called + * on the returned node so that llist_on_list() will report false for the node. + */ +static inline struct llist_node *llist_del_first_init(struct llist_head *head) +{ + struct llist_node *n = llist_del_first(head); + + if (n) + init_llist_node(n); + return n; +} + +extern bool llist_del_first_this(struct llist_head *head, + struct llist_node *this); + struct llist_node *llist_reverse_order(struct llist_node *head); #endif /* LLIST_H */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0f016d69c996..1b95fe31051f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -282,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); -void nlmsvc_retry_blocked(void); +void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); void nlmsvc_grant_reply(struct nlm_cookie *, __be32); @@ -375,12 +375,12 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) - && fl1->fl_pid == fl2->fl_pid - && fl1->fl_owner == fl2->fl_owner + return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file) + && fl1->c.flc_pid == fl2->c.flc_pid + && fl1->c.flc_owner == fl2->c.flc_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end - &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK); + &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK); } extern const struct lock_manager_operations nlmsvc_lock_operations; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index b60fbcd8cdfa..80cca9426761 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -52,7 +52,7 @@ struct nlm_lock { * FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to * 32 bytes. */ - + struct nlm_cookie { unsigned char data[NLM_MAXCOOKIELEN]; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index dc2844b071c2..08b0d1d9d78b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -82,63 +82,6 @@ struct lock_chain { u64 chain_key; }; -#define MAX_LOCKDEP_KEYS_BITS 13 -#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) -#define INITIAL_CHAIN_KEY -1 - -struct held_lock { - /* - * One-way hash of the dependency chain up to this point. We - * hash the hashes step by step as the dependency chain grows. - * - * We use it for dependency-caching and we skip detection - * passes and dependency-updates if there is a cache-hit, so - * it is absolutely critical for 100% coverage of the validator - * to have a unique key value for every unique dependency path - * that can occur in the system, to make a unique hash value - * as likely as possible - hence the 64-bit width. - * - * The task struct holds the current hash value (initialized - * with zero), here we store the previous hash value: - */ - u64 prev_chain_key; - unsigned long acquire_ip; - struct lockdep_map *instance; - struct lockdep_map *nest_lock; -#ifdef CONFIG_LOCK_STAT - u64 waittime_stamp; - u64 holdtime_stamp; -#endif - /* - * class_idx is zero-indexed; it points to the element in - * lock_classes this held lock instance belongs to. class_idx is in - * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive. - */ - unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; - /* - * The lock-stack is unified in that the lock chains of interrupt - * contexts nest ontop of process context chains, but we 'separate' - * the hashes by starting with 0 if we cross into an interrupt - * context, and we also keep do not add cross-context lock - * dependencies - the lock usage graph walking covers that area - * anyway, and we'd just unnecessarily increase the number of - * dependencies otherwise. [Note: hardirq and softirq contexts - * are separated from each other too.] - * - * The following field is used to detect when we cross into an - * interrupt context: - */ - unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ - unsigned int trylock:1; /* 16 bits */ - - unsigned int read:2; /* see lock_acquire() comment */ - unsigned int check:1; /* see lock_acquire() comment */ - unsigned int hardirqs_off:1; - unsigned int sync:1; - unsigned int references:11; /* 32 bits */ - unsigned int pin_count; -}; - /* * Initialization, self-test and debugging-output methods: */ diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 2ebc323d345a..70d30d40ea4a 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -127,12 +127,12 @@ struct lock_class { unsigned long usage_mask; const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; + const char *name; /* * Generation counter, when doing certain classes of graph walking, * to ensure that we check one node only once: */ int name_version; - const char *name; u8 wait_type_inner; u8 wait_type_outer; @@ -198,6 +198,63 @@ struct lockdep_map { struct pin_cookie { unsigned int val; }; +#define MAX_LOCKDEP_KEYS_BITS 13 +#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) +#define INITIAL_CHAIN_KEY -1 + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. + * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + unsigned long acquire_ip; + struct lockdep_map *instance; + struct lockdep_map *nest_lock; +#ifdef CONFIG_LOCK_STAT + u64 waittime_stamp; + u64 holdtime_stamp; +#endif + /* + * class_idx is zero-indexed; it points to the element in + * lock_classes this held lock instance belongs to. class_idx is in + * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive. + */ + unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest ontop of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also keep do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. [Note: hardirq and softirq contexts + * are separated from each other too.] + * + * The following field is used to detect when we cross into an + * interrupt context: + */ + unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ + unsigned int trylock:1; /* 16 bits */ + + unsigned int read:2; /* see lock_acquire() comment */ + unsigned int check:1; /* see lock_acquire() comment */ + unsigned int hardirqs_off:1; + unsigned int sync:1; + unsigned int references:11; /* 32 bits */ + unsigned int pin_count; +}; + #else /* !CONFIG_LOCKDEP */ /* diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index 54945aa824b4..babf4e3c28ba 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -39,9 +39,6 @@ struct logic_pio_host_ops { #ifdef CONFIG_INDIRECT_PIO u8 logic_inb(unsigned long addr); -void logic_outb(u8 value, unsigned long addr); -void logic_outw(u16 value, unsigned long addr); -void logic_outl(u32 value, unsigned long addr); u16 logic_inw(unsigned long addr); u32 logic_inl(unsigned long addr); void logic_outb(u8 value, unsigned long addr); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ac962c4cb44b..334e00efbde4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -43,17 +43,17 @@ LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, const kernel_cap_t *permitted) LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) -LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb) LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) -LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file) +LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) @@ -66,7 +66,7 @@ LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) -LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, @@ -94,6 +94,8 @@ LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) +LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap, + struct dentry *dentry) LSM_HOOK(int, 0, path_truncate, const struct path *path) LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, const char *old_name) @@ -119,6 +121,8 @@ LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode, const struct qstr *name, const struct inode *context_inode) LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, umode_t mode) +LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap, + struct inode *inode) LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) @@ -135,7 +139,10 @@ LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, bool rcu) LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) -LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) +LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, + struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, @@ -146,12 +153,18 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, + const char *name) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) +LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl) LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap, + struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, struct dentry *dentry) @@ -168,9 +181,12 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) +LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) +LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, + unsigned long arg) LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) @@ -184,6 +200,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) @@ -262,6 +279,10 @@ LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, struct inode *inode) +LSM_HOOK(int, -EOPNOTSUPP, getselfattr, unsigned int attr, + struct lsm_ctx __user *ctx, u32 *size, u32 flags) +LSM_HOOK(int, -EOPNOTSUPP, setselfattr, unsigned int attr, + struct lsm_ctx *ctx, u32 size, u32 flags) LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) @@ -273,7 +294,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@ -309,9 +330,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) @@ -384,6 +405,9 @@ LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) +LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, + struct key *key, const void *payload, size_t payload_len, + unsigned long flags, bool create) #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -398,10 +422,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, + struct path *path) +LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) +LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) +LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index dcb5e5b5eb13..a2ade0ffe9e7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -25,6 +25,7 @@ #ifndef __LINUX_LSM_HOOKS_H #define __LINUX_LSM_HOOKS_H +#include <uapi/linux/lsm.h> #include <linux/security.h> #include <linux/init.h> #include <linux/rculist.h> @@ -42,6 +43,18 @@ struct security_hook_heads { #undef LSM_HOOK } __randomize_layout; +/** + * struct lsm_id - Identify a Linux Security Module. + * @lsm: name of the LSM, must be approved by the LSM maintainers + * @id: LSM ID number from uapi/linux/lsm.h + * + * Contains the information that identifies the LSM. + */ +struct lsm_id { + const char *name; + u64 id; +}; + /* * Security module hook list structure. * For use with generic list macros for common operations. @@ -50,7 +63,7 @@ struct security_hook_list { struct hlist_node list; struct hlist_head *head; union security_list_options hook; - const char *lsm; + const struct lsm_id *lsmid; } __randomize_layout; /* @@ -104,7 +117,7 @@ extern struct security_hook_heads security_hook_heads; extern char *lsm_names; extern void security_add_hooks(struct security_hook_list *hooks, int count, - const char *lsm); + const struct lsm_id *lsmid); #define LSM_FLAG_LEGACY_MAJOR BIT(0) #define LSM_FLAG_EXCLUSIVE BIT(1) diff --git a/include/linux/lwq.h b/include/linux/lwq.h new file mode 100644 index 000000000000..d081d5cf8e33 --- /dev/null +++ b/include/linux/lwq.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LWQ_H +#define LWQ_H +/* + * Light-weight single-linked queue built from llist + * + * Entries can be enqueued from any context with no locking. + * Entries can be dequeued from process context with integrated locking. + * + * This is particularly suitable when work items are queued in + * BH or IRQ context, and where work items are handled one at a time + * by dedicated threads. + */ +#include <linux/container_of.h> +#include <linux/spinlock.h> +#include <linux/llist.h> + +struct lwq_node { + struct llist_node node; +}; + +struct lwq { + spinlock_t lock; + struct llist_node *ready; /* entries to be dequeued */ + struct llist_head new; /* entries being enqueued */ +}; + +/** + * lwq_init - initialise a lwq + * @q: the lwq object + */ +static inline void lwq_init(struct lwq *q) +{ + spin_lock_init(&q->lock); + q->ready = NULL; + init_llist_head(&q->new); +} + +/** + * lwq_empty - test if lwq contains any entry + * @q: the lwq object + * + * This empty test contains an acquire barrier so that if a wakeup + * is sent when lwq_dequeue returns true, it is safe to go to sleep after + * a test on lwq_empty(). + */ +static inline bool lwq_empty(struct lwq *q) +{ + /* acquire ensures ordering wrt lwq_enqueue() */ + return smp_load_acquire(&q->ready) == NULL && llist_empty(&q->new); +} + +struct llist_node *__lwq_dequeue(struct lwq *q); +/** + * lwq_dequeue - dequeue first (oldest) entry from lwq + * @q: the queue to dequeue from + * @type: the type of object to return + * @member: them member in returned object which is an lwq_node. + * + * Remove a single object from the lwq and return it. This will take + * a spinlock and so must always be called in the same context, typcially + * process contet. + */ +#define lwq_dequeue(q, type, member) \ + ({ struct llist_node *_n = __lwq_dequeue(q); \ + _n ? container_of(_n, type, member.node) : NULL; }) + +struct llist_node *lwq_dequeue_all(struct lwq *q); + +/** + * lwq_for_each_safe - iterate over detached queue allowing deletion + * @_n: iterator variable + * @_t1: temporary struct llist_node ** + * @_t2: temporary struct llist_node * + * @_l: address of llist_node pointer from lwq_dequeue_all() + * @_member: member in _n where lwq_node is found. + * + * Iterate over members in a dequeued list. If the iterator variable + * is set to NULL, the iterator removes that entry from the queue. + */ +#define lwq_for_each_safe(_n, _t1, _t2, _l, _member) \ + for (_t1 = (_l); \ + *(_t1) ? (_n = container_of(*(_t1), typeof(*(_n)), _member.node),\ + _t2 = ((*_t1)->next), \ + true) \ + : false; \ + (_n) ? (_t1 = &(_n)->_member.node.next, 0) \ + : ((*(_t1) = (_t2)), 0)) + +/** + * lwq_enqueue - add a new item to the end of the queue + * @n - the lwq_node embedded in the item to be added + * @q - the lwq to append to. + * + * No locking is needed to append to the queue so this can + * be called from any context. + * Return %true is the list may have previously been empty. + */ +static inline bool lwq_enqueue(struct lwq_node *n, struct lwq *q) +{ + /* acquire enqures ordering wrt lwq_dequeue */ + return llist_add(&n->node, &q->new) && + smp_load_acquire(&q->ready) == NULL; +} + +/** + * lwq_enqueue_batch - add a list of new items to the end of the queue + * @n - the lwq_node embedded in the first item to be added + * @q - the lwq to append to. + * + * No locking is needed to append to the queue so this can + * be called from any context. + * Return %true is the list may have previously been empty. + */ +static inline bool lwq_enqueue_batch(struct llist_node *n, struct lwq *q) +{ + struct llist_node *e = n; + + /* acquire enqures ordering wrt lwq_dequeue */ + return llist_add_batch(llist_reverse_order(n), e, &q->new) && + smp_load_acquire(&q->ready) == NULL; +} +#endif /* LWQ_H */ diff --git a/include/linux/maple.h b/include/linux/maple.h index 9b140272ee16..9aae44efcfd4 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -5,7 +5,6 @@ #include <mach/maple.h> struct device; -extern struct bus_type maple_bus_type; /* Maple Bus command and response codes */ enum maple_code { diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index d01e850b570f..a53ad4dabd7e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -171,6 +171,7 @@ enum maple_type { #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 +#define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 @@ -256,6 +257,8 @@ struct maple_tree { struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) +#define mtree_lock_nested(mas, subclass) \ + spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* @@ -317,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first, int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); +int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); @@ -327,6 +333,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); void *mtree_erase(struct maple_tree *mt, unsigned long index); +int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); +int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); + void mtree_destroy(struct maple_tree *mt); void __mt_destroy(struct maple_tree *mt); @@ -345,6 +354,36 @@ static inline bool mtree_empty(const struct maple_tree *mt) /* Advanced API */ /* + * Maple State Status + * ma_active means the maple state is pointing to a node and offset and can + * continue operating on the tree. + * ma_start means we have not searched the tree. + * ma_root means we have searched the tree and the entry we found lives in + * the root of the tree (ie it has index 0, length 1 and is the only entry in + * the tree). + * ma_none means we have searched the tree and there is no node in the + * tree for this entry. For example, we searched for index 1 in an empty + * tree. Or we have a tree which points to a full leaf node and we + * searched for an entry which is larger than can be contained in that + * leaf node. + * ma_pause means the data within the maple state may be stale, restart the + * operation + * ma_overflow means the search has reached the upper limit of the search + * ma_underflow means the search has reached the lower limit of the search + * ma_error means there was an error, check the node for the error number. + */ +enum maple_status { + ma_active, + ma_start, + ma_root, + ma_none, + ma_pause, + ma_overflow, + ma_underflow, + ma_error, +}; + +/* * The maple state is defined in the struct ma_state and is used to keep track * of information during operations, and even between operations when using the * advanced API. @@ -376,6 +415,13 @@ static inline bool mtree_empty(const struct maple_tree *mt) * When returning a value the maple state index and last respectively contain * the start and end of the range for the entry. Ranges are inclusive in the * Maple Tree. + * + * The status of the state is used to determine how the next action should treat + * the state. For instance, if the status is ma_start then the next action + * should start at the root of the tree and walk down. If the status is + * ma_pause then the node may be stale data and should be discarded. If the + * status is ma_overflow, then the last action hit the upper limit. + * */ struct ma_state { struct maple_tree *tree; /* The tree we're operating in */ @@ -385,9 +431,11 @@ struct ma_state { unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct maple_alloc *alloc; /* Allocated nodes for this operation */ + enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; + unsigned char end; /* The end of the node */ }; struct ma_wr_state { @@ -397,7 +445,6 @@ struct ma_wr_state { unsigned long r_max; /* range max */ enum maple_type type; /* mas->node type */ unsigned char offset_end; /* The offset where the write ends */ - unsigned char node_end; /* mas->node end */ unsigned long *pivots; /* mas->node->pivots pointer */ unsigned long end_piv; /* The pivot at the offset end */ void __rcu **slots; /* mas->node->slots pointer */ @@ -406,30 +453,16 @@ struct ma_wr_state { }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) +#define mas_lock_nested(mas, subclass) \ + spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) - /* * Special values for ma_state.node. - * MAS_START means we have not searched the tree. - * MAS_ROOT means we have searched the tree and the entry we found lives in - * the root of the tree (ie it has index 0, length 1 and is the only entry in - * the tree). - * MAS_NONE means we have searched the tree and there is no node in the - * tree for this entry. For example, we searched for index 1 in an empty - * tree. Or we have a tree which points to a full leaf node and we - * searched for an entry which is larger than can be contained in that - * leaf node. * MA_ERROR represents an errno. After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. */ -#define MAS_START ((struct maple_enode *)1UL) -#define MAS_ROOT ((struct maple_enode *)5UL) -#define MAS_NONE ((struct maple_enode *)9UL) -#define MAS_PAUSE ((struct maple_enode *)17UL) -#define MAS_OVERFLOW ((struct maple_enode *)33UL) -#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) @@ -438,7 +471,8 @@ struct ma_wr_state { .tree = mt, \ .index = first, \ .last = end, \ - .node = MAS_START, \ + .node = NULL, \ + .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ @@ -469,7 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); -bool mas_is_err(struct ma_state *mas); +int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); @@ -498,28 +534,18 @@ static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; - mas->node = MAS_START; -} - -/* Checks if a mas has not found anything */ -static inline bool mas_is_none(const struct ma_state *mas) -{ - return mas->node == MAS_NONE; + mas->status = ma_start; + mas->node = NULL; } -/* Checks if a mas has been paused */ -static inline bool mas_is_paused(const struct ma_state *mas) +static inline bool mas_is_active(struct ma_state *mas) { - return mas->node == MAS_PAUSE; + return mas->status == ma_active; } -/* Check if the mas is pointing to a node or not */ -static inline bool mas_is_active(struct ma_state *mas) +static inline bool mas_is_err(struct ma_state *mas) { - if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) - return true; - - return false; + return mas->status == ma_error; } /** @@ -532,9 +558,10 @@ static inline bool mas_is_active(struct ma_state *mas) * * Context: Any context. */ -static inline void mas_reset(struct ma_state *mas) +static __always_inline void mas_reset(struct ma_state *mas) { - mas->node = MAS_START; + mas->status = ma_start; + mas->node = NULL; } /** @@ -550,6 +577,131 @@ static inline void mas_reset(struct ma_state *mas) */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) + +#ifdef CONFIG_DEBUG_MAPLE_TREE +enum mt_dump_format { + mt_dump_dec, + mt_dump_hex, +}; + +extern atomic_t maple_tree_tests_run; +extern atomic_t maple_tree_tests_passed; + +void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); +void mas_dump(const struct ma_state *mas); +void mas_wr_dump(const struct ma_wr_state *wr_mas); +void mt_validate(struct maple_tree *mt); +void mt_cache_shrink(void); +#define MT_BUG_ON(__tree, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MAS_BUG_ON(__mas, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_dump(__mas); \ + mt_dump((__mas)->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MAS_WR_BUG_ON(__wrmas, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_wr_dump(__wrmas); \ + mas_dump((__wrmas)->mas); \ + mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MT_WARN_ON(__tree, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) + +#define MAS_WARN_ON(__mas, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_dump(__mas); \ + mt_dump((__mas)->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) + +#define MAS_WR_WARN_ON(__wrmas, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_wr_dump(__wrmas); \ + mas_dump((__wrmas)->mas); \ + mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) +#else +#define MT_BUG_ON(__tree, __x) BUG_ON(__x) +#define MAS_BUG_ON(__mas, __x) BUG_ON(__x) +#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) +#define MT_WARN_ON(__tree, __x) WARN_ON(__x) +#define MAS_WARN_ON(__mas, __x) WARN_ON(__x) +#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) +#endif /* CONFIG_DEBUG_MAPLE_TREE */ + /** * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the * current location. @@ -563,6 +715,9 @@ static inline void mas_reset(struct ma_state *mas) static inline void __mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { + /* Ensure the range starts within the current slot */ + MAS_WARN_ON(mas, mas_is_active(mas) && + (mas->index > start || mas->last < start)); mas->index = start; mas->last = last; } @@ -580,8 +735,8 @@ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { + mas_reset(mas); __mas_set_range(mas, start, last); - mas->node = MAS_START; } /** @@ -706,129 +861,4 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) - -#ifdef CONFIG_DEBUG_MAPLE_TREE -enum mt_dump_format { - mt_dump_dec, - mt_dump_hex, -}; - -extern atomic_t maple_tree_tests_run; -extern atomic_t maple_tree_tests_passed; - -void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); -void mas_dump(const struct ma_state *mas); -void mas_wr_dump(const struct ma_wr_state *wr_mas); -void mt_validate(struct maple_tree *mt); -void mt_cache_shrink(void); -#define MT_BUG_ON(__tree, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MAS_BUG_ON(__mas, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_dump(__mas); \ - mt_dump((__mas)->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MAS_WR_BUG_ON(__wrmas, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_wr_dump(__wrmas); \ - mas_dump((__wrmas)->mas); \ - mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MT_WARN_ON(__tree, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) - -#define MAS_WARN_ON(__mas, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_dump(__mas); \ - mt_dump((__mas)->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) - -#define MAS_WR_WARN_ON(__wrmas, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_wr_dump(__wrmas); \ - mas_dump((__wrmas)->mas); \ - mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) -#else -#define MT_BUG_ON(__tree, __x) BUG_ON(__x) -#define MAS_BUG_ON(__mas, __x) BUG_ON(__x) -#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) -#define MT_WARN_ON(__tree, __x) WARN_ON(__x) -#define MAS_WARN_ON(__mas, __x) WARN_ON(__x) -#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) -#endif /* CONFIG_DEBUG_MAPLE_TREE */ - #endif /*_LINUX_MAPLE_TREE_H */ diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 9b54c4f0677f..693eba9869e4 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -26,6 +26,7 @@ #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 #define MARVELL_PHY_ID_88Q2110 0x002b0980 +#define MARVELL_PHY_ID_88Q2220 0x002b0b20 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index b0da04fe087b..34dfcc77f505 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,10 +126,11 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time, int timeout); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param); #endif /* _MC146818RTC_H */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 8fa23bdcedbf..68f8d2e970d4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -38,6 +38,7 @@ struct mdio_device { /* Bus address of the MDIO device (0-31) */ int addr; int flags; + int reset_state; struct gpio_desc *reset_gpio; struct reset_control *reset_ctrl; unsigned int reset_assert_delay; @@ -372,6 +373,10 @@ static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 l { linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising, lpa & MDIO_AN_T1_ADV_M_B10L); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_100BT1); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_1000BT1); } /** @@ -408,6 +413,10 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) result |= MDIO_AN_T1_ADV_M_B10L; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_100BT1; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_1000BT1; return result; } @@ -420,7 +429,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) * A function that translates value of following registers to the linkmode: * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20) * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60) - * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61) + * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61) */ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) { @@ -439,6 +448,42 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) } /** + * mii_eee_cap2_mod_linkmode_sup_t() + * @adv: target the linkmode settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.3.11 "EEE control and capability 2" register (3.21) + */ +static inline void mii_eee_cap2_mod_linkmode_sup_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** + * mii_eee_cap2_mod_linkmode_adv_t() + * @adv: target the linkmode advertisement settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.7.16 "EEE advertisement 2" register (7.62) + * IEEE 802.3-2022 45.2.7.17 "EEE link partner ability 2" register (7.63) + * Note: Currently this function is the same as mii_eee_cap2_mod_linkmode_sup_t. + * For certain, not yet supported, modes however the bits differ. + * Therefore create separate functions already. + */ +static inline void mii_eee_cap2_mod_linkmode_adv_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** * linkmode_to_mii_eee_cap1_t() * @adv: the linkmode advertisement settings * @@ -466,6 +511,25 @@ static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv) } /** + * linkmode_to_mii_eee_cap2_t() + * @adv: the linkmode advertisement settings + * + * A function that translates linkmode to value for IEEE 802.3-2022 45.2.7.16 + * "EEE advertisement 2" register (7.62) + */ +static inline u32 linkmode_to_mii_eee_cap2_t(unsigned long *adv) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, adv)) + result |= MDIO_EEE_2_5GT; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, adv)) + result |= MDIO_EEE_5GT; + + return result; +} + +/** * mii_10base_t1_adv_mod_linkmode_t() * @adv: linkmode advertisement settings * @val: register value diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index fd6e0620658d..b38a56a13f39 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -31,11 +31,11 @@ typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev); * @rx_work: async work to execute Rx event callback * @rx_cb: Drivers register this callback to get asynchronous ME * Rx buffer pending notifications. - * @notif_work: async work to execute FW notif event callback + * @notif_work: async work to execute FW notify event callback * @notif_cb: Drivers register this callback to get asynchronous ME * FW notification pending notifications. * - * @do_match: wheather device can be matched with a driver + * @do_match: whether the device can be matched with a driver * @is_added: device is already scanned * @priv_data: client private data */ @@ -94,15 +94,23 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv); ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, size_t length); +ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf, + size_t length, unsigned long timeout); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, + unsigned long timeout); ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, size_t length, u8 vtag); +ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf, + size_t length, u8 vtag, unsigned long timeout); ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); +ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 *vtag, unsigned long timeout); int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1c1072e3ca06..e2082240586d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. + * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are + * not initialized (only for reserved regions). */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -47,6 +49,7 @@ enum memblock_flags { MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ + MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ }; /** @@ -118,13 +121,17 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +bool memblock_validate_numa_coverage(unsigned long threshold_bytes); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); +int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); void memblock_free(void *ptr, size_t size); @@ -259,6 +266,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_reserved_noinit(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_RSRV_NOINIT; +} + static inline bool memblock_is_driver_managed(struct memblock_region *m) { return m->flags & MEMBLOCK_DRIVER_MANAGED; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e24da16d2c..394fd0a887ae 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -14,6 +14,7 @@ #include <linux/vm_event_item.h> #include <linux/hardirq.h> #include <linux/jump_label.h> +#include <linux/kernel.h> #include <linux/page_counter.h> #include <linux/vmpressure.h> #include <linux/eventfd.h> @@ -21,6 +22,7 @@ #include <linux/vmstat.h> #include <linux/writeback.h> #include <linux/page-flags.h> +#include <linux/shrinker.h> struct mem_cgroup; struct obj_cgroup; @@ -88,17 +90,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; @@ -153,7 +144,7 @@ struct mem_cgroup_threshold_ary { /* Size of entries[] */ unsigned int size; /* Array of thresholds */ - struct mem_cgroup_threshold entries[]; + struct mem_cgroup_threshold entries[] __counted_by(size); }; struct mem_cgroup_thresholds { @@ -229,6 +220,12 @@ struct mem_cgroup { #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) unsigned long zswap_max; + + /* + * Prevent pages from this memcg from being written back from zswap to + * swap, and from being swapped out on zswap store failures. + */ + bool zswap_writeback; #endif unsigned long soft_limit; @@ -299,7 +296,13 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; - struct obj_cgroup __rcu *objcg; + /* + * memcg->objcg is wiped out as a part of the objcg repaprenting + * process. memcg->orig_objcg preserves a pointer (and a reference) + * to the original objcg until the end of live of memcg. + */ + struct obj_cgroup __rcu *objcg; + struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list; #endif @@ -328,7 +331,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_GEN_WALKS_MMU /* per-memcg mm_struct list */ struct lru_gen_mm_list mm_list; #endif @@ -662,6 +665,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); + int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** @@ -686,6 +691,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } +int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, + long nr_pages); + int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -705,14 +713,16 @@ static inline void mem_cgroup_uncharge(struct folio *folio) __mem_cgroup_uncharge(folio); } -void __mem_cgroup_uncharge_list(struct list_head *page_list); -static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +void __mem_cgroup_uncharge_folios(struct folio_batch *folios); +static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge_list(page_list); + __mem_cgroup_uncharge_folios(folios); } +void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); +void mem_cgroup_replace_folio(struct folio *old, struct folio *new); void mem_cgroup_migrate(struct folio *old, struct folio *new); /** @@ -769,6 +779,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); +struct mem_cgroup *get_mem_cgroup_from_current(void); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -814,6 +826,11 @@ static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) return !memcg || css_tryget(&memcg->css); } +static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg) +{ + return !memcg || css_tryget_online(&memcg->css); +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) @@ -1039,8 +1056,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return x; } -void mem_cgroup_flush_stats(void); -void mem_cgroup_flush_stats_ratelimited(void); +void mem_cgroup_flush_stats(struct mem_cgroup *memcg); +void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg); void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); @@ -1080,15 +1097,6 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, local_irq_restore(flags); } -static inline void count_memcg_page_event(struct page *page, - enum vm_event_item idx) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - count_memcg_events(memcg, idx, 1); -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1153,7 +1161,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -void split_page_memcg(struct page *head, unsigned int nr); +void split_page_memcg(struct page *head, int old_order, int new_order); unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1189,6 +1197,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) +{ + return NULL; +} + static inline bool folio_memcg_kmem(struct folio *folio) { return false; @@ -1249,12 +1262,23 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } +static inline void mem_cgroup_commit_charge(struct folio *folio, + struct mem_cgroup *memcg) +{ +} + static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } +static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, + gfp_t gfp, long nr_pages) +{ + return 0; +} + static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { @@ -1269,7 +1293,17 @@ static inline void mem_cgroup_uncharge(struct folio *folio) { } -static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) +{ +} + +static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, + unsigned int nr_pages) +{ +} + +static inline void mem_cgroup_replace_folio(struct folio *old, + struct folio *new) { } @@ -1310,6 +1344,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_current(void) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { @@ -1325,6 +1364,11 @@ static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) return true; } +static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg) +{ + return true; +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } @@ -1524,11 +1568,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return node_page_state(lruvec_pgdat(lruvec), idx); } -static inline void mem_cgroup_flush_stats(void) +static inline void mem_cgroup_flush_stats(struct mem_cgroup *memcg) { } -static inline void mem_cgroup_flush_stats_ratelimited(void) +static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) { } @@ -1565,11 +1609,6 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, { } -static inline void count_memcg_page_event(struct page *page, - int idx) -{ -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1580,7 +1619,7 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void split_page_memcg(struct page *head, unsigned int nr) +static inline void split_page_memcg(struct page *head, int old_order, int new_order) { } @@ -1654,18 +1693,18 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, return folio_lruvec_lock_irq(folio); } -/* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, - struct lruvec *locked_lruvec, unsigned long *flags) +/* Don't lock again iff folio's lruvec locked */ +static inline void folio_lruvec_relock_irqsave(struct folio *folio, + struct lruvec **lruvecp, unsigned long *flags) { - if (locked_lruvec) { - if (folio_matches_lruvec(folio, locked_lruvec)) - return locked_lruvec; + if (*lruvecp) { + if (folio_matches_lruvec(folio, *lruvecp)) + return; - unlock_page_lruvec_irqrestore(locked_lruvec, *flags); + unlock_page_lruvec_irqrestore(*lruvecp, *flags); } - return folio_lruvec_lock_irqsave(folio, flags); + *lruvecp = folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -1763,9 +1802,27 @@ bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); -struct obj_cgroup *get_obj_cgroup_from_current(void); +/* + * The returned objcg pointer is safe to use without additional + * protection within a scope. The scope is defined either by + * the current task (similar to the "current" global variable) + * or by set_active_memcg() pair. + * Please, use obj_cgroup_get() to get a reference if the pointer + * needs to be used outside of the local scope. + */ +struct obj_cgroup *current_obj_cgroup(void); struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio); +static inline struct obj_cgroup *get_obj_cgroup_from_current(void) +{ + struct obj_cgroup *objcg = current_obj_cgroup(); + + if (objcg) + obj_cgroup_get(objcg); + + return objcg; +} + int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); @@ -1889,6 +1946,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, bool obj_cgroup_may_zswap(struct obj_cgroup *objcg); void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size); void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size); +bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg); #else static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg) { @@ -1902,6 +1960,11 @@ static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size) { } +static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) +{ + /* if zswap is disabled, do not block pages going to the swapping device */ + return true; +} #endif #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 437441cdf78f..69e781900082 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -6,6 +6,7 @@ #include <linux/nodemask.h> #include <linux/kref.h> #include <linux/mmzone.h> +#include <linux/notifier.h> /* * Each tier cover a abstrace distance chunk size of 128 */ @@ -22,7 +23,9 @@ struct memory_tier; struct memory_dev_type { /* list of memory types that are part of same tier as this type */ - struct list_head tier_sibiling; + struct list_head tier_sibling; + /* list of memory types that are managed by one driver */ + struct list_head list; /* abstract distance for this specific memory type */ int adistance; /* Nodes of same abstract distance */ @@ -30,12 +33,21 @@ struct memory_dev_type { struct kref kref; }; +struct access_coordinate; + #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; +extern struct memory_dev_type *default_dram_type; struct memory_dev_type *alloc_memory_type(int adistance); void put_memory_type(struct memory_dev_type *memtype); void init_node_memory_type(int node, struct memory_dev_type *default_type); void clear_node_memory_type(int node, struct memory_dev_type *memtype); +int register_mt_adistance_algorithm(struct notifier_block *nb); +int unregister_mt_adistance_algorithm(struct notifier_block *nb); +int mt_calc_adistance(int node, int *adist); +int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, + const char *source); +int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -60,6 +72,7 @@ static inline bool node_is_toptier(int node) #else #define numa_demotion_enabled false +#define default_dram_type NULL /* * CONFIG_NUMA implementation returns non NULL error. */ @@ -97,5 +110,31 @@ static inline bool node_is_toptier(int node) { return true; } + +static inline int register_mt_adistance_algorithm(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_mt_adistance_algorithm(struct notifier_block *nb) +{ + return 0; +} + +static inline int mt_calc_adistance(int node, int *adist) +{ + return NOTIFY_DONE; +} + +static inline int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, + const char *source) +{ + return -EIO; +} + +static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) +{ + return -EIO; +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index f53cfdaaaa41..c0afee5d126e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -96,8 +96,17 @@ int set_memory_block_size_order(unsigned int order); #define MEM_GOING_ONLINE (1<<3) #define MEM_CANCEL_ONLINE (1<<4) #define MEM_CANCEL_OFFLINE (1<<5) +#define MEM_PREPARE_ONLINE (1<<6) +#define MEM_FINISH_OFFLINE (1<<7) struct memory_notify { + /* + * The altmap_start_pfn and altmap_nr_pages fields are designated for + * specifying the altmap range and are exclusively intended for use in + * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers. + */ + unsigned long altmap_start_pfn; + unsigned long altmap_nr_pages; unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; @@ -114,6 +123,7 @@ struct mem_section; #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 #define HMAT_CALLBACK_PRI 2 +#define CXL_CALLBACK_PRI 5 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7d2076583494..7a9ff464608d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -106,6 +106,22 @@ typedef int __bitwise mhp_t; * implies the node id (nid). */ #define MHP_NID_IS_MGID ((__force mhp_t)BIT(2)) +/* + * The hotplugged memory is completely inaccessible while the memory is + * offline. The memory provider will handle MEM_PREPARE_ONLINE / + * MEM_FINISH_OFFLINE notifications and make the memory accessible. + * + * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY, + * because the altmap cannot be written (e.g., poisoned) when adding + * memory -- before it is set online. + * + * This allows for adding memory with an altmap that is not currently + * made available by a hypervisor. When onlining that memory, the + * hypervisor can be instructed to make that memory available, and + * the onlining phase will not require any memory allocations, which is + * helpful in low-memory situations. + */ +#define MHP_OFFLINE_INACCESSIBLE ((__force mhp_t)BIT(3)) /* * Extended parameters for memory hotplug: @@ -121,6 +137,7 @@ struct mhp_params { bool mhp_range_allowed(u64 start, u64 size, bool need_mapping); struct range mhp_get_pluggable_range(bool need_mapping); +bool mhp_supports_memmap_on_memory(void); /* * Zone resizing functions @@ -154,7 +171,7 @@ extern void adjust_present_page_count(struct page *page, long nr_pages); /* VM interface that may be used by firmware interface */ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, - struct zone *zone); + struct zone *zone, bool mhp_off_inaccessible); extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); @@ -262,6 +279,11 @@ static inline bool movable_node_is_enabled(void) return false; } +static inline bool mhp_supports_memmap_on_memory(void) +{ + return false; +} + static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {} diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d232de7cdc56..931b118336f4 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -17,6 +17,8 @@ struct mm_struct; +#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ + #ifdef CONFIG_NUMA /* @@ -89,8 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) return pol; } -#define vma_policy(vma) ((vma)->vm_policy) - static inline void mpol_get(struct mempolicy *pol) { if (pol) @@ -107,35 +107,30 @@ static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) /* * Tree of shared policies for a shared memory region. - * Maintain the policies in a pseudo mm that contains vmas. The vmas - * carry the policy. As a special twist the pseudo mm is indexed in pages, not - * bytes, so that we can work with shared memory segments bigger than - * unsigned long. */ - -struct sp_node { - struct rb_node nd; - unsigned long start, end; - struct mempolicy *policy; -}; - struct shared_policy { struct rb_root root; rwlock_t lock; }; +struct sp_node { + struct rb_node nd; + pgoff_t start, end; + struct mempolicy *policy; +}; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -int mpol_set_shared_policy(struct shared_policy *info, - struct vm_area_struct *vma, - struct mempolicy *new); -void mpol_free_shared_policy(struct shared_policy *p); +int mpol_set_shared_policy(struct shared_policy *sp, + struct vm_area_struct *vma, struct mempolicy *mpol); +void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, - unsigned long idx); + pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); @@ -149,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); -extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); - extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -174,7 +167,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); -extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) @@ -188,12 +181,17 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); struct mempolicy {}; +static inline struct mempolicy *get_task_policy(struct task_struct *p) +{ + return NULL; +} + static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } -static inline void mpol_put(struct mempolicy *p) +static inline void mpol_put(struct mempolicy *pol) { } @@ -212,17 +210,22 @@ static inline void mpol_shared_policy_init(struct shared_policy *sp, { } -static inline void mpol_free_shared_policy(struct shared_policy *p) +static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static inline struct mempolicy * -mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } -#define vma_policy(vma) NULL +static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx) +{ + *ilx = 0; + return NULL; +} static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) @@ -278,7 +281,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol) } #endif -static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, +static inline int mpol_misplaced(struct folio *folio, + struct vm_area_struct *vma, unsigned long address) { return -1; /* no node preference */ diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 4aae6c06c5f2..16c5cc807ff6 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -51,6 +51,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; +extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc; extern void mempool_free(void *element, mempool_t *pool); /* @@ -94,6 +95,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) (void *) size); } +void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data); +void mempool_kvfree(void *element, void *pool_data); + +static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size) +{ + return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); +} + +static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size) +{ + return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); +} + /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that * allocates pages of the order specified by pool_data diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 1314d9c5f05b..3f7143ade32c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -25,6 +25,7 @@ struct vmem_altmap { unsigned long free; unsigned long align; unsigned long alloc; + bool inaccessible; }; /* @@ -108,7 +109,7 @@ struct dev_pagemap_ops { * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @done: completion for @ref - * @type: memory type: see MEMORY_* in memory_hotplug.h + * @type: memory type: see MEMORY_* above in memremap.h * @flags: PGMAP_* flags to specify defailed behavior * @vmemmap_shift: structural definition of how the vmemmap page metadata * is populated, specifically the metadata page order. @@ -196,8 +197,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); -unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); -void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, @@ -228,16 +227,6 @@ static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) return false; } -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) -{ - return 0; -} - -static inline void vmem_altmap_free(struct vmem_altmap *altmap, - unsigned long nr_pfns) -{ -} - /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 09fb3c56e7d7..76d326ea8eba 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -499,13 +499,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab) void ab8500_override_turn_on_stat(u8 mask, u8 set); -#ifdef CONFIG_AB8500_DEBUG -extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); -void ab8500_dump_all_banks(struct device *dev); -void ab8500_debug_register_interrupt(int line); -#else static inline void ab8500_dump_all_banks(struct device *dev) {} static inline void ab8500_debug_register_interrupt(int line) {} -#endif #endif /* MFD_AB8500_H */ diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 47e7a3a61ce6..e8bcad641d8c 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -92,7 +92,7 @@ struct mfd_cell { * (above) when matching OF nodes with devices that have identical * compatible strings */ - const u64 of_reg; + u64 of_reg; /* Set to 'true' to use 'of_reg' (above) - allows for of_reg=0 */ bool use_of_reg; diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h index cf8263aab41b..2239d8585e78 100644 --- a/include/linux/mfd/cs42l43.h +++ b/include/linux/mfd/cs42l43.h @@ -6,20 +6,21 @@ * Cirrus Logic International Semiconductor Ltd. */ +#ifndef CS42L43_CORE_EXT_H +#define CS42L43_CORE_EXT_H + #include <linux/completion.h> -#include <linux/device.h> -#include <linux/gpio/consumer.h> #include <linux/mutex.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> -#include <linux/soundwire/sdw.h> #include <linux/workqueue.h> -#ifndef CS42L43_CORE_EXT_H -#define CS42L43_CORE_EXT_H - #define CS42L43_N_SUPPLIES 3 +struct device; +struct gpio_desc; +struct sdw_slave; + enum cs42l43_irq_numbers { CS42L43_PLL_LOST_LOCK, CS42L43_PLL_READY, diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index d3f126990ad0..137a2b067512 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -7,7 +7,6 @@ #define DA9055_MAX_REGULATORS 8 struct da9055; -struct gpio_desc; enum gpio_select { NO_GPIO = 0, @@ -24,16 +23,6 @@ struct da9055_pdata { /* Enable RTC in RESET Mode */ bool reset_enable; /* - * GPI muxed pin to control - * regulator state A/B, 0 if not available. - */ - int *gpio_ren; - /* - * GPI muxed pin to control - * regulator set, 0 if not available. - */ - int *gpio_rsel; - /* * Regulator mode control bits value (GPI offset) that * controls the regulator state, 0 if not available. */ @@ -43,7 +32,5 @@ struct da9055_pdata { * controls the regulator set A/B, 0 if not available. */ enum gpio_select *reg_rsel; - /* GPIO descriptors to enable regulator, NULL if not available */ - struct gpio_desc **ena_gpiods; }; #endif /* __DA9055_PDATA_H */ diff --git a/include/linux/mfd/idtRC38xxx_reg.h b/include/linux/mfd/idtRC38xxx_reg.h new file mode 100644 index 000000000000..ec11872f51ad --- /dev/null +++ b/include/linux/mfd/idtRC38xxx_reg.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Register Map - Based on PolarBear_CSRs.RevA.xlsx (2023-04-21) + * + * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company. + */ +#ifndef MFD_IDTRC38XXX_REG +#define MFD_IDTRC38XXX_REG + +/* GLOBAL */ +#define SOFT_RESET_CTRL (0x15) /* Specific to FC3W */ +#define MISC_CTRL (0x14) /* Specific to FC3A */ +#define APLL_REINIT BIT(1) +#define APLL_REINIT_VFC3A BIT(2) + +#define DEVICE_ID (0x2) +#define DEVICE_ID_MASK (0x1000) /* Bit 12 is 1 if FC3W and 0 if FC3A */ +#define DEVICE_ID_SHIFT (12) + +/* FOD */ +#define FOD_0 (0x300) +#define FOD_0_VFC3A (0x400) +#define FOD_1 (0x340) +#define FOD_1_VFC3A (0x440) +#define FOD_2 (0x380) +#define FOD_2_VFC3A (0x480) + +/* TDCAPLL */ +#define TDC_CTRL (0x44a) /* Specific to FC3W */ +#define TDC_ENABLE_CTRL (0x169) /* Specific to FC3A */ +#define TDC_DAC_CAL_CTRL (0x16a) /* Specific to FC3A */ +#define TDC_EN BIT(0) +#define TDC_DAC_RECAL_REQ BIT(1) +#define TDC_DAC_RECAL_REQ_VFC3A BIT(0) + +#define TDC_FB_DIV_INT_CNFG (0x442) +#define TDC_FB_DIV_INT_CNFG_VFC3A (0x162) +#define TDC_FB_DIV_INT_MASK GENMASK(7, 0) +#define TDC_REF_DIV_CNFG (0x443) +#define TDC_REF_DIV_CNFG_VFC3A (0x163) +#define TDC_REF_DIV_CONFIG_MASK GENMASK(2, 0) + +/* TIME SYNC CHANNEL */ +#define TIME_CLOCK_SRC (0xa01) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT (0xa00) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT_MASK GENMASK(5, 0) + +#define SUB_SYNC_GEN_CNFG (0xa04) + +#define TOD_COUNTER_READ_REQ (0xa5f) +#define TOD_COUNTER_READ_REQ_VFC3A (0x6df) +#define TOD_SYNC_LOAD_VAL_CTRL (0xa10) +#define TOD_SYNC_LOAD_VAL_CTRL_VFC3A (0x690) +#define SYNC_COUNTER_MASK GENMASK_ULL(51, 0) +#define SUB_SYNC_COUNTER_MASK GENMASK(30, 0) +#define TOD_SYNC_LOAD_REQ_CTRL (0xa21) +#define TOD_SYNC_LOAD_REQ_CTRL_VFC3A (0x6a1) +#define SYNC_LOAD_ENABLE BIT(1) +#define SUB_SYNC_LOAD_ENABLE BIT(0) +#define SYNC_LOAD_REQ BIT(0) + +#define LPF_MODE_CNFG (0xa80) +#define LPF_MODE_CNFG_VFC3A (0x700) +enum lpf_mode { + LPF_DISABLED = 0, + LPF_WP = 1, + LPF_HOLDOVER = 2, + LPF_WF = 3, + LPF_INVALID = 4 +}; +#define LPF_CTRL (0xa98) +#define LPF_CTRL_VFC3A (0x718) +#define LPF_EN BIT(0) + +#define LPF_BW_CNFG (0xa81) +#define LPF_BW_SHIFT GENMASK(7, 3) +#define LPF_BW_MULT GENMASK(2, 0) +#define LPF_BW_SHIFT_DEFAULT (0xb) +#define LPF_BW_MULT_DEFAULT (0x0) +#define LPF_BW_SHIFT_1PPS (0x5) + +#define LPF_WR_PHASE_CTRL (0xaa8) +#define LPF_WR_PHASE_CTRL_VFC3A (0x728) +#define LPF_WR_FREQ_CTRL (0xab0) +#define LPF_WR_FREQ_CTRL_VFC3A (0x730) + +#define TIME_CLOCK_TDC_FANOUT_CNFG (0xB00) +#define TIME_SYNC_TO_TDC_EN BIT(0) +#define SIG1_MUX_SEL_MASK GENMASK(7, 4) +#define SIG2_MUX_SEL_MASK GENMASK(11, 8) +enum tdc_mux_sel { + REF0 = 0, + REF1 = 1, + REF2 = 2, + REF3 = 3, + REF_CLK5 = 4, + REF_CLK6 = 5, + DPLL_FB_TO_TDC = 6, + DPLL_FB_DIVIDED_TO_TDC = 7, + TIME_CLK_DIVIDED = 8, + TIME_SYNC = 9, +}; + +#define TIME_CLOCK_MEAS_CNFG (0xB04) +#define TDC_MEAS_MODE BIT(0) +enum tdc_meas_mode { + CONTINUOUS = 0, + ONE_SHOT = 1, + MEAS_MODE_INVALID = 2, +}; + +#define TIME_CLOCK_MEAS_DIV_CNFG (0xB08) +#define TIME_REF_DIV_MASK GENMASK(29, 24) + +#define TIME_CLOCK_MEAS_CTRL (0xB10) +#define TDC_MEAS_EN BIT(0) +#define TDC_MEAS_START BIT(1) + +#define TDC_FIFO_READ_REQ (0xB2F) +#define TDC_FIFO_READ (0xB30) +#define COARSE_MEAS_MASK GENMASK_ULL(39, 13) +#define FINE_MEAS_MASK GENMASK(12, 0) + +#define TDC_FIFO_CTRL (0xB12) +#define FIFO_CLEAR BIT(0) +#define TDC_FIFO_STS (0xB38) +#define FIFO_FULL BIT(1) +#define FIFO_EMPTY BIT(0) +#define TDC_FIFO_EVENT (0xB39) +#define FIFO_OVERRUN BIT(1) + +/* DPLL */ +#define MAX_REFERENCE_INDEX (3) +#define MAX_NUM_REF_PRIORITY (4) + +#define MAX_DPLL_INDEX (2) + +#define DPLL_STS (0x580) +#define DPLL_STS_VFC3A (0x571) +#define DPLL_STATE_STS_MASK (0x70) +#define DPLL_STATE_STS_SHIFT (4) +#define DPLL_REF_SEL_STS_MASK (0x6) +#define DPLL_REF_SEL_STS_SHIFT (1) + +#define DPLL_REF_PRIORITY_CNFG (0x502) +#define DPLL_REFX_PRIORITY_DISABLE_MASK (0xf) +#define DPLL_REF0_PRIORITY_ENABLE_AND_SET_MASK (0x31) +#define DPLL_REF1_PRIORITY_ENABLE_AND_SET_MASK (0xc2) +#define DPLL_REF2_PRIORITY_ENABLE_AND_SET_MASK (0x304) +#define DPLL_REF3_PRIORITY_ENABLE_AND_SET_MASK (0xc08) +#define DPLL_REF0_PRIORITY_SHIFT (4) +#define DPLL_REF1_PRIORITY_SHIFT (6) +#define DPLL_REF2_PRIORITY_SHIFT (8) +#define DPLL_REF3_PRIORITY_SHIFT (10) + +enum dpll_state { + DPLL_STATE_MIN = 0, + DPLL_STATE_FREERUN = DPLL_STATE_MIN, + DPLL_STATE_LOCKED = 1, + DPLL_STATE_HOLDOVER = 2, + DPLL_STATE_WRITE_FREQUENCY = 3, + DPLL_STATE_ACQUIRE = 4, + DPLL_STATE_HITLESS_SWITCH = 5, + DPLL_STATE_MAX = DPLL_STATE_HITLESS_SWITCH +}; + +/* REFMON */ +#define LOSMON_STS_0 (0x81e) +#define LOSMON_STS_0_VFC3A (0x18e) +#define LOSMON_STS_1 (0x82e) +#define LOSMON_STS_1_VFC3A (0x19e) +#define LOSMON_STS_2 (0x83e) +#define LOSMON_STS_2_VFC3A (0x1ae) +#define LOSMON_STS_3 (0x84e) +#define LOSMON_STS_3_VFC3A (0x1be) +#define LOS_STS_MASK (0x1) + +#define FREQMON_STS_0 (0x874) +#define FREQMON_STS_0_VFC3A (0x1d4) +#define FREQMON_STS_1 (0x894) +#define FREQMON_STS_1_VFC3A (0x1f4) +#define FREQMON_STS_2 (0x8b4) +#define FREQMON_STS_2_VFC3A (0x214) +#define FREQMON_STS_3 (0x8d4) +#define FREQMON_STS_3_VFC3A (0x234) +#define FREQ_FAIL_STS_SHIFT (31) + +/* Firmware interface */ +#define TIME_CLK_FREQ_ADDR (0xffa0) +#define XTAL_FREQ_ADDR (0xffa1) + +/* + * Return register address and field mask based on passed in firmware version + */ +#define IDTFC3_FW_REG(FW, VER, REG) (((FW) < (VER)) ? (REG) : (REG##_##VER)) +#define IDTFC3_FW_FIELD(FW, VER, FIELD) (((FW) < (VER)) ? (FIELD) : (FIELD##_##VER)) +enum fw_version { + V_DEFAULT = 0, + VFC3W = 1, + VFC3A = 2 +}; + +/* XTAL_FREQ_ADDR/TIME_CLK_FREQ_ADDR */ +enum { + FREQ_MIN = 0, + FREQ_25M = 1, + FREQ_49_152M = 2, + FREQ_50M = 3, + FREQ_100M = 4, + FREQ_125M = 5, + FREQ_250M = 6, + FREQ_MAX +}; + +struct idtfc3_hw_param { + u32 xtal_freq; + u32 time_clk_freq; +}; + +struct idtfc3_fwrc { + u8 hiaddr; + u8 loaddr; + u8 value; + u8 reserved; +} __packed; + +static inline void idtfc3_default_hw_param(struct idtfc3_hw_param *hw_param) +{ + hw_param->xtal_freq = 49152000; + hw_param->time_clk_freq = 25000000; +} + +static inline int idtfc3_set_hw_param(struct idtfc3_hw_param *hw_param, + u16 addr, u8 val) +{ + if (addr == XTAL_FREQ_ADDR) + switch (val) { + case FREQ_49_152M: + hw_param->xtal_freq = 49152000; + break; + case FREQ_50M: + hw_param->xtal_freq = 50000000; + break; + default: + return -EINVAL; + } + else if (addr == TIME_CLK_FREQ_ADDR) + switch (val) { + case FREQ_25M: + hw_param->time_clk_freq = 25000000; + break; + case FREQ_50M: + hw_param->time_clk_freq = 50000000; + break; + case FREQ_100M: + hw_param->time_clk_freq = 100000000; + break; + case FREQ_125M: + hw_param->time_clk_freq = 125000000; + break; + case FREQ_250M: + hw_param->time_clk_freq = 250000000; + break; + default: + return -EINVAL; + } + else + return -EFAULT; + + return 0; +} + +#endif diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 3d5c480d58ea..51b47966a04d 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -10,7 +10,6 @@ #ifndef __MFD_LP8788_H__ #define __MFD_LP8788_H__ -#include <linux/gpio.h> #include <linux/irqdomain.h> #include <linux/pwm.h> #include <linux/regmap.h> @@ -159,21 +158,17 @@ struct lp8788; /* * lp8788_buck1_dvs - * @gpio : gpio pin number for dvs control * @vsel : dvs selector for buck v1 register */ struct lp8788_buck1_dvs { - int gpio; enum lp8788_dvs_sel vsel; }; /* * lp8788_buck2_dvs - * @gpio : two gpio pin numbers are used for dvs * @vsel : dvs selector for buck v2 register */ struct lp8788_buck2_dvs { - int gpio[LP8788_NUM_BUCK2_DVS]; enum lp8788_dvs_sel vsel; }; @@ -268,8 +263,8 @@ struct lp8788_vib_platform_data { * @buck_data : regulator initial data for buck * @dldo_data : regulator initial data for digital ldo * @aldo_data : regulator initial data for analog ldo - * @buck1_dvs : gpio configurations for buck1 dvs - * @buck2_dvs : gpio configurations for buck2 dvs + * @buck1_dvs : configurations for buck1 dvs + * @buck2_dvs : configurations for buck2 dvs * @chg_pdata : platform data for charger driver * @alarm_sel : rtc alarm selection (1 or 2) * @bl_pdata : configurable data for backlight driver diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index ea4a4b1b246a..1fbda1f8967d 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -15,7 +15,7 @@ #define ICH_RES_GPE0 1 /* GPIO compatibility */ -enum { +enum lpc_gpio_versions { ICH_I3100_GPIO, ICH_V5_GPIO, ICH_V6_GPIO, @@ -26,11 +26,14 @@ enum { AVOTON_GPIO, }; +struct lpc_ich_gpio_info; + struct lpc_ich_info { char name[32]; unsigned int iTCO_version; - unsigned int gpio_version; + enum lpc_gpio_versions gpio_version; enum intel_spi_type spi_type; + const struct lpc_ich_gpio_info *gpio_info; u8 use_gpio; }; diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 311f7d3d2323..54444ff2a5de 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -405,7 +405,7 @@ enum max77693_haptic_reg { MAX77693_HAPTIC_REG_END, }; -/* max77693-pmic LSCNFG configuraton register */ +/* max77693-pmic LSCNFG configuration register */ #define MAX77693_PMIC_LOW_SYS_MASK 0x80 #define MAX77693_PMIC_LOW_SYS_SHIFT 7 diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h index 0bc7454c4dbe..2fb4db67f110 100644 --- a/include/linux/mfd/max77843-private.h +++ b/include/linux/mfd/max77843-private.h @@ -198,7 +198,7 @@ enum max77843_irq_muic { #define MAX77843_MCONFIG_MEN_MASK BIT(MCONFIG_MEN_SHIFT) #define MAX77843_MCONFIG_PDIV_MASK (0x3 << MCONFIG_PDIV_SHIFT) -/* Max77843 charger insterrupts */ +/* Max77843 charger interrupts */ #define MAX77843_CHG_BYP_I BIT(0) #define MAX77843_CHG_BATP_I BIT(2) #define MAX77843_CHG_BAT_I BIT(3) diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 6193905abbb5..5c2cc1103437 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -178,7 +178,6 @@ struct max8997_platform_data { * */ bool ignore_gpiodvs_side_effect; - int buck125_gpios[3]; /* GPIO of [0]SET1, [1]SET2, [2]SET3 */ int buck125_default_idx; /* Default value of SET1, 2, 3 */ unsigned int buck1_voltage[8]; /* buckx_voltage in uV */ bool buck1_gpiodvs; diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index 79c020bd0c70..a054e55c8646 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -65,10 +65,7 @@ struct max8998_regulator_data { * be other than the preset values. * @buck1_voltage: BUCK1 DVS mode 1 voltage registers * @buck2_voltage: BUCK2 DVS mode 2 voltage registers - * @buck1_set1: BUCK1 gpio pin 1 to set output voltage - * @buck1_set2: BUCK1 gpio pin 2 to set output voltage * @buck1_default_idx: Default for BUCK1 gpio pin 1, 2 - * @buck2_set3: BUCK2 gpio pin to set output voltage * @buck2_default_idx: Default for BUCK2 gpio pin. * @wakeup: Allow to wake up from suspend * @rtc_delay: LP3974 RTC chip bug that requires delay after a register @@ -91,10 +88,7 @@ struct max8998_platform_data { bool buck_voltage_lock; int buck1_voltage[4]; int buck2_voltage[2]; - int buck1_set1; - int buck1_set2; int buck1_default_idx; - int buck2_set3; int buck2_default_idx; bool wakeup; bool rtc_delay; diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 3d33517f178c..d83e87298ac4 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -262,6 +262,12 @@ #define MT6358_LDO_VBIF28_CON3 0x1db0 #define MT6358_VCAMA1_ANA_CON0 0x1e08 #define MT6358_VCAMA2_ANA_CON0 0x1e0c +#define MT6358_VFE28_ANA_CON0 0x1e10 +#define MT6358_VCN28_ANA_CON0 0x1e14 +#define MT6358_VBIF28_ANA_CON0 0x1e18 +#define MT6358_VAUD28_ANA_CON0 0x1e1c +#define MT6358_VAUX18_ANA_CON0 0x1e20 +#define MT6358_VXO22_ANA_CON0 0x1e24 #define MT6358_VCN33_ANA_CON0 0x1e28 #define MT6358_VSIM1_ANA_CON0 0x1e2c #define MT6358_VSIM2_ANA_CON0 0x1e30 @@ -288,4 +294,21 @@ #define MT6358_AUD_TOP_INT_CON0 0x2228 #define MT6358_AUD_TOP_INT_STATUS0 0x2234 +/* + * MT6366 has no VCAM*, but has other regulators in its place. The names + * keep the MT6358 prefix for ease of use in the regulator driver. + */ +#define MT6358_LDO_VSRAM_CON5 0x1bf8 +#define MT6358_LDO_VM18_CON0 MT6358_LDO_VCAMA1_CON0 +#define MT6358_LDO_VM18_CON1 MT6358_LDO_VCAMA1_CON1 +#define MT6358_LDO_VM18_CON2 MT6358_LDO_VCAMA1_CON2 +#define MT6358_LDO_VMDDR_CON0 MT6358_LDO_VCAMA2_CON0 +#define MT6358_LDO_VMDDR_CON1 MT6358_LDO_VCAMA2_CON1 +#define MT6358_LDO_VMDDR_CON2 MT6358_LDO_VCAMA2_CON2 +#define MT6358_LDO_VSRAM_CORE_CON0 MT6358_LDO_VCAMD_CON0 +#define MT6358_LDO_VSRAM_CORE_DBG0 0x1cb6 +#define MT6358_LDO_VSRAM_CORE_DBG1 0x1cb8 +#define MT6358_VM18_ANA_CON0 MT6358_VCAMA1_ANA_CON0 +#define MT6358_VMDDR_ANA_CON0 MT6358_VCAMD_ANA_CON0 + #endif /* __MFD_MT6358_REGISTERS_H__ */ diff --git a/include/linux/mfd/si476x-platform.h b/include/linux/mfd/si476x-platform.h index 18363b773d07..cb99e16ca947 100644 --- a/include/linux/mfd/si476x-platform.h +++ b/include/linux/mfd/si476x-platform.h @@ -10,7 +10,7 @@ #ifndef __SI476X_PLATFORM_H__ #define __SI476X_PLATFORM_H__ -/* It is possible to select one of the four adresses using pins A0 +/* It is possible to select one of the four addresses using pins A0 * and A1 on SI476x */ #define SI476X_I2C_ADDR_1 0x60 #define SI476X_I2C_ADDR_2 0x61 diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 1b94325febb3..ca35af30745f 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -102,6 +102,15 @@ enum stm32_timers_dmas { STM32_TIMERS_MAX_DMAS, }; +/* STM32 Timer may have either a unique global interrupt or 4 interrupt lines */ +enum stm32_timers_irqs { + STM32_TIMERS_IRQ_GLOBAL_BRK, /* global or brk IRQ */ + STM32_TIMERS_IRQ_UP, + STM32_TIMERS_IRQ_TRG_COM, + STM32_TIMERS_IRQ_CC, + STM32_TIMERS_MAX_IRQS, +}; + /** * struct stm32_timers_dma - STM32 timer DMA handling. * @completion: end of DMA transfer completion @@ -123,6 +132,8 @@ struct stm32_timers { struct regmap *regmap; u32 max_arr; struct stm32_timers_dma dma; /* Only to be used by the parent */ + unsigned int nr_irqs; + int irq[STM32_TIMERS_MAX_IRQS]; }; #if IS_REACHABLE(CONFIG_MFD_STM32_TIMERS) diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h index ea0ccf33a459..021f820f9d52 100644 --- a/include/linux/mfd/sun4i-gpadc.h +++ b/include/linux/mfd/sun4i-gpadc.h @@ -81,8 +81,8 @@ #define SUN4I_GPADC_TEMP_DATA 0x20 #define SUN4I_GPADC_DATA 0x24 -#define SUN4I_GPADC_IRQ_FIFO_DATA 0 -#define SUN4I_GPADC_IRQ_TEMP_DATA 1 +#define SUN4I_GPADC_IRQ_FIFO_DATA 1 +#define SUN4I_GPADC_IRQ_TEMP_DATA 2 /* 10s delay before suspending the IP */ #define SUN4I_GPADC_AUTOSUSPEND_DELAY 10000 diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index fecc2fa2a364..c315903f6dab 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -17,20 +17,17 @@ struct device_node; #ifdef CONFIG_MFD_SYSCON -extern struct regmap *device_node_to_regmap(struct device_node *np); -extern struct regmap *syscon_node_to_regmap(struct device_node *np); -extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s); -extern struct regmap *syscon_regmap_lookup_by_phandle( - struct device_node *np, - const char *property); -extern struct regmap *syscon_regmap_lookup_by_phandle_args( - struct device_node *np, - const char *property, - int arg_count, - unsigned int *out_args); -extern struct regmap *syscon_regmap_lookup_by_phandle_optional( - struct device_node *np, - const char *property); +struct regmap *device_node_to_regmap(struct device_node *np); +struct regmap *syscon_node_to_regmap(struct device_node *np); +struct regmap *syscon_regmap_lookup_by_compatible(const char *s); +struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, + const char *property); +struct regmap *syscon_regmap_lookup_by_phandle_args(struct device_node *np, + const char *property, + int arg_count, + unsigned int *out_args); +struct regmap *syscon_regmap_lookup_by_phandle_optional(struct device_node *np, + const char *property); #else static inline struct regmap *device_node_to_regmap(struct device_node *np) { diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 701925db75b3..f67ef0a4e041 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -749,7 +749,7 @@ #define VDDCTRL_ST_SHIFT 0 -/*Register VDDCTRL_OP (0x28) bit definitios */ +/*Register VDDCTRL_OP (0x28) bit definitions */ #define VDDCTRL_OP_CMD_MASK 0x80 #define VDDCTRL_OP_CMD_SHIFT 7 #define VDDCTRL_OP_SEL_MASK 0x7F diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index c062d91a67d9..85dc406173db 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -461,6 +461,7 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) #define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 +#define TWL6030_PHOENIX_DEV_ON 0x06 /*----------------------------------------------------------------------*/ /* Power bus message definitions */ diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 039943ec4d4e..77b8c0a26674 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -266,6 +266,7 @@ struct mhi_event_config { * struct mhi_controller_config - Root MHI controller configuration * @max_channels: Maximum number of channels supported * @timeout_ms: Timeout value for operations. 0 means use default + * @ready_timeout_ms: Timeout value for waiting device to be ready (optional) * @buf_len: Size of automatically allocated buffers. 0 means use default * @num_channels: Number of channels defined in @ch_cfg * @ch_cfg: Array of defined channels @@ -277,6 +278,7 @@ struct mhi_event_config { struct mhi_controller_config { u32 max_channels; u32 timeout_ms; + u32 ready_timeout_ms; u32 buf_len; u32 num_channels; const struct mhi_channel_config *ch_cfg; @@ -318,18 +320,14 @@ struct mhi_controller_config { * @hw_ev_rings: Number of hardware event rings * @sw_ev_rings: Number of software event rings * @nr_irqs: Number of IRQ allocated by bus master (required) - * @family_number: MHI controller family number - * @device_number: MHI controller device number - * @major_version: MHI controller major revision number - * @minor_version: MHI controller minor revision number * @serial_number: MHI controller serial number obtained from BHI - * @oem_pk_hash: MHI controller OEM PK Hash obtained from BHI * @mhi_event: MHI event ring configurations table * @mhi_cmd: MHI command ring configurations table * @mhi_ctxt: MHI device context, shared memory between host and device * @pm_mutex: Mutex for suspend/resume operation * @pm_lock: Lock for protecting MHI power management state * @timeout_ms: Timeout in ms for state transitions + * @ready_timeout_ms: Timeout in ms for waiting device to be ready (optional) * @pm_state: MHI power management state * @db_access: DB access states * @ee: MHI device execution environment @@ -366,15 +364,6 @@ struct mhi_controller_config { * Fields marked as (required) need to be populated by the controller driver * before calling mhi_register_controller(). For the fields marked as (optional) * they can be populated depending on the usecase. - * - * The following fields are present for the purpose of implementing any device - * specific quirks or customizations for specific MHI revisions used in device - * by the controller drivers. The MHI stack will just populate these fields - * during mhi_register_controller(): - * family_number - * device_number - * major_version - * minor_version */ struct mhi_controller { struct device *cntrl_dev; @@ -405,12 +394,7 @@ struct mhi_controller { u32 hw_ev_rings; u32 sw_ev_rings; u32 nr_irqs; - u32 family_number; - u32 device_number; - u32 major_version; - u32 minor_version; u32 serial_number; - u32 oem_pk_hash[MHI_MAX_OEM_PK_HASH_SEGMENTS]; struct mhi_event *mhi_event; struct mhi_cmd *mhi_cmd; @@ -419,6 +403,7 @@ struct mhi_controller { struct mutex pm_mutex; rwlock_t pm_lock; u32 timeout_ms; + u32 ready_timeout_ms; u32 pm_state; u32 db_access; enum mhi_ee_type ee; diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h index f198a8ac7ee7..11bf3212f782 100644 --- a/include/linux/mhi_ep.h +++ b/include/linux/mhi_ep.h @@ -50,6 +50,27 @@ struct mhi_ep_db_info { }; /** + * struct mhi_ep_buf_info - MHI Endpoint transfer buffer info + * @mhi_dev: MHI device associated with this buffer + * @dev_addr: Address of the buffer in endpoint + * @host_addr: Address of the bufffer in host + * @size: Size of the buffer + * @code: Transfer completion code + * @cb: Callback to be executed by controller drivers after transfer completion (async) + * @cb_buf: Opaque buffer to be passed to the callback + */ +struct mhi_ep_buf_info { + struct mhi_ep_device *mhi_dev; + void *dev_addr; + u64 host_addr; + size_t size; + int code; + + void (*cb)(struct mhi_ep_buf_info *buf_info); + void *cb_buf; +}; + +/** * struct mhi_ep_cntrl - MHI Endpoint controller structure * @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI * Endpoint controller @@ -82,8 +103,10 @@ struct mhi_ep_db_info { * @raise_irq: CB function for raising IRQ to the host * @alloc_map: CB function for allocating memory in endpoint for storing host context and mapping it * @unmap_free: CB function to unmap and free the allocated memory in endpoint for storing host context - * @read_from_host: CB function for reading from host memory from endpoint - * @write_to_host: CB function for writing to host memory from endpoint + * @read_sync: CB function for reading from host memory synchronously + * @write_sync: CB function for writing to host memory synchronously + * @read_async: CB function for reading from host memory asynchronously + * @write_async: CB function for writing to host memory asynchronously * @mhi_state: MHI Endpoint state * @max_chan: Maximum channels supported by the endpoint controller * @mru: MRU (Maximum Receive Unit) value of the endpoint controller @@ -128,14 +151,19 @@ struct mhi_ep_cntrl { struct work_struct reset_work; struct work_struct cmd_ring_work; struct work_struct ch_ring_work; + struct kmem_cache *ring_item_cache; + struct kmem_cache *ev_ring_el_cache; + struct kmem_cache *tre_buf_cache; void (*raise_irq)(struct mhi_ep_cntrl *mhi_cntrl, u32 vector); int (*alloc_map)(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr, phys_addr_t *phys_ptr, void __iomem **virt, size_t size); void (*unmap_free)(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr, phys_addr_t phys, void __iomem *virt, size_t size); - int (*read_from_host)(struct mhi_ep_cntrl *mhi_cntrl, u64 from, void *to, size_t size); - int (*write_to_host)(struct mhi_ep_cntrl *mhi_cntrl, void *from, u64 to, size_t size); + int (*read_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); + int (*write_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); + int (*read_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); + int (*write_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); enum mhi_state mhi_state; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 4e27ca7c49de..591bf5b5e8dc 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -64,6 +64,10 @@ #define KSZ886X_BMCR_DISABLE_TRANSMIT BIT(1) #define KSZ886X_BMCR_DISABLE_LED BIT(0) +/* PHY Special Control/Status Register (Reg 31) */ #define KSZ886X_CTRL_MDIX_STAT BIT(4) +#define KSZ886X_CTRL_FORCE_LINK BIT(3) +#define KSZ886X_CTRL_PWRSAVE BIT(2) +#define KSZ886X_CTRL_REMOTE_LOOPBACK BIT(1) #endif /* _MICREL_PHY_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 711dd9412561..2ce13e8a309b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else -static inline int migrate_misplaced_page(struct page *page, +static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index fa940bbaf8ae..26b04f73f214 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/ethtool.h> #include <linux/skbuff.h> +#include <linux/net_tstamp.h> struct phy_device; @@ -51,7 +52,8 @@ struct mii_timestamper { struct sk_buff *skb, int type); int (*hwtstamp)(struct mii_timestamper *mii_ts, - struct ifreq *ifreq); + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack); void (*link_state)(struct mii_timestamper *mii_ts, struct phy_device *phydev); diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 44077837385f..d52daf45861b 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -35,31 +35,33 @@ static __always_inline void min_heapify(struct min_heap *heap, int pos, const struct min_heap_callbacks *func) { - void *left, *right, *parent, *smallest; + void *left, *right; void *data = heap->data; + void *root = data + pos * func->elem_size; + int i = pos, j; + /* Find the sift-down path all the way to the leaves. */ for (;;) { - if (pos * 2 + 1 >= heap->nr) + if (i * 2 + 2 >= heap->nr) break; + left = data + (i * 2 + 1) * func->elem_size; + right = data + (i * 2 + 2) * func->elem_size; + i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; + } - left = data + ((pos * 2 + 1) * func->elem_size); - parent = data + (pos * func->elem_size); - smallest = parent; - if (func->less(left, smallest)) - smallest = left; + /* Special case for the last leaf with no sibling. */ + if (i * 2 + 2 == heap->nr) + i = i * 2 + 1; - if (pos * 2 + 2 < heap->nr) { - right = data + ((pos * 2 + 2) * func->elem_size); - if (func->less(right, smallest)) - smallest = right; - } - if (smallest == parent) - break; - func->swp(smallest, parent); - if (smallest == left) - pos = (pos * 2) + 1; - else - pos = (pos * 2) + 2; + /* Backtrack to the correct location. */ + while (i != pos && func->less(root, data + i * func->elem_size)) + i = (i - 1) / 2; + + /* Shift the element into its correct place. */ + j = i; + while (i != pos) { + i = (i - 1) / 2; + func->swp(data + i * func->elem_size, data + j * func->elem_size); } } @@ -70,7 +72,7 @@ void min_heapify_all(struct min_heap *heap, { int i; - for (i = heap->nr / 2; i >= 0; i--) + for (i = heap->nr / 2 - 1; i >= 0; i--) min_heapify(heap, i, func); } diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 83aebc244cba..2ec559284a9f 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,60 +2,77 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include <linux/build_bug.h> +#include <linux/compiler.h> #include <linux/const.h> #include <linux/types.h> /* * min()/max()/clamp() macros must accomplish three things: * - * - avoid multiple evaluations of the arguments (so side-effects like + * - Avoid multiple evaluations of the arguments (so side-effects like * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only + * - Retain result as a constant expressions when called with only * constant expressions (to avoid tripping VLA warnings in stack * allocation usage). + * - Perform signed v unsigned type-checking (to generate compile + * errors instead of nasty runtime surprises). + * - Unsigned char/short are always promoted to signed int and can be + * compared against signed or unsigned arguments. + * - Unsigned arguments can be compared against non-negative signed constants. + * - Comparison of a signed argument against an unsigned constant fails + * even if the constant is below __INT_MAX__ and could be cast to int. */ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) +/* is_signed_type() isn't a constexpr for pointer types */ +#define __is_signed(x) \ + __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ + is_signed_type(typeof(x)), 0) -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) +/* True for a non-negative signed int constant */ +#define __is_noneg_int(x) \ + (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) +#define __types_ok(x, y) \ + (__is_signed(x) == __is_signed(y) || \ + __is_signed((x) + 0) == __is_signed((y) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) +#define __cmp_op_min < +#define __cmp_op_max > -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) +#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) + +#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + static_assert(__types_ok(x, y), \ + #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + __cmp(op, unique_x, unique_y); }) + +#define __careful_cmp(op, x, y) \ + __builtin_choose_expr(__is_constexpr((x) - (y)), \ + __cmp(op, x, y), \ + __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ - __clamp(unique_val, unique_lo, unique_hi); }) - -#define __clamp_input_check(lo, hi) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((lo) > (hi)), (lo) > (hi), false))) +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ + (lo) <= (hi), true), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ + __clamp(unique_val, unique_lo, unique_hi); }) #define __careful_clamp(val, lo, hi) ({ \ - __clamp_input_check(lo, hi) + \ - __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \ - __typecheck(hi, lo) && __is_constexpr(val) && \ - __is_constexpr(lo) && __is_constexpr(hi), \ + __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ __clamp(val, lo, hi), \ __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) @@ -65,14 +82,31 @@ * @x: first value * @y: second value */ -#define min(x, y) __careful_cmp(x, y, <) +#define min(x, y) __careful_cmp(min, x, y) /** * max - return maximum of two values of the same or compatible types * @x: first value * @y: second value */ -#define max(x, y) __careful_cmp(x, y, >) +#define max(x, y) __careful_cmp(max, x, y) + +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) /** * min3 - return minimum of three values @@ -124,7 +158,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) +#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) /** * max_t - return maximum of two values, using the specified type @@ -132,28 +166,7 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) - -/* - * Remove a const qualifier from integer types - * _Generic(foo, type-name: association, ..., default: association) performs a - * comparison against the foo type (not the qualified type). - * Do not use the const keyword in the type-name as it will not match the - * unqualified type of foo. - */ -#define __unconst_integer_type_cases(type) \ - unsigned type: (unsigned type)0, \ - signed type: (signed type)0 - -#define __unconst_integer_typeof(x) typeof( \ - _Generic((x), \ - char: (char)0, \ - __unconst_integer_type_cases(char), \ - __unconst_integer_type_cases(short), \ - __unconst_integer_type_cases(int), \ - __unconst_integer_type_cases(long), \ - __unconst_integer_type_cases(long long), \ - default: (x))) +#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) /* * Do not check the array parameter using __must_be_array(). @@ -169,13 +182,13 @@ * 'int *buff' and 'int buff[N]' types. * * The array can be an array of const items. - * typeof() keeps the const qualifier. Use __unconst_integer_typeof() in order + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order * to discard the const qualifier for the __element variable. */ #define __minmax_array(op, array, len) ({ \ typeof(&(array)[0]) __array = (array); \ typeof(len) __len = (len); \ - __unconst_integer_typeof(__array[0]) __element = __array[--__len]; \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ while (__len--) \ __element = op(__element, __array[__len]); \ __element; }) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..01275c6e8468 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,9 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { @@ -915,7 +918,7 @@ static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe) return (cqe->tls_outer_l3_tunneled >> 3) & 0x3; } -static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) +static inline bool cqe_has_vlan(const struct mlx5_cqe64 *cqe) { return cqe->l4_l3_hdr_type & 0x1; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..bf9324a31ae9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -150,11 +150,14 @@ enum { MLX5_REG_MTPPSE = 0x9054, MLX5_REG_MTUTC = 0x9055, MLX5_REG_MPEGC = 0x9056, + MLX5_REG_MPIR = 0x9059, MLX5_REG_MCQS = 0x9060, MLX5_REG_MCQI = 0x9061, MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MSECQ = 0x9155, + MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, @@ -613,6 +616,7 @@ struct mlx5_priv { int adev_idx; int sw_vhca_id; struct mlx5_events *events; + struct mlx5_vhca_events *vhca_events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -621,6 +625,7 @@ struct mlx5_priv { struct mlx5_lag *lag; u32 flags; struct mlx5_devcom_dev *devc; + struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; @@ -674,6 +679,9 @@ struct mlx5e_resources { struct mlx5_td td; u32 mkey; struct mlx5_sq_bfreg bfreg; +#define MLX5_MAX_NUM_TC 8 + u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; + bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; @@ -684,6 +692,7 @@ enum mlx5_sw_icm_type { MLX5_SW_ICM_TYPE_STEERING, MLX5_SW_ICM_TYPE_HEADER_MODIFY, MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN, + MLX5_SW_ICM_TYPE_SW_ENCAP, }; #define MLX5_MAX_RESERVED_GIDS 8 @@ -814,6 +823,7 @@ struct mlx5_core_dev { struct blocking_notifier_head macsec_nh; #endif u64 num_ipsec_offloads; + struct mlx5_sd *sd; }; struct mlx5_db { @@ -1027,6 +1037,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); @@ -1037,10 +1049,6 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, - gfp_t flags, int npages); -void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, - struct mlx5_cmd_mailbox *head); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); @@ -1054,8 +1062,6 @@ void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1095,8 +1101,6 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); __be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, - struct mlx5_odp_caps *odp_caps); int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -1198,12 +1202,6 @@ int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, int vf_id, struct notifier_block *nb); -#ifdef CONFIG_MLX5_CORE_IPOIB -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)); -#endif /* CONFIG_MLX5_CORE_IPOIB */ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 950d2431a53c..df73a2ccc9af 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -7,6 +7,7 @@ #define _MLX5_ESWITCH_ #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <net/devlink.h> #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) @@ -210,4 +211,11 @@ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; } +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + #endif diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1e00c2436377..3fb428ce7d1c 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -67,6 +67,7 @@ enum { MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), + MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5), }; #define LEFTOVERS_RULE_NUM 2 @@ -131,6 +132,7 @@ struct mlx5_flow_handle; enum { FLOW_CONTEXT_HAS_TAG = BIT(0), + FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1), }; struct mlx5_flow_context { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..c940b329a475 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -312,6 +312,7 @@ enum { MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, MLX5_CMD_OP_MAX }; @@ -434,7 +435,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 flow_table_modify[0x1]; u8 reformat[0x1]; u8 decap[0x1]; - u8 reserved_at_9[0x1]; + u8 reset_root_to_default[0x1]; u8 pop_vlan[0x1]; u8 push_vlan[0x1]; u8 reserved_at_c[0x1]; @@ -620,7 +621,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0x20]; + u8 inner_esp_spi[0x20]; u8 outer_esp_spi[0x20]; u8 reserved_at_1a0[0x60]; }; @@ -1102,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; @@ -1192,7 +1193,8 @@ struct mlx5_ifc_device_mem_cap_bits { u8 log_sw_icm_alloc_granularity[0x6]; u8 log_steering_sw_icm_size[0x8]; - u8 reserved_at_120[0x18]; + u8 log_indirect_encap_sw_icm_size[0x8]; + u8 reserved_at_128[0x10]; u8 log_header_modify_pattern_sw_icm_size[0x8]; u8 header_modify_sw_icm_start_address[0x40]; @@ -1203,7 +1205,11 @@ struct mlx5_ifc_device_mem_cap_bits { u8 memic_operations[0x20]; - u8 reserved_at_220[0x5e0]; + u8 reserved_at_220[0x20]; + + u8 indirect_encap_sw_icm_start_address[0x40]; + + u8 reserved_at_280[0x580]; }; struct mlx5_ifc_device_event_cap_bits { @@ -1231,7 +1237,14 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 max_emulated_devices[0x8]; u8 max_num_virtio_queues[0x18]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 reserved_at_c0[0x13]; + u8 desc_group_mkey_supported[0x1]; + u8 freeze_to_rdy_supported[0x1]; + u8 reserved_at_d5[0xb]; + + u8 reserved_at_e0[0x20]; u8 umem_1_buffer_param_a[0x20]; @@ -1794,7 +1807,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 disable_local_lb_uc[0x1]; u8 disable_local_lb_mc[0x1]; u8 log_min_hairpin_wq_data_sz[0x5]; - u8 reserved_at_3e8[0x2]; + u8 reserved_at_3e8[0x1]; + u8 silent_mode[0x1]; u8 vhca_state[0x1]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; @@ -1811,7 +1825,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_460[0x1]; u8 ats[0x1]; - u8 reserved_at_462[0x1]; + u8 cross_vhca_rqt[0x1]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x1]; u8 crypto[0x1]; @@ -1934,6 +1948,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 match_definer_format_supported[0x40]; }; +enum { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000, + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE = (1ULL << 20), +}; + +enum { + MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200, +}; + struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; @@ -1948,9 +1971,15 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x16]; + u8 reserved_at_ca[0x6]; + u8 migration_in_chunks[0x1]; + u8 reserved_at_d1[0xf]; + + u8 cross_vhca_object_to_object_supported[0x20]; - u8 reserved_at_e0[0xc0]; + u8 allowed_object_for_other_vhca_access[0x40]; + + u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -1971,7 +2000,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_260[0x120]; u8 reserved_at_380[0x10]; u8 ec_vf_vport_base[0x10]; - u8 reserved_at_3a0[0x460]; + + u8 reserved_at_3a0[0x10]; + u8 max_rqt_vhca_id[0x10]; + + u8 reserved_at_3c0[0x440]; }; enum mlx5_ifc_flow_destination_type { @@ -2130,6 +2163,13 @@ struct mlx5_ifc_rq_num_bits { u8 rq_num[0x18]; }; +struct mlx5_ifc_rq_vhca_bits { + u8 reserved_at_0[0x8]; + u8 rq_num[0x18]; + u8 reserved_at_20[0x10]; + u8 rq_vhca_id[0x10]; +}; + struct mlx5_ifc_mac_address_layout_bits { u8 reserved_at_0[0x10]; u8 mac_addr_47_32[0x10]; @@ -3536,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_81[0x1]; + u8 uplink_hairpin_en[0x1]; u8 flow_source[0x2]; u8 encrypt_decrypt_type[0x4]; u8 destination_list_size[0x18]; @@ -3880,7 +3920,10 @@ struct mlx5_ifc_rqtc_bits { u8 reserved_at_e0[0x6a0]; - struct mlx5_ifc_rq_num_bits rq_num[]; + union { + DECLARE_FLEX_ARRAY(struct mlx5_ifc_rq_num_bits, rq_num); + DECLARE_FLEX_ARRAY(struct mlx5_ifc_rq_vhca_bits, rq_vhca); + }; }; enum { @@ -3993,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits { u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; - u8 reserved_at_60[0xd0]; + u8 reserved_at_60[0xa0]; + u8 reserved_at_100[0x1]; + u8 sd_group[0x3]; + u8 reserved_at_104[0x1c]; + + u8 reserved_at_120[0x10]; u8 mtu[0x10]; u8 system_image_guid[0x40]; @@ -4723,7 +4771,10 @@ struct mlx5_ifc_set_l2_table_entry_in_bits { u8 reserved_at_c0[0x20]; - u8 reserved_at_e0[0x13]; + u8 reserved_at_e0[0x10]; + u8 silent_mode_valid[0x1]; + u8 silent_mode[0x1]; + u8 reserved_at_f2[0x1]; u8 vlan_valid[0x1]; u8 vlan[0xc]; @@ -6369,6 +6420,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_modify_header_arg_bits { u8 reserved_at_0[0x80]; @@ -6391,6 +6464,24 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -10028,6 +10119,19 @@ struct mlx5_ifc_mpegc_reg_bits { u8 reserved_at_60[0x100]; }; +struct mlx5_ifc_mpir_reg_bits { + u8 sdm[0x1]; + u8 reserved_at_1[0x1b]; + u8 host_buses[0x4]; + + u8 reserved_at_20[0x20]; + + u8 local_port[0x8]; + u8 reserved_at_28[0x18]; + + u8 reserved_at_60[0x20]; +}; + enum { MLX5_MTUTC_FREQ_ADJ_UNITS_PPB = 0x0, MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM = 0x1, @@ -10042,7 +10146,10 @@ enum { struct mlx5_ifc_mtutc_reg_bits { u8 reserved_at_0[0x5]; u8 freq_adj_units[0x3]; - u8 reserved_at_8[0x14]; + u8 reserved_at_8[0x3]; + u8 log_max_freq_adjustment[0x5]; + + u8 reserved_at_10[0xc]; u8 operation[0x4]; u8 freq_adjustment[0x20]; @@ -10146,7 +10253,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 mcqi[0x1]; u8 mcqs[0x1]; - u8 regs_95_to_87[0x9]; + u8 regs_95_to_90[0x6]; + u8 mpir[0x1]; + u8 regs_88_to_87[0x2]; u8 mpegc[0x1]; u8 mtutc[0x1]; u8 regs_84_to_68[0x11]; @@ -10154,7 +10263,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_63_to_46[0x12]; u8 mrtc[0x1]; - u8 regs_44_to_32[0xd]; + u8 regs_44_to_41[0x4]; + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; u8 regs_31_to_10[0x16]; u8 mtmp[0x1]; @@ -10176,7 +10287,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 mirc[0x1]; u8 regs_97_to_96[0x2]; - u8 regs_95_to_64[0x20]; + u8 regs_95_to_87[0x09]; + u8 synce_registers[0x2]; + u8 regs_84_to_64[0x15]; u8 regs_63_to_32[0x20]; @@ -10552,6 +10665,7 @@ enum { MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, + MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET = 0x7, }; enum { @@ -10572,6 +10686,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, }; struct mlx5_ifc_initial_seg_bits { @@ -11917,6 +12032,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { @@ -11936,6 +12052,13 @@ enum { MLX5_IPSEC_ASO_INC_SN = 0x2, }; +enum { + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + struct mlx5_ifc_ipsec_aso_bits { u8 valid[0x1]; u8 reserved_at_201[0x1]; @@ -12392,7 +12515,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 op_mod[0x10]; u8 incremental[0x1]; - u8 reserved_at_41[0xf]; + u8 chunk[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -12408,7 +12532,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 required_umem_size[0x20]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x20]; + + u8 remaining_total_size[0x40]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_save_vhca_state_in_bits { @@ -12440,7 +12568,7 @@ struct mlx5_ifc_save_vhca_state_out_bits { u8 actual_image_size[0x20]; - u8 reserved_at_60[0x20]; + u8 next_required_umem_size[0x20]; }; struct mlx5_ifc_load_vhca_state_in_bits { @@ -12549,4 +12677,72 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_query_page_track_obj_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + struct mlx5_ifc_page_track_bits obj_context; +}; + +struct mlx5_ifc_msecq_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x12]; + u8 network_option[0x2]; + u8 local_ssm_code[0x4]; + u8 local_enhanced_ssm_code[0x8]; + + u8 local_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + +enum { + MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0), + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1), + MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2), +}; + +enum mlx5_msees_admin_status { + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1, +}; + +enum mlx5_msees_oper_status { + MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1, + MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2, + MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3, + MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4, + MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, +}; + +enum mlx5_msees_failure_reason { + MLX5_MSEES_FAILURE_REASON_UNDEFINED_ERROR = 0x0, + MLX5_MSEES_FAILURE_REASON_PORT_DOWN = 0x1, + MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF = 0x2, + MLX5_MSEES_FAILURE_REASON_NET_SYNCHRONIZER_DEVICE_ERROR = 0x3, + MLX5_MSEES_FAILURE_REASON_LACK_OF_RESOURCES = 0x4, +}; + +struct mlx5_ifc_msees_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 lp_msb[0x2]; + u8 reserved_at_14[0xc]; + + u8 field_select[0x20]; + + u8 admin_status[0x4]; + u8 oper_status[0x4]; + u8 ho_acq[0x1]; + u8 reserved_at_49[0xc]; + u8 admin_freq_measure[0x1]; + u8 oper_freq_measure[0x1]; + u8 failure_reason[0x9]; + + u8 frequency_diff[0x20]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 9becdc3fa503..40371c916cf9 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits { u8 reserved_at_320[0x8]; u8 pd[0x18]; - u8 reserved_at_340[0xc0]; + u8 reserved_at_340[0x20]; + + u8 desc_group_mkey[0x20]; + + u8 reserved_at_380[0x80]; }; struct mlx5_ifc_virtio_net_q_object_bits { @@ -141,6 +145,11 @@ enum { MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11, + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; enum { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 98b2e1e149f9..26092c78a985 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,8 +115,9 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, - MLX5E_400GAUI_8 = 15, + MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, }; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bd..f0e55bf3ec8b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index fbb9bf447889..c36cc6d82926 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, u16 vport, u64 node_guid); diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..b6bdaa18b9e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,6 +36,7 @@ struct anon_vma; struct anon_vma_chain; struct user_struct; struct pt_regs; +struct folio_batch; extern int sysctl_page_lock_unfairness; @@ -86,7 +87,7 @@ extern int sysctl_legacy_va_layout; #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS extern const int mmap_rnd_bits_min; -extern const int mmap_rnd_bits_max; +extern int mmap_rnd_bits_max __ro_after_init; extern int mmap_rnd_bits __read_mostly; #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS @@ -226,7 +227,6 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru); @@ -362,8 +362,6 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_IA64) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI @@ -393,6 +391,20 @@ extern unsigned int kobjsize(const void *objp); # define VM_UFFD_MINOR VM_NONE #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +/* + * This flag is used to connect VFIO to arch specific KVM code. It + * indicates that the memory under this VMA is safe for use with any + * non-cachable memory type inside KVM. Some VFIO devices, on some + * platforms, are thought to be unsafe and can cause machine crashes + * if KVM does not lock down the memory type. + */ +#ifdef CONFIG_64BIT +#define VM_ALLOW_ANY_UNCACHED_BIT 39 +#define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT) +#else +#define VM_ALLOW_ANY_UNCACHED VM_NONE +#endif + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) @@ -619,7 +631,7 @@ struct vm_operations_struct { * policy. */ struct mempolicy *(*get_policy)(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); #endif /* * Called by vm_normal_page() for special PTEs to find the @@ -783,6 +795,11 @@ static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, return NULL; } +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); +} + static inline void release_fault_lock(struct vm_fault *vmf) { mmap_read_unlock(vmf->vma->vm_mm); @@ -888,8 +905,8 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) */ static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) { - return vma->vm_start <= vma->vm_mm->brk && - vma->vm_end >= vma->vm_mm->start_brk; + return vma->vm_start < vma->vm_mm->brk && + vma->vm_end > vma->vm_mm->start_brk; } /* @@ -903,8 +920,8 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) * its "stack". It's not even well-defined for programs written * languages like Go. */ - return vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack; + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; } static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) @@ -937,6 +954,17 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & VM_ACCESS_FLAGS; } +static inline bool is_shared_maywrite(vm_flags_t vm_flags) +{ + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == + (VM_SHARED | VM_MAYWRITE); +} + +static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +{ + return is_shared_maywrite(vma->vm_flags); +} + static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { @@ -985,6 +1013,17 @@ static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, return mas_expected_entries(&vmi->mas, count); } +static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, + unsigned long start, unsigned long end, gfp_t gfp) +{ + __mas_set_range(&vmi->mas, start, end - 1); + mas_store_gfp(&vmi->mas, NULL, gfp); + if (unlikely(mas_is_err(&vmi->mas))) + return -ENOMEM; + + return 0; +} + /* Free any unused preallocations */ static inline void vma_iter_free(struct vma_iterator *vmi) { @@ -1158,7 +1197,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for * debugging purposes - it does not include PTE-mapped sub-pages; look - * at folio_mapcount() or page_mapcount() or total_mapcount() instead. + * at folio_mapcount() or page_mapcount() instead. */ static inline int folio_entire_mapcount(struct folio *folio) { @@ -1184,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page) * a large folio, it includes the number of times this page is mapped * as part of that folio. * - * The result is undefined for pages which cannot be mapped into userspace. - * For example SLAB or special types of pages. See function page_has_type(). - * They use this field in struct page differently. + * Will report 0 for pages which cannot be mapped into userspace, eg + * slab, page tables and similar. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < 0) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); @@ -1218,13 +1259,6 @@ static inline int folio_mapcount(struct folio *folio) return folio_total_mapcount(folio); } -static inline int total_mapcount(struct page *page) -{ - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) + 1; - return folio_total_mapcount(page_folio(page)); -} - static inline bool folio_large_is_mapped(struct folio *folio) { /* @@ -1337,7 +1371,6 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio, struct page *page, unsigned int nr, unsigned long addr); vm_fault_t finish_fault(struct vm_fault *vmf); -vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #endif /* @@ -1495,6 +1528,8 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } +void folios_put_refs(struct folio_batch *folios, unsigned int *refs); + /* * union release_pages_arg - an array of pages or folios * @@ -1517,18 +1552,19 @@ void release_pages(release_pages_arg, int nr); /** * folios_put - Decrement the reference count on an array of folios. * @folios: The folios. - * @nr: How many folios there are. * - * Like folio_put(), but for an array of folios. This is more efficient - * than writing the loop yourself as it will optimise the locks which - * need to be taken if the folios are freed. + * Like folio_put(), but for a batch of folios. This is more efficient + * than writing the loop yourself as it will optimise the locks which need + * to be taken if the folios are freed. The folios batch is returned + * empty and ready to be reused for another batch; there is no need to + * reinitialise it. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ -static inline void folios_put(struct folio **folios, unsigned int nr) +static inline void folios_put(struct folio_batch *folios) { - release_pages(folios, nr); + folios_put_refs(folios, NULL); } static inline void put_page(struct page *page) @@ -1621,13 +1657,11 @@ static inline int page_zone_id(struct page *page) } #ifdef NODE_NOT_IN_PAGE_FLAGS -extern int page_to_nid(const struct page *page); +int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { - struct page *p = (struct page *)page; - - return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK; + return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; } #endif @@ -1686,26 +1720,26 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); + return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK); } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page->_last_cpupid; + return folio->_last_cpupid; } static inline void page_cpupid_reset_last(struct page *page) { page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } -extern int page_cpupid_xchg_last(struct page *page, int cpupid); +int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { @@ -1713,11 +1747,12 @@ static inline void page_cpupid_reset_last(struct page *page) } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { int last_time; - last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); + last_time = folio_xchg_last_cpupid(folio, + time >> PAGE_ACCESS_TIME_BUCKETS); return last_time << PAGE_ACCESS_TIME_BUCKETS; } @@ -1726,24 +1761,24 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); - if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { - __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { + __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { return 0; } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } static inline int cpupid_to_nid(int cpupid) @@ -1795,7 +1830,7 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) static inline u8 page_kasan_tag(const struct page *page) { - u8 tag = 0xff; + u8 tag = KASAN_TAG_KERNEL; if (kasan_enabled()) { tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; @@ -1824,7 +1859,7 @@ static inline void page_kasan_tag_set(struct page *page, u8 tag) static inline void page_kasan_tag_reset(struct page *page) { if (kasan_enabled()) - page_kasan_tag_set(page, 0xff); + page_kasan_tag_set(page, KASAN_TAG_KERNEL); } #else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ @@ -1944,15 +1979,15 @@ static inline bool page_maybe_dma_pinned(struct page *page) * * The caller has to hold the PT lock and the vma->vm_mm->->write_protect_seq. */ -static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, - struct page *page) +static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma, + struct folio *folio) { VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) return false; - return page_maybe_dma_pinned(page); + return folio_maybe_dma_pinned(folio); } /** @@ -2045,6 +2080,13 @@ static inline long folio_nr_pages(struct folio *folio) #endif } +/* Only hugetlbfs can allocate folios larger than MAX_ORDER */ +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) +#else +#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES +#endif + /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to @@ -2167,11 +2209,6 @@ static inline int arch_make_folio_accessible(struct folio *folio) */ #include <linux/vmstat.h> -static __always_inline void *lowmem_page_address(const struct page *page) -{ - return page_to_virt(page); -} - #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL #endif @@ -2194,6 +2231,11 @@ void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif +static __always_inline void *lowmem_page_address(const struct page *page) +{ + return page_to_virt(page); +} + #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) #define page_address(page) lowmem_page_address(page) #define set_page_address(page, address) do { } while(0) @@ -2327,6 +2369,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); @@ -2362,7 +2406,8 @@ extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); -int generic_error_remove_page(struct address_space *mapping, struct page *page); +int generic_error_remove_folio(struct address_space *mapping, + struct folio *folio); struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long address, struct pt_regs *regs); @@ -2413,8 +2458,6 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -2425,6 +2468,9 @@ long pin_user_pages_remote(struct mm_struct *mm, unsigned int gup_flags, struct page **pages, int *locked); +/* + * Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT. + */ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, unsigned long addr, int gup_flags, @@ -2432,12 +2478,15 @@ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, { struct page *page; struct vm_area_struct *vma; - int got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); + int got; + + if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT))) + return ERR_PTR(-EINVAL); + + got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); if (got < 0) return ERR_PTR(got); - if (got == 0) - return NULL; vma = vma_lookup(mm, addr); if (WARN_ON_ONCE(!vma)) { @@ -2480,7 +2529,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, - bool need_rmap_locks); + bool need_rmap_locks, bool for_stack); /* * Flags used by change_protection(). For now we make it a bitmap so @@ -2568,19 +2617,19 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member) mm_trace_rss_stat(mm, member); } -/* Optimized variant when page is already known not to be PageAnon */ -static inline int mm_counter_file(struct page *page) +/* Optimized variant when folio is already known not to be anon */ +static inline int mm_counter_file(struct folio *folio) { - if (PageSwapBacked(page)) + if (folio_test_swapbacked(folio)) return MM_SHMEMPAGES; return MM_FILEPAGES; } -static inline int mm_counter(struct page *page) +static inline int mm_counter(struct folio *folio) { - if (PageAnon(page)) + if (folio_test_anon(folio)) return MM_ANONPAGES; - return mm_counter_file(page); + return mm_counter_file(folio); } static inline unsigned long get_mm_rss(struct mm_struct *mm) @@ -2628,14 +2677,6 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } -#if defined(SPLIT_RSS_COUNTING) -void sync_mm_rss(struct mm_struct *mm); -#else -static inline void sync_mm_rss(struct mm_struct *mm) -{ -} -#endif - #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL static inline int pte_special(pte_t pte) { @@ -3057,6 +3098,22 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) return ptl; } +static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); +} + +static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + extern void __init pagecache_init(void); extern void free_initmem(void); @@ -3221,22 +3278,73 @@ extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct *next); extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff); -extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, - struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, - unsigned long end, unsigned long vm_flags, struct anon_vma *, - struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, - struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *, - unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +struct vm_area_struct *vma_modify(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long vm_flags, + struct mempolicy *policy, + struct vm_userfaultfd_ctx uffd_ctx, + struct anon_vma_name *anon_name); + +/* We are about to modify the VMA's flags. */ +static inline struct vm_area_struct +*vma_modify_flags(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long new_flags) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), vma->vm_userfaultfd_ctx, + anon_vma_name(vma)); +} + +/* We are about to modify the VMA's flags and/or anon_name. */ +static inline struct vm_area_struct +*vma_modify_flags_name(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long new_flags, + struct anon_vma_name *new_name) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); +} + +/* We are about to modify the VMA's memory policy. */ +static inline struct vm_area_struct +*vma_modify_policy(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct mempolicy *new_pol) +{ + return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, + new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); +} + +/* We are about to modify the VMA's flags and/or uffd context. */ +static inline struct vm_area_struct +*vma_modify_flags_uffd(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long new_flags, + struct vm_userfaultfd_ctx new_ctx) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), new_ctx, anon_vma_name(vma)); +} static inline int check_data_rlimit(unsigned long rlim, unsigned long new, @@ -3308,8 +3416,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif -/* These take the mm semaphore themselves */ -extern int __must_check vm_brk(unsigned long, unsigned long); +/* This takes the mm semaphore itself */ extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, @@ -3786,6 +3893,32 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap); #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +{ + /* number of pfns from base where pfn_to_page() is valid */ + if (altmap) + return altmap->reserve + altmap->free; + return 0; +} + +static inline void vmem_altmap_free(struct vmem_altmap *altmap, + unsigned long nr_pfns) +{ + altmap->alloc -= nr_pfns; +} +#else +static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +{ + return 0; +} + +static inline void vmem_altmap_free(struct vmem_altmap *altmap, + unsigned long nr_pfns) +{ +} +#endif + #define VMEMMAP_RESERVE_NR 2 #ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, @@ -3831,6 +3964,7 @@ enum mf_flags { MF_UNPOISON = 1 << 4, MF_SW_SIMULATED = 1 << 5, MF_NO_RETRY = 1 << 6, + MF_MEM_PRE_REMOVE = 1 << 7, }; int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); @@ -4000,25 +4134,26 @@ static inline void mem_dump_obj(void *object) {} #endif /** - * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it + * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and + * handle them. * @seals: the seals to check * @vma: the vma to operate on * - * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on - * the vma flags. Return 0 if check pass, or <0 for errors. + * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper + * check/handling on the vma flags. Return 0 if check pass, or <0 for errors. */ -static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) +static inline int seal_check_write(int seals, struct vm_area_struct *vma) { - if (seals & F_SEAL_FUTURE_WRITE) { + if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { /* * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * "future write" seal active. + * write seals are active. */ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) return -EPERM; /* - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as + * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as * MAP_SHARED and read-only, take care to not allow mprotect to * revert protections on such mappings. Do this only for shared * mappings. For private mappings, don't need to mask @@ -4062,4 +4197,11 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) #endif +static inline bool pfn_is_unaccepted_memory(unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); +} + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8148b30a9df1..f4fe593c1400 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@ #include <linux/atomic.h> #include <linux/huge_mm.h> +#include <linux/mm_types.h> #include <linux/swap.h> #include <linux/string.h> #include <linux/userfaultfd_k.h> @@ -231,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* - * There are three common cases for this page: - * 1. If it's hot, e.g., freshly faulted in or previously hot and - * migrated, add it to the youngest generation. - * 2. If it's cold but can't be evicted immediately, i.e., an anon page - * not in swapcache or a dirty page pending writeback, add it to the - * second oldest generation. - * 3. Everything else (clean, cold) is added to the oldest generation. + * There are four common cases for this page: + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest + * generation, and it's protected over the rest below. + * 2. If it can't be evicted immediately, i.e., a dirty page pending + * writeback, add it to the second youngest generation. + * 3. If it should be evicted first, e.g., cold and clean from + * folio_rotate_reclaimable(), add it to the oldest generation. + * 4. Everything else falls between 2 & 3 above and is added to the + * second oldest generation if it's considered inactive, or the + * oldest generation otherwise. See lru_gen_is_active(). */ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->min_seq[type] + 1; - else + seq = lrugen->max_seq - 1; + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; + else + seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; @@ -352,15 +358,6 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) } #ifdef CONFIG_ANON_VMA_NAME -/* - * mmap_lock should be read-locked when calling anon_vma_name(). Caller should - * either keep holding the lock while using the returned pointer or it should - * raise anon_vma_name refcount before releasing the lock. - */ -extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); -extern struct anon_vma_name *anon_vma_name_alloc(const char *name); -extern void anon_vma_name_free(struct kref *kref); - /* mmap_lock should be read-locked */ static inline void anon_vma_name_get(struct anon_vma_name *anon_name) { @@ -415,16 +412,6 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, } #else /* CONFIG_ANON_VMA_NAME */ -static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - -static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) -{ - return NULL; -} - static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36c5b43999e6..5240bd7bca33 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -125,18 +125,7 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - union { - /** - * dma_addr_upper: might require a 64-bit - * value on 32-bit architectures. - */ - unsigned long dma_addr_upper; - /** - * For frag page support, not supported in - * 32-bit architectures with 64-bit DMA. - */ - atomic_long_t pp_frag_count; - }; + atomic_long_t pp_ref_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ @@ -199,6 +188,10 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif + #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: @@ -210,10 +203,6 @@ struct page { struct page *kmsan_shadow; struct page *kmsan_origin; #endif - -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS - int _last_cpupid; -#endif } _struct_page_alignment; /* @@ -221,8 +210,8 @@ struct page { * * An 'encoded_page' pointer is a pointer to a regular 'struct page', but * with the low bits of the pointer indicating extra context-dependent - * information. Not super-common, but happens in mmu_gather and mlock - * handling, and this acts as a type system check on that use. + * information. Only used in mmu_gather handling, and this acts as a type + * system check on that use. * * We only really have two guaranteed bits in general, although you could * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) @@ -231,21 +220,46 @@ struct page { * Use the supplied helper functions to endcode/decode the pointer and bits. */ struct encoded_page; -#define ENCODE_PAGE_BITS 3ul + +#define ENCODED_PAGE_BITS 3ul + +/* Perform rmap removal after we have flushed the TLB. */ +#define ENCODED_PAGE_BIT_DELAY_RMAP 1ul + +/* + * The next item in an encoded_page array is the "nr_pages" argument, specifying + * the number of consecutive pages starting from this page, that all belong to + * the same folio. For example, "nr_pages" corresponds to the number of folio + * references that must be dropped. If this bit is not set, "nr_pages" is + * implicitly 1. + */ +#define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul + static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) { - BUILD_BUG_ON(flags > ENCODE_PAGE_BITS); + BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); return (struct encoded_page *)(flags | (unsigned long)page); } static inline unsigned long encoded_page_flags(struct encoded_page *page) { - return ENCODE_PAGE_BITS & (unsigned long)page; + return ENCODED_PAGE_BITS & (unsigned long)page; } static inline struct page *encoded_page_ptr(struct encoded_page *page) { - return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page); + return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); +} + +static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr) +{ + VM_WARN_ON_ONCE((nr << 2) >> 2 != nr); + return (struct encoded_page *)(nr << 2); +} + +static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page) +{ + return ((unsigned long)page) >> 2; } /* @@ -272,6 +286,8 @@ typedef struct { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @virtual: Virtual address in the kernel direct map. + * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). @@ -318,6 +334,12 @@ struct folio { #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif +#if defined(WANT_PAGE_VIRTUAL) + void *virtual; +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif /* private: the union with struct page is transitional */ }; struct page page; @@ -373,6 +395,12 @@ FOLIO_MATCH(_refcount, _refcount); #ifdef CONFIG_MEMCG FOLIO_MATCH(memcg_data, memcg_data); #endif +#if defined(WANT_PAGE_VIRTUAL) +FOLIO_MATCH(virtual, virtual); +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +FOLIO_MATCH(_last_cpupid, _last_cpupid); +#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ @@ -391,18 +419,19 @@ FOLIO_MATCH(compound_head, _head_2a); /** * struct ptdesc - Memory descriptor for page tables. - * @__page_flags: Same as page flags. Unused for page tables. + * @__page_flags: Same as page flags. Powerpc only. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 and x86. * @_pt_pad_1: Padding that aliases with page's compound head. * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. * @__page_mapping: Aliases with page->mapping. Unused for page tables. + * @pt_index: Used for s390 gmap. * @pt_mm: Used for x86 pgds. - * @pt_frag_refcount: For fragmented page table tracking. Powerpc and s390 only. + * @pt_frag_refcount: For fragmented page table tracking. Powerpc only. * @_pt_pad_2: Padding to ensure proper alignment. * @ptl: Lock for the page table. * @__page_type: Same as page->page_type. Unused for page tables. - * @_refcount: Same as page refcount. Used for s390 page tables. + * @__page_refcount: Same as page refcount. * @pt_memcg_data: Memcg data. Tracked for page tables here. * * This struct overlays struct page for now. Do not modify without a good @@ -422,6 +451,7 @@ struct ptdesc { unsigned long __page_mapping; union { + pgoff_t pt_index; struct mm_struct *pt_mm; atomic_t pt_frag_refcount; }; @@ -435,7 +465,7 @@ struct ptdesc { #endif }; unsigned int __page_type; - atomic_t _refcount; + atomic_t __page_refcount; #ifdef CONFIG_MEMCG unsigned long pt_memcg_data; #endif @@ -447,9 +477,10 @@ TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); +TABLE_MATCH(index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); -TABLE_MATCH(_refcount, _refcount); +TABLE_MATCH(_refcount, __page_refcount); #ifdef CONFIG_MEMCG TABLE_MATCH(memcg_data, pt_memcg_data); #endif @@ -546,14 +577,65 @@ struct anon_vma_name { char name[]; }; +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. + */ +struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +struct anon_vma_name *anon_vma_name_alloc(const char *name); +void anon_vma_name_free(struct kref *kref); +#else /* CONFIG_ANON_VMA_NAME */ +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) +{ + return NULL; +} +#endif + struct vma_lock { struct rw_semaphore lock; }; struct vma_numab_state { + /* + * Initialised as time in 'jiffies' after which VMA + * should be scanned. Delays first scan of new VMA by at + * least sysctl_numa_balancing_scan_delay: + */ unsigned long next_scan; - unsigned long next_pid_reset; - unsigned long access_pids[2]; + + /* + * Time in jiffies when pids_active[] is reset to + * detect phase change behaviour: + */ + unsigned long pids_active_reset; + + /* + * Approximate tracking of PIDs that trapped a NUMA hinting + * fault. May produce false positives due to hash collisions. + * + * [0] Previous PID tracking + * [1] Current PID tracking + * + * Window moves after next_pid_reset has expired approximately + * every VMA_PID_RESET_PERIOD jiffies: + */ + unsigned long pids_active[2]; + + /* MM scan sequence ID when scan first started after VMA creation */ + int start_scan_seq; + + /* + * MM scan sequence ID when the VMA was last completely scanned. + * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq + */ + int prev_scan_seq; }; /* @@ -662,6 +744,12 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; +#ifdef CONFIG_NUMA +#define vma_policy(vma) ((vma)->vm_policy) +#else +#define vma_policy(vma) NULL +#endif + #ifdef CONFIG_SCHED_MM_CID struct mm_cid { u64 time; @@ -670,6 +758,7 @@ struct mm_cid { #endif struct kioctx_table; +struct iommu_mm_data; struct mm_struct { struct { /* @@ -881,8 +970,8 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SVA - u32 pasid; +#ifdef CONFIG_IOMMU_MM_DATA + struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM /* @@ -901,7 +990,7 @@ struct mm_struct { */ unsigned long ksm_zero_pages; #endif /* CONFIG_KSM */ -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_GEN_WALKS_MMU struct { /* this mm_struct is on lru_gen_mm_list */ struct list_head list; @@ -916,7 +1005,7 @@ struct mm_struct { struct mem_cgroup *memcg; #endif } lru_gen; -#endif /* CONFIG_LRU_GEN */ +#endif /* CONFIG_LRU_GEN_WALKS_MMU */ } __randomize_layout; /* @@ -954,11 +1043,13 @@ struct lru_gen_mm_list { spinlock_t lock; }; +#endif /* CONFIG_LRU_GEN */ + +#ifdef CONFIG_LRU_GEN_WALKS_MMU + void lru_gen_add_mm(struct mm_struct *mm); void lru_gen_del_mm(struct mm_struct *mm); -#ifdef CONFIG_MEMCG void lru_gen_migrate_mm(struct mm_struct *mm); -#endif static inline void lru_gen_init_mm(struct mm_struct *mm) { @@ -979,7 +1070,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm) WRITE_ONCE(mm->lru_gen.bitmap, -1); } -#else /* !CONFIG_LRU_GEN */ +#else /* !CONFIG_LRU_GEN_WALKS_MMU */ static inline void lru_gen_add_mm(struct mm_struct *mm) { @@ -989,11 +1080,9 @@ static inline void lru_gen_del_mm(struct mm_struct *mm) { } -#ifdef CONFIG_MEMCG static inline void lru_gen_migrate_mm(struct mm_struct *mm) { } -#endif static inline void lru_gen_init_mm(struct mm_struct *mm) { @@ -1003,7 +1092,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm) { } -#endif /* CONFIG_LRU_GEN */ +#endif /* CONFIG_LRU_GEN_WALKS_MMU */ struct vma_iterator { struct ma_state mas; @@ -1014,7 +1103,8 @@ struct vma_iterator { .mas = { \ .tree = &(__mm)->mm_mt, \ .index = __addr, \ - .node = MAS_START, \ + .node = NULL, \ + .status = ma_start, \ }, \ } diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index aa44fff8bb9d..a2f6179b672b 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -9,9 +9,6 @@ */ #include <linux/types.h> -#include <linux/threads.h> -#include <linux/atomic.h> -#include <linux/cpumask.h> #include <asm/page.h> @@ -36,6 +33,8 @@ enum { NR_MM_COUNTERS }; +struct page; + struct page_frag { struct page *page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) diff --git a/include/linux/mman.h b/include/linux/mman.h index 40d94411d492..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,11 +156,20 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. * diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..f34407cc2788 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -32,6 +32,7 @@ struct mmc_csd { unsigned int r2w_factor; unsigned int max_dtr; unsigned int erase_size; /* In sectors */ + unsigned int wp_grp_size; unsigned int read_blkbits; unsigned int write_blkbits; unsigned int capacity; @@ -52,9 +53,6 @@ struct mmc_ext_csd { u8 part_config; u8 cache_ctrl; u8 rst_n_function; - u8 max_packed_writes; - u8 max_packed_reads; - u8 packed_event_en; unsigned int part_time; /* Units: ms */ unsigned int sa_timeout; /* Units: 100ns */ unsigned int generic_cmd6_time; /* Units: 10ms */ @@ -295,7 +293,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ @@ -304,6 +304,7 @@ struct mmc_card { unsigned int eg_boundary; /* don't cross erase-group boundaries */ unsigned int erase_arg; /* erase / trim / discard */ u8 erased_byte; /* value of erased bytes */ + unsigned int wp_grp_size; /* write group size in sectors */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 6efec0b9820c..2c7928a50907 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -27,7 +27,6 @@ struct mmc_command { u32 opcode; u32 arg; #define MMC_CMD23_ARG_REL_WR (1 << 31) -#define MMC_CMD23_ARG_PACKED ((0 << 31) | (1 << 30)) #define MMC_CMD23_ARG_TAG_REQ (1 << 29) u32 resp[4]; unsigned int flags; /* expected response type */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 62a6847a3b6f..5894bf912f7b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -526,6 +526,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; + int hsq_depth; u32 err_stats[MMC_ERR_MAX]; unsigned long private[] ____cacheline_aligned; @@ -538,7 +539,7 @@ struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra); int mmc_add_host(struct mmc_host *); void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); -void mmc_of_parse_clk_phase(struct mmc_host *host, +void mmc_of_parse_clk_phase(struct device *dev, struct mmc_clk_phase_map *map); int mmc_of_parse(struct mmc_host *host); int mmc_of_parse_voltage(struct mmc_host *host, u32 *mask); diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 6f7993803ee7..cf2bcb5da30d 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -257,8 +257,6 @@ static inline bool mmc_ready_for_data(u32 status) #define EXT_CSD_FLUSH_CACHE 32 /* W */ #define EXT_CSD_CACHE_CTRL 33 /* R/W */ #define EXT_CSD_POWER_OFF_NOTIFICATION 34 /* R/W */ -#define EXT_CSD_PACKED_FAILURE_INDEX 35 /* RO */ -#define EXT_CSD_PACKED_CMD_STATUS 36 /* RO */ #define EXT_CSD_EXP_EVENTS_STATUS 54 /* RO, 2 bytes */ #define EXT_CSD_EXP_EVENTS_CTRL 56 /* R/W, 2 bytes */ #define EXT_CSD_DATA_SECTOR_SIZE 61 /* R */ @@ -321,8 +319,6 @@ static inline bool mmc_ready_for_data(u32 status) #define EXT_CSD_SUPPORTED_MODE 493 /* RO */ #define EXT_CSD_TAG_UNIT_SIZE 498 /* RO */ #define EXT_CSD_DATA_TAG_SUPPORT 499 /* RO */ -#define EXT_CSD_MAX_PACKED_WRITES 500 /* RO */ -#define EXT_CSD_MAX_PACKED_READS 501 /* RO */ #define EXT_CSD_BKOPS_SUPPORT 502 /* RO */ #define EXT_CSD_HPI_FEATURES 503 /* RO */ @@ -402,18 +398,12 @@ static inline bool mmc_ready_for_data(u32 status) #define EXT_CSD_PWR_CL_8BIT_SHIFT 4 #define EXT_CSD_PWR_CL_4BIT_SHIFT 0 -#define EXT_CSD_PACKED_EVENT_EN BIT(3) - /* * EXCEPTION_EVENT_STATUS field */ #define EXT_CSD_URGENT_BKOPS BIT(0) #define EXT_CSD_DYNCAP_NEEDED BIT(1) #define EXT_CSD_SYSPOOL_EXHAUSTED BIT(2) -#define EXT_CSD_PACKED_FAILURE BIT(3) - -#define EXT_CSD_PACKED_GENERIC_ERROR BIT(0) -#define EXT_CSD_PACKED_INDEXED_ERROR BIT(1) /* * BKOPS status level diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 7c3e7b0b0e8f..39a7714605a7 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -10,7 +10,7 @@ struct vm_area_struct; struct mm_struct; struct vma_iterator; -void dump_page(struct page *page, const char *reason); +void dump_page(const struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); void vma_iter_dump_tree(const struct vma_iterator *vmi); diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index f2b7a3f04099..bbaec80c78c5 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -11,7 +11,7 @@ #endif #ifndef leave_mm -static inline void leave_mm(int cpu) { } +static inline void leave_mm(void) { } #endif /* diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 6e3c857606f1..f349e08a9dfe 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -459,7 +459,14 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) lock_map_release(&__mmu_notifier_invalidate_range_start_map); } -static inline int +/* + * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it + * can return an error if a notifier can't proceed without blocking, in which + * case you're not allowed to modify PTEs in the specified range. + * + * This is mainly intended for OOM handling. + */ +static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4106fbc5b4b3..c11b7cde81ef 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -22,18 +22,21 @@ #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/local_lock.h> +#include <linux/zswap.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER -#define MAX_ORDER 10 +#define MAX_PAGE_ORDER 10 #else -#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER +#define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif -#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) +#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES) +#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should @@ -73,9 +76,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) +# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ + get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false +# define is_migrate_cma_folio(folio, pfn) false #endif static inline bool is_migrate_movable(int mt) @@ -95,7 +101,7 @@ static inline bool migratetype_is_mergeable(int mt) } #define for_each_migratetype_order(order, type) \ - for (order = 0; order <= MAX_ORDER; order++) \ + for (order = 0; order < NR_PAGE_ORDERS; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) extern int page_group_by_mobility_disabled; @@ -207,6 +213,10 @@ enum node_stat_item { PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif + /* PGDEMOTE_*: pages demoted */ + PGDEMOTE_KSWAPD, + PGDEMOTE_DIRECT, + PGDEMOTE_KHUGEPAGED, NR_VM_NODE_STAT_ITEMS }; @@ -435,14 +445,12 @@ struct lru_gen_folio { atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; -#ifdef CONFIG_MEMCG /* the memcg generation this lru_gen_folio belongs to */ u8 gen; /* the list segment this lru_gen_folio belongs to */ u8 seg; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_node list; -#endif }; enum { @@ -459,7 +467,7 @@ enum { #define NR_BLOOM_FILTERS 2 struct lru_gen_mm_state { - /* set to max_seq after each iteration */ + /* synced with max_seq after each iteration */ unsigned long seq; /* where the current iteration continues after */ struct list_head *head; @@ -474,8 +482,8 @@ struct lru_gen_mm_state { struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; - /* unstable max_seq from lru_gen_folio */ - unsigned long max_seq; + /* max_seq from lru_gen_folio: can be out of date */ + unsigned long seq; /* the next address within an mm to scan */ unsigned long next_addr; /* to batch promoted pages */ @@ -488,11 +496,6 @@ struct lru_gen_mm_walk { bool force_scan; }; -void lru_gen_init_lruvec(struct lruvec *lruvec); -void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); - -#ifdef CONFIG_MEMCG - /* * For each node, memcgs are divided into two generations: the old and the * young. For each generation, memcgs are randomly sharded into multiple bins @@ -505,33 +508,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * the old generation, is incremented when all its bins become empty. * * There are four operations: - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; - * 2. The first attempt to reclaim an memcg below low, which triggers + * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_YOUNG; - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * - * Note that memcg LRU only applies to global reclaim, and the round-robin - * incrementing of their max_seq counters ensures the eventual fairness to all - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * Notes: + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing + * of their max_seq counters ensures the eventual fairness to all eligible + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * 2. There are only two valid generations: old (seq) and young (seq+1). + * MEMCG_NR_GENS is set to three so that when reading the generation counter + * locklessly, a stale value (seq-1) does not wraparound to young. */ -#define MEMCG_NR_GENS 2 +#define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { @@ -546,6 +553,8 @@ struct lru_gen_memcg { }; void lru_gen_init_pgdat(struct pglist_data *pgdat); +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); @@ -554,19 +563,6 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); -#else /* !CONFIG_MEMCG */ - -#define MEMCG_NR_GENS 1 - -struct lru_gen_memcg { -}; - -static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) -{ -} - -#endif /* CONFIG_MEMCG */ - #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) @@ -581,8 +577,6 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { } -#ifdef CONFIG_MEMCG - static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } @@ -607,8 +601,6 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } -#endif /* CONFIG_MEMCG */ - #endif /* CONFIG_LRU_GEN */ struct lruvec { @@ -631,16 +623,17 @@ struct lruvec { #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ struct lru_gen_folio lrugen; +#ifdef CONFIG_LRU_GEN_WALKS_MMU /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif +#endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif + struct zswap_lruvec_state zswap_lruvec_state; }; -/* Isolate unmapped pages */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ @@ -676,15 +669,34 @@ enum zone_watermarks { #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) +/* + * Flags used in pcp->flags field. + * + * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the + * previous page freeing. To avoid to drain PCP for an accident + * high-order page freeing. + * + * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before + * draining PCP for consecutive high-order pages freeing without + * allocation if data cache slice of CPU is large enough. To reduce + * zone lock contention and keep cache-hot pages reusing. + */ +#define PCPF_PREV_FREE_HIGH_ORDER BIT(0) +#define PCPF_FREE_HIGH_BATCH BIT(1) + struct per_cpu_pages { spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ + int high_min; /* min high watermark */ + int high_max; /* max high watermark */ int batch; /* chunk size for buddy add/remove */ - short free_factor; /* batch scaling factor during free */ + u8 flags; /* protected by pcp->lock */ + u8 alloc_factor; /* batch scaling factor during allocate */ #ifdef CONFIG_NUMA - short expire; /* When 0, remote pagesets are drained */ + u8 expire; /* When 0, remote pagesets are drained */ #endif + short free_count; /* consecutive free count */ /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; @@ -837,7 +849,8 @@ struct zone { * the high and batch values are copied to individual pagesets for * faster access */ - int pageset_high; + int pageset_high_min; + int pageset_high_max; int pageset_batch; #ifndef CONFIG_SPARSEMEM @@ -925,10 +938,10 @@ struct zone { CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ - struct free_area free_area[MAX_ORDER + 1]; + struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY - /* Pages to be accepted. All pages on the list are MAX_ORDER */ + /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; #endif @@ -998,6 +1011,7 @@ enum zone_flags { * Cleared when kswapd is woken. */ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ + ZONE_BELOW_HIGH, /* zone is below high watermark. */ }; static inline unsigned long zone_managed_pages(struct zone *zone) @@ -1737,8 +1751,8 @@ static inline bool movable_only_nodes(nodemask_t *nodes) #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) -#if (MAX_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS -#error Allocator MAX_ORDER exceeds SECTION_SIZE +#if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS +#error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) @@ -1770,6 +1784,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { + struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif @@ -1963,7 +1978,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); - return test_bit(idx, ms->usage->subsection_map); + return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) @@ -1987,6 +2002,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; + int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the @@ -2000,13 +2016,19 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - if (!valid_section(ms)) + rcu_read_lock_sched(); + if (!valid_section(ms)) { + rcu_read_unlock_sched(); return 0; + } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ - return early_section(ms) || pfn_section_valid(ms, pfn); + ret = early_section(ms) || pfn_section_valid(ms, pfn); + rcu_read_unlock_sched(); + + return ret; } #endif diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..cd4d5c8781f5 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); @@ -241,7 +244,4 @@ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid())); } -bool check_fsmapping(const struct mnt_idmap *idmap, - const struct super_block *sb); - #endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b0678b093cb2..7a9a07ea451b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -935,6 +935,12 @@ enum { * struct cdx_device_id - CDX device identifier * @vendor: Vendor ID * @device: Device ID + * @subvendor: Subsystem vendor ID (or CDX_ANY_ID) + * @subdevice: Subsystem device ID (or CDX_ANY_ID) + * @class: Device class + * Most drivers do not need to specify class/class_mask + * as vendor/device is normally sufficient. + * @class_mask: Limit which sub-fields of the class field are compared. * @override_only: Match only when dev->driver_override is this driver. * * Type of entries in the "device Id" table for CDX devices supported by @@ -943,7 +949,25 @@ enum { struct cdx_device_id { __u16 vendor; __u16 device; + __u16 subvendor; + __u16 subdevice; + __u32 class; + __u32 class_mask; __u32 override_only; }; +struct vchiq_device_id { + char name[32]; +}; + +/** + * struct coreboot_device_id - Identifies a coreboot table entry + * @tag: tag ID + * @driver_data: driver specific data + */ +struct coreboot_device_id { + __u32 tag; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/module.h b/include/linux/module.h index a98e188cf37b..1153b0d99a80 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -540,6 +540,8 @@ struct module { struct static_call_site *static_call_sites; #endif #if IS_ENABLED(CONFIG_KUNIT) + int num_kunit_init_suites; + struct kunit_suite **kunit_init_suites; int num_kunit_suites; struct kunit_suite **kunit_suites; #endif @@ -668,7 +670,7 @@ extern void __module_get(struct module *module); * @module: the module we should check for * * Only try to get a module reference count if the module is not being removed. - * This call will fail if the module is already being removed. + * This call will fail if the module is in the process of being removed. * * Care must also be taken to ensure the module exists and is alive prior to * usage of this call. This can be gauranteed through two means: @@ -883,7 +885,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/include/linux/module_symbol.h b/include/linux/module_symbol.h index 1269543d0634..77c9895b9ddb 100644 --- a/include/linux/module_symbol.h +++ b/include/linux/module_symbol.h @@ -3,7 +3,7 @@ #define _LINUX_MODULE_SYMBOL_H /* This ignores the intensely annoying "mapping symbols" found in ELF files. */ -static inline int is_mapping_symbol(const char *str) +static inline bool is_mapping_symbol(const char *str) { if (str[0] == '.' && str[1] == 'L') return true; diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 001b2ce83832..89b1e0ed9811 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -115,6 +115,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 962cd41a2cb5..bfb85fd13e1f 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -276,7 +276,7 @@ struct kparam_array read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures with some compilers. */ -#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) +#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64) #define __moduleparam_const #else #define __moduleparam_const const @@ -293,7 +293,11 @@ struct kparam_array = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -/* Obsolete - use module_param_cb() */ +/* + * Useful for describing a set/get pair used only once (i.e. for this + * parameter). For repeated set/get pairs (i.e. the same struct + * kernel_param_ops), use module_param_cb() instead. + */ #define module_param_call(name, _set, _get, arg, perm) \ static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, .set = _set, .get = _get }; \ @@ -381,6 +385,8 @@ extern bool parameq(const char *name1, const char *name2); */ extern bool parameqn(const char *name1, const char *name2, size_t n); +typedef int (*parse_unknown_fn)(char *param, char *val, const char *doing, void *arg); + /* Called on module insert or kernel boot */ extern char *parse_args(const char *name, char *args, @@ -388,9 +394,7 @@ extern char *parse_args(const char *name, unsigned num, s16 level_min, s16 level_max, - void *arg, - int (*unknown)(char *param, char *val, - const char *doing, void *arg)); + void *arg, parse_unknown_fn unknown); /* Called by module remove. */ #ifdef CONFIG_SYSFS diff --git a/include/linux/mount.h b/include/linux/mount.h index 4f40b40306d0..c34c18b4e8f3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -50,8 +50,7 @@ struct path; #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \ - MNT_CURSOR) + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB) #define MNT_INTERNAL 0x4000 @@ -65,7 +64,7 @@ struct path; #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 -#define MNT_CURSOR 0x10000000 +#define MNT_ONRB 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ @@ -92,8 +91,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -extern int __mnt_want_write(struct vfsmount *); -extern void __mnt_drop_write(struct vfsmount *); +int mnt_get_write_access(struct vfsmount *mnt); +void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h index 79184948fab4..ac577699edfd 100644 --- a/include/linux/moxtet.h +++ b/include/linux/moxtet.h @@ -35,8 +35,6 @@ enum turris_mox_module_id { #define MOXTET_NIRQS 16 -extern struct bus_type moxtet_type; - struct moxtet { struct device *dev; struct mutex lock; diff --git a/include/linux/msi.h b/include/linux/msi.h index a50ea79522f8..26d07e23052e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -412,6 +412,7 @@ bool arch_restore_msi_irqs(struct pci_dev *dev); struct irq_domain; struct irq_domain_ops; struct irq_chip; +struct irq_fwspec; struct device_node; struct fwnode_handle; struct msi_domain_info; @@ -431,6 +432,8 @@ struct msi_domain_info; * function. * @msi_post_free: Optional function which is invoked after freeing * all interrupts. + * @msi_translate: Optional translate callback to support the odd wire to + * MSI bridges, e.g. MBIGEN * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. @@ -468,6 +471,8 @@ struct msi_domain_ops { struct device *dev); void (*msi_post_free)(struct irq_domain *domain, struct device *dev); + int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type); }; /** @@ -547,12 +552,10 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 6), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but has to be partially - * handled in the core MSI code. - */ - MSI_FLAG_NOMASK_QUIRK = (1 << 7), + /* Use dev->fwnode for MSI device domain creation */ + MSI_FLAG_USE_DEV_FWNODE = (1 << 7), + /* Set parent->dev into domain->pm_dev on device domain creation */ + MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), @@ -578,6 +581,11 @@ enum { * struct msi_parent_ops - MSI parent domain callbacks and configuration info * * @supported_flags: Required: The supported MSI flags of the parent domain + * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @bus_select_token: Optional: The bus token of the real parent domain for + * irq_domain::select() + * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for + * irq_domain::select() * @prefix: Optional: Prefix for the domain and chip name * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent * domain specific domain flags, domain ops and interrupt chip @@ -585,6 +593,9 @@ enum { */ struct msi_parent_ops { u32 supported_flags; + u32 required_flags; + u32 bus_select_token; + u32 bus_select_mask; const char *prefix; bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, struct irq_domain *msi_parent_domain, @@ -633,9 +644,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg); -void platform_msi_domain_free_irqs(struct device *dev); /* When an MSI domain is used as an intermediate domain */ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, @@ -662,6 +670,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); +/* Per device platform MSI */ +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg); +void platform_device_msi_free_irqs_all(struct device *dev); bool msi_device_has_isolated_msi(struct device *dev); #else /* CONFIG_GENERIC_MSI_IRQ */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d88bb56c18e2..947410faf9e2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -287,7 +287,7 @@ struct cfi_private { unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ unsigned long quirks; - struct flchip chips[]; /* per-chip data structure for each chip */ + struct flchip chips[] __counted_by(numchips); /* per-chip data structure for each chip */ }; uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index c04f690871ca..9798c1a1d3b6 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -13,6 +13,7 @@ */ #include <linux/sched.h> #include <linux/mutex.h> +#include <linux/wait.h> typedef enum { FL_READY, diff --git a/include/linux/mtd/lpc32xx_mlc.h b/include/linux/mtd/lpc32xx_mlc.h index d168c628c0d5..35e971be0950 100644 --- a/include/linux/mtd/lpc32xx_mlc.h +++ b/include/linux/mtd/lpc32xx_mlc.h @@ -11,7 +11,7 @@ #include <linux/dmaengine.h> struct lpc32xx_mlc_platform_data { - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; }; #endif /* __LINUX_MTD_LPC32XX_MLC_H */ diff --git a/include/linux/mtd/lpc32xx_slc.h b/include/linux/mtd/lpc32xx_slc.h index cf54a9f80460..a044b806566b 100644 --- a/include/linux/mtd/lpc32xx_slc.h +++ b/include/linux/mtd/lpc32xx_slc.h @@ -11,7 +11,7 @@ #include <linux/dmaengine.h> struct lpc32xx_slc_platform_data { - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; }; #endif /* __LINUX_MTD_LPC32XX_SLC_H */ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 914a9f974baa..8d10d9d2e830 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -223,7 +223,7 @@ struct mtd_part { * @partitions_lock: lock protecting accesses to the partition list. Protects * not only the master partition list, but also all * sub-partitions. - * @suspended: et to 1 when the device is suspended, 0 otherwise + * @suspended: set to 1 when the device is suspended, 0 otherwise * * This struct is embedded in mtd_info and contains master-specific * properties/fields. The master is the root MTD device from the MTD partition diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index 2e3f43788d48..0421f12156b5 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -24,7 +24,7 @@ struct lpddr_private { struct qinfo_chip *qinfo; int numchips; unsigned long chipshift; - struct flchip chips[]; + struct flchip chips[] __counted_by(numchips); }; /* qinfo_query_info structure contains request information for diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index c29ace15a053..e84522e31301 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1003,6 +1003,8 @@ struct nand_op_parser { /** * struct nand_operation - NAND operation descriptor * @cs: the CS line to select for this NAND operation + * @deassert_wp: set to true when the operation requires the WP pin to be + * de-asserted (ERASE, PROG, ...) * @instrs: array of instructions to execute * @ninstrs: length of the @instrs array * @@ -1010,6 +1012,7 @@ struct nand_op_parser { */ struct nand_operation { unsigned int cs; + bool deassert_wp; const struct nand_op_instr *instrs; unsigned int ninstrs; }; @@ -1021,6 +1024,14 @@ struct nand_operation { .ninstrs = ARRAY_SIZE(_instrs), \ } +#define NAND_DESTRUCTIVE_OPERATION(_cs, _instrs) \ + { \ + .cs = _cs, \ + .deassert_wp = true, \ + .instrs = _instrs, \ + .ninstrs = ARRAY_SIZE(_instrs), \ + } + int nand_op_parser_exec_op(struct nand_chip *chip, const struct nand_op_parser *parser, const struct nand_operation *op, bool check_only); @@ -1104,6 +1115,7 @@ struct nand_controller_ops { * the bus without restarting an entire read operation nor * changing the column. * @supported_op.cont_read: The controller supports sequential cache reads. + * @controller_wp: the controller is in charge of handling the WP pin. */ struct nand_controller { struct mutex lock; @@ -1112,6 +1124,7 @@ struct nand_controller { unsigned int data_only_read: 1; unsigned int cont_read: 1; } supported_op; + bool controller_wp; }; static inline void nand_controller_init(struct nand_controller *nfc) @@ -1265,6 +1278,7 @@ struct nand_secure_region { * @cont_read: Sequential page read internals * @cont_read.ongoing: Whether a continuous read is ongoing or not * @cont_read.first_page: Start of the continuous read operation + * @cont_read.pause_page: End of the current sequential cache read operation * @cont_read.last_page: End of the continuous read operation * @controller: The hardware controller structure which is shared among multiple * independent devices @@ -1321,6 +1335,7 @@ struct nand_chip { struct { bool ongoing; unsigned int first_page; + unsigned int pause_page; unsigned int last_page; } cont_read; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 3e285c09d16d..5c19ead60499 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -169,7 +169,7 @@ struct spinand_op; struct spinand_device; -#define SPINAND_MAX_ID_LEN 4 +#define SPINAND_MAX_ID_LEN 5 /* * For erase, write and read operation, we got the following timings : * tBERS (erase) 1ms to 4ms @@ -263,6 +263,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index a529347fd75b..562f92504f2b 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -192,6 +192,7 @@ struct ubi_device_info { * or a volume was removed) * @UBI_VOLUME_RESIZED: a volume has been re-sized * @UBI_VOLUME_RENAMED: a volume has been re-named + * @UBI_VOLUME_SHUTDOWN: a volume is going to removed, shutdown users * @UBI_VOLUME_UPDATED: data has been written to a volume * * These constants define which type of event has happened when a volume @@ -202,6 +203,7 @@ enum { UBI_VOLUME_REMOVED, UBI_VOLUME_RESIZED, UBI_VOLUME_RENAMED, + UBI_VOLUME_SHUTDOWN, UBI_VOLUME_UPDATED, }; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a33aa9eb9fc3..67edc4ca2bee 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,7 @@ #include <linux/osq_lock.h> #include <linux/debug_locks.h> #include <linux/cleanup.h> +#include <linux/mutex_types.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -31,54 +32,9 @@ # define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif -#ifndef CONFIG_PREEMPT_RT - -/* - * Simple, straightforward mutexes with strict semantics: - * - * - only one task can hold the mutex at a time - * - only the owner can unlock the mutex - * - multiple unlocks are not permitted - * - recursive locking is not permitted - * - a mutex object must be initialized via the API - * - a mutex object must not be initialized via memset or copying - * - task may not exit with mutex held - * - memory areas where held locks reside must not be freed - * - held mutexes must not be reinitialized - * - mutexes may not be used in hardware or software interrupt - * contexts such as tasklets and timers - * - * These semantics are fully enforced when DEBUG_MUTEXES is - * enabled. Furthermore, besides enforcing the above rules, the mutex - * debugging code also implements a number of additional features - * that make lock debugging easier and faster: - * - * - uses symbolic names of mutexes, whenever they are printed in debug output - * - point-of-acquire tracking, symbolic lookup of function names - * - list of all locks held in the system, printout of them - * - owner tracking - * - detects self-recursing locks and prints out all relevant info - * - detects multi-task circular deadlocks and prints out all affected - * locks and tasks (and only those tasks) - */ -struct mutex { - atomic_long_t owner; - raw_spinlock_t wait_lock; -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_queue osq; /* Spinner MCS lock */ -#endif - struct list_head wait_list; -#ifdef CONFIG_DEBUG_MUTEXES - void *magic; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - #ifdef CONFIG_DEBUG_MUTEXES -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ +# define __DEBUG_MUTEX_INITIALIZER(lockname) \ , .magic = &lockname extern void mutex_destroy(struct mutex *lock); @@ -91,6 +47,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif +#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -131,14 +88,6 @@ extern bool mutex_is_locked(struct mutex *lock); /* * Preempt-RT variant based on rtmutexes. */ -#include <linux/rtmutex.h> - -struct mutex { - struct rt_mutex_base rtmutex; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; #define __MUTEX_INITIALIZER(mutexname) \ { \ @@ -151,9 +100,6 @@ struct mutex { extern void __mutex_rt_init(struct mutex *lock, const char *name, struct lock_class_key *key); -extern int mutex_trylock(struct mutex *lock); - -static inline void mutex_destroy(struct mutex *lock) { } #define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) @@ -221,6 +167,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) -DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) +DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h new file mode 100644 index 000000000000..fdf7f515fde8 --- /dev/null +++ b/include/linux/mutex_types.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_MUTEX_TYPES_H +#define __LINUX_MUTEX_TYPES_H + +#include <linux/atomic.h> +#include <linux/lockdep_types.h> +#include <linux/osq_lock.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> + +#ifndef CONFIG_PREEMPT_RT + +/* + * Simple, straightforward mutexes with strict semantics: + * + * - only one task can hold the mutex at a time + * - only the owner can unlock the mutex + * - multiple unlocks are not permitted + * - recursive locking is not permitted + * - a mutex object must be initialized via the API + * - a mutex object must not be initialized via memset or copying + * - task may not exit with mutex held + * - memory areas where held locks reside must not be freed + * - held mutexes must not be reinitialized + * - mutexes may not be used in hardware or software interrupt + * contexts such as tasklets and timers + * + * These semantics are fully enforced when DEBUG_MUTEXES is + * enabled. Furthermore, besides enforcing the above rules, the mutex + * debugging code also implements a number of additional features + * that make lock debugging easier and faster: + * + * - uses symbolic names of mutexes, whenever they are printed in debug output + * - point-of-acquire tracking, symbolic lookup of function names + * - list of all locks held in the system, printout of them + * - owner tracking + * - detects self-recursing locks and prints out all relevant info + * - detects multi-task circular deadlocks and prints out all affected + * locks and tasks (and only those tasks) + */ +struct mutex { + atomic_long_t owner; + raw_spinlock_t wait_lock; +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + struct optimistic_spin_queue osq; /* Spinner MCS lock */ +#endif + struct list_head wait_list; +#ifdef CONFIG_DEBUG_MUTEXES + void *magic; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#else /* !CONFIG_PREEMPT_RT */ +/* + * Preempt-RT variant based on rtmutexes. + */ +#include <linux/rtmutex.h> + +struct mutex { + struct rt_mutex_base rtmutex; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#endif /* CONFIG_PREEMPT_RT */ + +#endif /* __LINUX_MUTEX_TYPES_H */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 1463cbda4888..74e0cc14ebf8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -66,6 +66,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); @@ -92,6 +93,30 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +/** + * mode_strip_umask - handle vfs umask stripping + * @dir: parent directory of the new inode + * @mode: mode of the new inode to be created in @dir + * + * In most filesystems, umask stripping depends on whether or not the + * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask + * stripping is done directly in here. If the filesystem does support POSIX + * ACLs umask stripping is deferred until the filesystem calls + * posix_acl_create(). + * + * Some filesystems (like NFSv4) also want to avoid umask stripping by the + * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK + * to get this effect without declaring that they support POSIX ACLs. + * + * Returns: mode + */ +static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode) +{ + if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK)) + mode &= ~current_umask(); + return mode; +} + extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) @@ -112,7 +137,7 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) static inline bool retry_estale(const long error, const unsigned int flags) { - return error == -ESTALE && !(flags & LOOKUP_REVAL); + return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL)); } #endif /* _LINUX_NAMEI_H */ diff --git a/include/linux/net.h b/include/linux/net.h index c9b4a63791a4..15df6d5f27a7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -299,10 +299,7 @@ do { \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else #define net_dbg_ratelimited(fmt, ...) \ - do { \ - if (0) \ - no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ - } while (0) + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif #define net_get_random_once(buf, nbytes) \ diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index ed42bd5f639f..0aa4411528fc 100644 --- a/include/linux/net/intel/i40e_client.h +++ b/include/linux/net/intel/i40e_client.h @@ -45,7 +45,7 @@ struct i40e_qv_info { struct i40e_qvlist_info { u32 num_vectors; - struct i40e_qv_info qv_info[]; + struct i40e_qv_info qv_info[] __counted_by(num_vectors); }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0896aaa91dd7..cb37817d6382 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -225,12 +225,6 @@ struct net_device_core_stats { #include <linux/cache.h> #include <linux/skbuff.h> -#ifdef CONFIG_RPS -#include <linux/static_key.h> -extern struct static_key_false rps_needed; -extern struct static_key_false rfs_needed; -#endif - struct neighbour; struct neigh_parms; struct sk_buff; @@ -380,6 +374,7 @@ struct napi_struct { /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; + int irq; }; enum { @@ -480,6 +475,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); } +/** + * napi_is_scheduled - test if NAPI is scheduled + * @n: NAPI context + * + * This check is "best-effort". With no locking implemented, + * a NAPI can be scheduled or terminate right after this check + * and produce not precise results. + * + * NAPI_STATE_SCHED is an internal state, napi_is_scheduled + * should not be used normally and napi_schedule should be + * used instead. + * + * Use only if the driver really needs to check if a NAPI + * is scheduled for example in the context of delayed timer + * that can be skipped if a NAPI is already scheduled. + * + * Return True if NAPI is scheduled, False otherwise. + */ +static inline bool napi_is_scheduled(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_SCHED, &n->state); +} + bool napi_schedule_prep(struct napi_struct *n); /** @@ -488,11 +506,18 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. + * Return true if we schedule a NAPI or false if not. + * Refer to napi_schedule_prep() for additional reason on why + * a NAPI might not be scheduled. */ -static inline void napi_schedule(struct napi_struct *n) +static inline bool napi_schedule(struct napi_struct *n) { - if (napi_schedule_prep(n)) + if (napi_schedule_prep(n)) { __napi_schedule(n); + return true; + } + + return false; } /** @@ -507,16 +532,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) __napi_schedule_irqoff(n); } -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline bool napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return true; - } - return false; -} - /** * napi_complete_done - NAPI processing complete * @n: NAPI context @@ -643,6 +658,10 @@ struct netdev_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; /* * write-mostly part */ @@ -705,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node #endif } -#ifdef CONFIG_RPS -/* - * This structure holds an RPS map which can be of variable length. The - * map is an array of CPUs. - */ -struct rps_map { - unsigned int len; - struct rcu_head rcu; - u16 cpus[]; -}; -#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) - -/* - * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. - */ -struct rps_dev_flow { - u16 cpu; - u16 filter; - unsigned int last_qtail; -}; -#define RPS_NO_FILTER 0xffff - -/* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - unsigned int mask; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* - * The rps_sock_flow_table contains mappings of flows to the last CPU - * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high-order bits - * of flow hash, lower part is CPU number. - * rps_cpu_mask is used to partition the space, depending on number of - * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, - * meaning we use 32-6=26 bits for the hash. - */ -struct rps_sock_flow_table { - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; -}; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) - -#define RPS_NO_CPU 0xffff - -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) { - unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; - - /* We only give a hint, preemption can change CPU under us */ - val |= raw_smp_processor_id(); - - /* The following WRITE_ONCE() is paired with the READ_ONCE() - * here, and another one in get_rps_cpu(). - */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); - } -} - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif -#endif /* CONFIG_RPS */ /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { @@ -917,6 +860,7 @@ struct net_device_path { u8 queue; u16 wcid; u8 bss; + u8 amsdu; } mtk_wdma; }; }; @@ -1034,7 +978,7 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); - void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); + void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); @@ -1287,9 +1231,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. - * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1303,6 +1245,9 @@ struct netdev_net_notifier { * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Deletes the MDB entry from dev. + * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], + * struct netlink_ext_ack *extack); + * Bulk deletes MDB entries from dev. * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, * struct netlink_callback *cb); * Dumps MDB entries from dev. The first argument (marker) in the netlink @@ -1564,10 +1509,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1587,9 +1530,16 @@ struct net_device_ops { int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); + int (*ndo_mdb_del_bulk)(struct net_device *dev, + struct nlattr *tb[], + struct netlink_ext_ack *extack); int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); + int (*ndo_mdb_get)(struct net_device *dev, + struct nlattr *tb[], u32 portid, + u32 seq, + struct netlink_ext_ack *extack); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, @@ -1774,6 +1724,22 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + +enum netdev_reg_state { + NETREG_UNINITIALIZED = 0, + NETREG_REGISTERED, /* completed register_netdevice */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1835,6 +1801,7 @@ enum netdev_ml_priv_type { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. + * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour @@ -1968,10 +1935,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -1984,6 +1955,7 @@ enum netdev_ml_priv_type { * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops + * @stat_ops: Optional ops for queue-aware statistics * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2049,11 +2021,86 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/net_device.rst. + * Please update the document when adding new fields. + */ + + /* TX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_tx); + unsigned long long priv_flags; + const struct net_device_ops *netdev_ops; + const struct header_ops *header_ops; + struct netdev_queue *_tx; + netdev_features_t gso_partial_features; + unsigned int real_num_tx_queues; + unsigned int gso_max_size; + unsigned int gso_ipv4_max_size; + u16 gso_max_segs; + s16 num_tc; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ + unsigned int mtu; + unsigned short needed_headroom; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; +#ifdef CONFIG_XPS + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; +#endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_egress; +#endif + __cacheline_group_end(net_device_read_tx); + + /* TXRX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; + unsigned long state; + unsigned int flags; + unsigned short hard_header_len; + netdev_features_t features; + struct inet6_dev __rcu *ip6_ptr; + __cacheline_group_end(net_device_read_txrx); + + /* RX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_rx); + struct bpf_prog __rcu *xdp_prog; + struct list_head ptype_specific; + int ifindex; + unsigned int real_num_rx_queues; + struct netdev_rx_queue *_rx; + unsigned long gro_flush_timeout; + int napi_defer_hard_irqs; + unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; + rx_handler_func_t __rcu *rx_handler; + void __rcu *rx_handler_data; + possible_net_t nd_net; +#ifdef CONFIG_NETPOLL + struct netpoll_info __rcu *npinfo; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_ingress; +#endif + __cacheline_group_end(net_device_read_rx); + char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; @@ -2071,14 +2118,12 @@ struct net_device { * part of the usual set specified in Space.c. */ - unsigned long state; struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; - struct list_head ptype_specific; struct { struct list_head upper; @@ -2086,31 +2131,18 @@ struct net_device { } adj_list; /* Read-mostly cache-line for fast-path access */ - unsigned int flags; xdp_features_t xdp_features; - unsigned long long priv_flags; - const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; - int ifindex; + const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; unsigned short gflags; - unsigned short hard_header_len; - /* Note : dev->mtu is often read without holding a lock. - * Writers usually hold RTNL. - * It is recommended to use READ_ONCE() to annotate the reads, - * and to use WRITE_ONCE() to annotate the writes. - */ - unsigned int mtu; - unsigned short needed_headroom; unsigned short needed_tailroom; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; - netdev_features_t gso_partial_features; unsigned int min_mtu; unsigned int max_mtu; @@ -2148,9 +2180,7 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - const struct header_ops *header_ops; - - unsigned char operstate; + unsigned int operstate; unsigned char link_mode; unsigned char if_port; @@ -2190,9 +2220,7 @@ struct net_device { /* Protocol-specific pointers */ - struct in_device __rcu *ip_ptr; - struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif @@ -2227,26 +2255,13 @@ struct net_device { /* Interface address info used in eth_type_trans() */ const unsigned char *dev_addr; - struct netdev_rx_queue *_rx; unsigned int num_rx_queues; - unsigned int real_num_rx_queues; - - struct bpf_prog __rcu *xdp_prog; - unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. */ #define GRO_MAX_SIZE (8 * 65535u) - unsigned int gro_max_size; - unsigned int gro_ipv4_max_size; unsigned int xdp_zc_max_segs; - rx_handler_func_t __rcu *rx_handler; - void __rcu *rx_handler_data; -#ifdef CONFIG_NET_XGRESS - struct bpf_mprog_entry __rcu *tcx_ingress; -#endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS struct nf_hook_entries __rcu *nf_hooks_ingress; @@ -2261,25 +2276,13 @@ struct net_device { /* * Cache lines mostly used on transmit path */ - struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; - unsigned int real_num_tx_queues; struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq; -#ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; -#endif -#ifdef CONFIG_NET_XGRESS - struct bpf_mprog_entry __rcu *tcx_egress; -#endif -#ifdef CONFIG_NETFILTER_EGRESS - struct nf_hook_entries __rcu *nf_hooks_egress; -#endif - #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif @@ -2300,13 +2303,7 @@ struct net_device { struct list_head link_watch_list; - enum { NETREG_UNINITIALIZED=0, - NETREG_REGISTERED, /* completed register_netdevice */ - NETREG_UNREGISTERING, /* called unregister_netdevice */ - NETREG_UNREGISTERED, /* completed unregister todo */ - NETREG_RELEASED, /* called free_netdev */ - NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:8; + u8 reg_state; bool dismantle; @@ -2318,21 +2315,11 @@ struct net_device { bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); -#ifdef CONFIG_NETPOLL - struct netpoll_info __rcu *npinfo; -#endif - - possible_net_t nd_net; - /* mid-layer private */ void *ml_priv; enum netdev_ml_priv_type ml_priv_type; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; + enum netdev_stat_type pcpu_stat_type:8; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; @@ -2349,6 +2336,8 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; + const struct netdev_stat_ops *stat_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u @@ -2357,20 +2346,15 @@ struct net_device { */ #define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) - unsigned int gso_max_size; #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX unsigned int tso_max_size; - u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; - unsigned int gso_ipv4_max_size; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - s16 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) @@ -2405,6 +2389,14 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin __rcu *dpll_pin; +#endif +#if IS_ENABLED(CONFIG_PAGE_POOL) + /** @page_pools: page pools created for this netdevice */ + struct hlist_head page_pools; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2609,6 +2601,15 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) +void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, + enum netdev_queue_type type, + struct napi_struct *napi); + +static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) +{ + napi->irq = irq; +} + /* Default NAPI poll() weight * Device drivers are strongly advised to not use bigger value */ @@ -2725,6 +2726,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; @@ -2985,8 +2996,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); -extern rwlock_t dev_base_lock; /* Device list lock */ - #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ @@ -3109,7 +3118,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -int init_dummy_netdev(struct net_device *dev); +void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, @@ -3410,6 +3419,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue #endif } +static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue @@ -3869,7 +3888,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); @@ -3882,8 +3901,6 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); -struct packet_offload *gro_find_receive_by_type(__be16 type); -struct packet_offload *gro_find_complete_by_type(__be16 type); static inline void napi_free_frags(struct napi_struct *napi) { @@ -3912,6 +3929,9 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, @@ -3940,6 +3960,7 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); @@ -3980,32 +4001,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -4145,6 +4153,15 @@ static inline void netdev_ref_replace(struct net_device *odev, void linkwatch_fire_event(struct net_device *dev); /** + * linkwatch_sync_dev - sync linkwatch for the given device + * @dev: network device to sync linkwatch for + * + * Sync linkwatch for the given device, removing it from the + * pending work list (if queued). + */ +void linkwatch_sync_dev(struct net_device *dev); + +/** * netif_carrier_ok - test if carrier present * @dev: network device * @@ -4253,8 +4270,10 @@ static inline bool netif_testing(const struct net_device *dev) */ static inline bool netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == IF_OPER_UNKNOWN /* backward compat */); + unsigned int operstate = READ_ONCE(dev->operstate); + + return operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN /* backward compat */; } /** @@ -4693,11 +4712,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); -extern int netdev_max_backlog; -extern int dev_rx_weight; -extern int dev_tx_weight; -extern int gro_normal_batch; - enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, @@ -5154,7 +5168,9 @@ static inline const char *netdev_name(const struct net_device *dev) static inline const char *netdev_reg_state(const struct net_device *dev) { - switch (dev->reg_state) { + u8 reg_state = READ_ONCE(dev->reg_state); + + switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; @@ -5163,7 +5179,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev) case NETREG_DUMMY: return " (dummy)"; } - WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } @@ -5205,7 +5221,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev) #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) -extern struct list_head ptype_all __read_mostly; extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; @@ -5214,5 +5229,6 @@ extern struct net_device *blackhole_netdev; #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) +#define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d68644b7c299..2683b2b77612 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -22,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict) return -(verdict >> NF_VERDICT_QBITS); } +static __always_inline int +NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) +{ + BUILD_BUG_ON(err > 0xffff); + + kfree_skb_reason(skb, reason); + + return ((err << 16) | NF_STOLEN); +} + static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { @@ -360,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include <net/flow.h> @@ -464,6 +473,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade1..e9f4f845d760 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f980edfdd278..743475ca7e9d 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -42,7 +42,7 @@ static inline int nf_bridge_get_physinif(const struct sk_buff *skb) if (!nf_bridge) return 0; - return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; + return nf_bridge->physinif; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) @@ -56,11 +56,11 @@ static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) } static inline struct net_device * -nf_bridge_get_physindev(const struct sk_buff *skb) +nf_bridge_get_physindev(const struct sk_buff *skb, struct net *net) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - return nf_bridge ? nf_bridge->physindev : NULL; + return nf_bridge ? dev_get_by_index_rcu(net, nf_bridge->physinif) : NULL; } static inline struct net_device * diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 7834c0be2831..61aa48f46dd7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -51,7 +51,7 @@ struct nf_ipv6_ops { u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); int (*cookie_v6_check)(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie); + const struct tcphdr *th); #endif void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, @@ -179,16 +179,16 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie) + const struct tcphdr *th) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) - return v6_ops->cookie_v6_check(iph, th, cookie); + return v6_ops->cookie_v6_check(iph, th); #elif IS_BUILTIN(CONFIG_IPV6) - return __cookie_v6_check(iph, th, cookie); + return __cookie_v6_check(iph, th); #endif #endif return 0; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b11a84f6c32b..100cbb261269 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -109,11 +109,18 @@ static inline int wait_on_page_fscache_killable(struct page *page) return folio_wait_private_2_killable(page_folio(page)); } +/* Marks used on xarray-based buffers */ +#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ +#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ + enum netfs_io_source { NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, NETFS_INVALID_READ, + NETFS_UPLOAD_TO_SERVER, + NETFS_WRITE_TO_CACHE, + NETFS_INVALID_WRITE, } __mode(byte); typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, @@ -129,9 +136,57 @@ struct netfs_inode { struct fscache_cookie *cache; #endif loff_t remote_i_size; /* Size of the remote file */ + loff_t zero_point; /* Size after which we assume there's no data + * on the server */ + unsigned long flags; +#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ +#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ +#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ +#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ +}; + +/* + * A netfs group - for instance a ceph snap. This is marked on dirty pages and + * pages marked with a group must be flushed before they can be written under + * the domain of another group. + */ +struct netfs_group { + refcount_t ref; + void (*free)(struct netfs_group *netfs_group); }; /* + * Information about a dirty page (attached only if necessary). + * folio->private + */ +struct netfs_folio { + struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */ + unsigned int dirty_offset; /* Write-streaming dirty data offset */ + unsigned int dirty_len; /* Write-streaming dirty data length */ +}; +#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ + +static inline struct netfs_folio *netfs_folio_info(struct folio *folio) +{ + void *priv = folio_get_private(folio); + + if ((unsigned long)priv & NETFS_FOLIO_INFO) + return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); + return NULL; +} + +static inline struct netfs_group *netfs_folio_group(struct folio *folio) +{ + struct netfs_folio *finfo; + void *priv = folio_get_private(folio); + + finfo = netfs_folio_info(folio); + if (finfo) + return finfo->netfs_group; + return priv; +} + +/* * Resources required to do operations on a cache. */ struct netfs_cache_resources { @@ -143,17 +198,24 @@ struct netfs_cache_resources { }; /* - * Descriptor for a single component subrequest. + * Descriptor for a single component subrequest. Each operation represents an + * individual read/write from/to a server, a cache, a journal, etc.. + * + * The buffer iterator is persistent for the life of the subrequest struct and + * the pages it points to can be relied on to exist for the duration. */ struct netfs_io_subrequest { struct netfs_io_request *rreq; /* Supervising I/O request */ + struct work_struct work; struct list_head rreq_link; /* Link in rreq->subrequests */ + struct iov_iter io_iter; /* Iterator for this subrequest */ loff_t start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ + unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ @@ -168,6 +230,13 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_WRITEBACK, /* This write was triggered by writepages */ + NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ + NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */ + NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ + NETFS_DIO_READ, /* This is a direct I/O read */ + NETFS_DIO_WRITE, /* This is a direct I/O write */ + nr__netfs_io_origin } __mode(byte); /* @@ -175,19 +244,34 @@ enum netfs_io_origin { * operations to a variety of data stores and then stitch the result together. */ struct netfs_io_request { - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ + struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ + struct iov_iter iter; /* Unencrypted-side iterator */ + struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ + struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ + unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; + unsigned int rsize; /* Maximum read size (0 for none) */ + unsigned int wsize; /* Maximum write size (0 for none) */ + unsigned int subreq_counter; /* Next subreq->debug_index */ atomic_t nr_outstanding; /* Number of ops in progress */ atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ + size_t upper_len; /* Length can be extended to here */ + size_t transferred; /* Amount to be indicated as transferred */ short error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ + bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ loff_t i_size; /* Size of the file */ loff_t start; /* Start position */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ @@ -199,17 +283,25 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ +#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ +#define NETFS_RREQ_BLOCKED 10 /* We blocked */ const struct netfs_request_ops *netfs_ops; + void (*cleanup)(struct netfs_io_request *req); }; /* * Operations the network filesystem can/must provide to the helpers. */ struct netfs_request_ops { + unsigned int io_request_size; /* Alloc size for netfs_io_request struct */ + unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */ int (*init_request)(struct netfs_io_request *rreq, struct file *file); void (*free_request)(struct netfs_io_request *rreq); - int (*begin_cache_operation)(struct netfs_io_request *rreq); + void (*free_subrequest)(struct netfs_io_subrequest *rreq); + /* Read request handling */ void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); @@ -217,6 +309,14 @@ struct netfs_request_ops { int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); + + /* Modification handling */ + void (*update_i_size)(struct inode *inode, loff_t i_size); + + /* Write request handling */ + void (*create_write_requests)(struct netfs_io_request *wreq, + loff_t start, size_t len); + void (*invalidate_cache)(struct netfs_io_request *wreq); }; /* @@ -229,8 +329,7 @@ enum netfs_read_from_hole { }; /* - * Table of operations for access to a cache. This is obtained by - * rreq->ops->begin_cache_operation(). + * Table of operations for access to a cache. */ struct netfs_cache_ops { /* End an operation */ @@ -265,8 +364,8 @@ struct netfs_cache_ops { * actually do. */ int (*prepare_write)(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size, - bool no_space_allocated_yet); + loff_t *_start, size_t *_len, size_t upper_len, + loff_t i_size, bool no_space_allocated_yet); /* Prepare an on-demand read operation, shortening it to a cached/uncached * boundary as appropriate. @@ -284,22 +383,62 @@ struct netfs_cache_ops { loff_t *_data_start, size_t *_data_len); }; +/* High-level read API. */ +ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); + +/* High-level write API */ +ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, + struct netfs_group *netfs_group); +ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, + struct netfs_group *netfs_group); +ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from); +ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); + +/* Address operations API */ struct readahead_control; void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); int netfs_write_begin(struct netfs_inode *, struct file *, - struct address_space *, loff_t pos, unsigned int len, - struct folio **, void **fsdata); + struct address_space *, loff_t pos, unsigned int len, + struct folio **, void **fsdata); +int netfs_writepages(struct address_space *mapping, + struct writeback_control *wbc); +bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio); +int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc); +void netfs_clear_inode_writeback(struct inode *inode, const void *aux); +void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); +bool netfs_release_folio(struct folio *folio, gfp_t gfp); +int netfs_launder_folio(struct folio *folio); + +/* VMA operations API. */ +vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); +/* (Sub)request management API. */ void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what); -void netfs_stats_show(struct seq_file *); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); +size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs); +struct netfs_io_subrequest *netfs_create_write_request( + struct netfs_io_request *wreq, enum netfs_io_source dest, + loff_t start, size_t len, work_func_t worker); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, + bool was_async); +void netfs_queue_write_request(struct netfs_io_subrequest *subreq); + +int netfs_start_io_read(struct inode *inode); +void netfs_end_io_read(struct inode *inode); +int netfs_start_io_write(struct inode *inode); +void netfs_end_io_write(struct inode *inode); +int netfs_start_io_direct(struct inode *inode); +void netfs_end_io_direct(struct inode *inode); /** * netfs_inode - Get the netfs inode context from the inode @@ -317,30 +456,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) * netfs_inode_init - Initialise a netfslib inode context * @ctx: The netfs inode to initialise * @ops: The netfs's operations list + * @use_zero_point: True to use the zero_point read optimisation * * Initialise the netfs library context struct. This is expected to follow on * directly from the VFS inode struct. */ static inline void netfs_inode_init(struct netfs_inode *ctx, - const struct netfs_request_ops *ops) + const struct netfs_request_ops *ops, + bool use_zero_point) { ctx->ops = ops; ctx->remote_i_size = i_size_read(&ctx->inode); + ctx->zero_point = LLONG_MAX; + ctx->flags = 0; #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif + /* ->releasepage() drives zero_point */ + if (use_zero_point) { + ctx->zero_point = ctx->remote_i_size; + mapping_set_release_always(ctx->inode.i_mapping); + } } /** * netfs_resize_file - Note that a file got resized * @ctx: The netfs inode being resized * @new_i_size: The new file size + * @changed_on_server: The change was applied to the server * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size) +static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, + bool changed_on_server) { - ctx->remote_i_size = new_i_size; + if (changed_on_server) + ctx->remote_i_size = new_i_size; + if (new_i_size < ctx->zero_point) + ctx->zero_point = new_i_size; } /** diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 75d7de34c908..5df7340d4dab 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -228,10 +228,12 @@ bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); + +typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data); + int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, - struct sk_buff *skb, void *data), + netlink_filter_fn filter, void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); @@ -289,6 +291,7 @@ struct netlink_callback { u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; + int flags; bool strict_check; union { u8 ctx[48]; @@ -321,6 +324,7 @@ struct netlink_dump_control { void *data; struct module *module; u32 min_dump_alloc; + int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, @@ -351,5 +355,6 @@ bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool netlink_net_capable(const struct sk_buff *skb, int cap); +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast); #endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 730003c4f4af..ef8d2d618d5b 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -150,7 +150,7 @@ enum nfs_opnum4 { OP_WRITE_SAME = 70, OP_CLONE = 71, - /* xattr support (RFC8726) */ + /* xattr support (RFC8276) */ OP_GETXATTR = 72, OP_SETXATTR = 73, OP_LISTXATTRS = 74, @@ -389,79 +389,203 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; +/* + * Symbol names and values are from RFC 7531 Section 2. + * "XDR Description of NFSv4.0" + */ +enum { + FATTR4_SUPPORTED_ATTRS = 0, + FATTR4_TYPE = 1, + FATTR4_FH_EXPIRE_TYPE = 2, + FATTR4_CHANGE = 3, + FATTR4_SIZE = 4, + FATTR4_LINK_SUPPORT = 5, + FATTR4_SYMLINK_SUPPORT = 6, + FATTR4_NAMED_ATTR = 7, + FATTR4_FSID = 8, + FATTR4_UNIQUE_HANDLES = 9, + FATTR4_LEASE_TIME = 10, + FATTR4_RDATTR_ERROR = 11, + FATTR4_ACL = 12, + FATTR4_ACLSUPPORT = 13, + FATTR4_ARCHIVE = 14, + FATTR4_CANSETTIME = 15, + FATTR4_CASE_INSENSITIVE = 16, + FATTR4_CASE_PRESERVING = 17, + FATTR4_CHOWN_RESTRICTED = 18, + FATTR4_FILEHANDLE = 19, + FATTR4_FILEID = 20, + FATTR4_FILES_AVAIL = 21, + FATTR4_FILES_FREE = 22, + FATTR4_FILES_TOTAL = 23, + FATTR4_FS_LOCATIONS = 24, + FATTR4_HIDDEN = 25, + FATTR4_HOMOGENEOUS = 26, + FATTR4_MAXFILESIZE = 27, + FATTR4_MAXLINK = 28, + FATTR4_MAXNAME = 29, + FATTR4_MAXREAD = 30, + FATTR4_MAXWRITE = 31, + FATTR4_MIMETYPE = 32, + FATTR4_MODE = 33, + FATTR4_NO_TRUNC = 34, + FATTR4_NUMLINKS = 35, + FATTR4_OWNER = 36, + FATTR4_OWNER_GROUP = 37, + FATTR4_QUOTA_AVAIL_HARD = 38, + FATTR4_QUOTA_AVAIL_SOFT = 39, + FATTR4_QUOTA_USED = 40, + FATTR4_RAWDEV = 41, + FATTR4_SPACE_AVAIL = 42, + FATTR4_SPACE_FREE = 43, + FATTR4_SPACE_TOTAL = 44, + FATTR4_SPACE_USED = 45, + FATTR4_SYSTEM = 46, + FATTR4_TIME_ACCESS = 47, + FATTR4_TIME_ACCESS_SET = 48, + FATTR4_TIME_BACKUP = 49, + FATTR4_TIME_CREATE = 50, + FATTR4_TIME_DELTA = 51, + FATTR4_TIME_METADATA = 52, + FATTR4_TIME_MODIFY = 53, + FATTR4_TIME_MODIFY_SET = 54, + FATTR4_MOUNTED_ON_FILEID = 55, +}; + +/* + * Symbol names and values are from RFC 5662 Section 2. + * "XDR Description of NFSv4.1" + */ +enum { + FATTR4_DIR_NOTIF_DELAY = 56, + FATTR4_DIRENT_NOTIF_DELAY = 57, + FATTR4_DACL = 58, + FATTR4_SACL = 59, + FATTR4_CHANGE_POLICY = 60, + FATTR4_FS_STATUS = 61, + FATTR4_FS_LAYOUT_TYPES = 62, + FATTR4_LAYOUT_HINT = 63, + FATTR4_LAYOUT_TYPES = 64, + FATTR4_LAYOUT_BLKSIZE = 65, + FATTR4_LAYOUT_ALIGNMENT = 66, + FATTR4_FS_LOCATIONS_INFO = 67, + FATTR4_MDSTHRESHOLD = 68, + FATTR4_RETENTION_GET = 69, + FATTR4_RETENTION_SET = 70, + FATTR4_RETENTEVT_GET = 71, + FATTR4_RETENTEVT_SET = 72, + FATTR4_RETENTION_HOLD = 73, + FATTR4_MODE_SET_MASKED = 74, + FATTR4_SUPPATTR_EXCLCREAT = 75, + FATTR4_FS_CHARSET_CAP = 76, +}; + +/* + * Symbol names and values are from RFC 7863 Section 2. + * "XDR Description of NFSv4.2" + */ +enum { + FATTR4_CLONE_BLKSIZE = 77, + FATTR4_SPACE_FREED = 78, + FATTR4_CHANGE_ATTR_TYPE = 79, + FATTR4_SEC_LABEL = 80, +}; + +/* + * Symbol names and values are from RFC 8275 Section 5. + * "The mode_umask Attribute" + */ +enum { + FATTR4_MODE_UMASK = 81, +}; + +/* + * Symbol names and values are from RFC 8276 Section 8.6. + * "Numeric Values Assigned to Protocol Extensions" + */ +enum { + FATTR4_XATTR_SUPPORT = 82, +}; + +/* + * The following internal definitions enable processing the above + * attribute bits within 32-bit word boundaries. + */ /* Mandatory Attributes */ -#define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) -#define FATTR4_WORD0_TYPE (1UL << 1) -#define FATTR4_WORD0_FH_EXPIRE_TYPE (1UL << 2) -#define FATTR4_WORD0_CHANGE (1UL << 3) -#define FATTR4_WORD0_SIZE (1UL << 4) -#define FATTR4_WORD0_LINK_SUPPORT (1UL << 5) -#define FATTR4_WORD0_SYMLINK_SUPPORT (1UL << 6) -#define FATTR4_WORD0_NAMED_ATTR (1UL << 7) -#define FATTR4_WORD0_FSID (1UL << 8) -#define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9) -#define FATTR4_WORD0_LEASE_TIME (1UL << 10) -#define FATTR4_WORD0_RDATTR_ERROR (1UL << 11) +#define FATTR4_WORD0_SUPPORTED_ATTRS BIT(FATTR4_SUPPORTED_ATTRS) +#define FATTR4_WORD0_TYPE BIT(FATTR4_TYPE) +#define FATTR4_WORD0_FH_EXPIRE_TYPE BIT(FATTR4_FH_EXPIRE_TYPE) +#define FATTR4_WORD0_CHANGE BIT(FATTR4_CHANGE) +#define FATTR4_WORD0_SIZE BIT(FATTR4_SIZE) +#define FATTR4_WORD0_LINK_SUPPORT BIT(FATTR4_LINK_SUPPORT) +#define FATTR4_WORD0_SYMLINK_SUPPORT BIT(FATTR4_SYMLINK_SUPPORT) +#define FATTR4_WORD0_NAMED_ATTR BIT(FATTR4_NAMED_ATTR) +#define FATTR4_WORD0_FSID BIT(FATTR4_FSID) +#define FATTR4_WORD0_UNIQUE_HANDLES BIT(FATTR4_UNIQUE_HANDLES) +#define FATTR4_WORD0_LEASE_TIME BIT(FATTR4_LEASE_TIME) +#define FATTR4_WORD0_RDATTR_ERROR BIT(FATTR4_RDATTR_ERROR) /* Mandatory in NFSv4.1 */ -#define FATTR4_WORD2_SUPPATTR_EXCLCREAT (1UL << 11) +#define FATTR4_WORD2_SUPPATTR_EXCLCREAT BIT(FATTR4_SUPPATTR_EXCLCREAT - 64) /* Recommended Attributes */ -#define FATTR4_WORD0_ACL (1UL << 12) -#define FATTR4_WORD0_ACLSUPPORT (1UL << 13) -#define FATTR4_WORD0_ARCHIVE (1UL << 14) -#define FATTR4_WORD0_CANSETTIME (1UL << 15) -#define FATTR4_WORD0_CASE_INSENSITIVE (1UL << 16) -#define FATTR4_WORD0_CASE_PRESERVING (1UL << 17) -#define FATTR4_WORD0_CHOWN_RESTRICTED (1UL << 18) -#define FATTR4_WORD0_FILEHANDLE (1UL << 19) -#define FATTR4_WORD0_FILEID (1UL << 20) -#define FATTR4_WORD0_FILES_AVAIL (1UL << 21) -#define FATTR4_WORD0_FILES_FREE (1UL << 22) -#define FATTR4_WORD0_FILES_TOTAL (1UL << 23) -#define FATTR4_WORD0_FS_LOCATIONS (1UL << 24) -#define FATTR4_WORD0_HIDDEN (1UL << 25) -#define FATTR4_WORD0_HOMOGENEOUS (1UL << 26) -#define FATTR4_WORD0_MAXFILESIZE (1UL << 27) -#define FATTR4_WORD0_MAXLINK (1UL << 28) -#define FATTR4_WORD0_MAXNAME (1UL << 29) -#define FATTR4_WORD0_MAXREAD (1UL << 30) -#define FATTR4_WORD0_MAXWRITE (1UL << 31) -#define FATTR4_WORD1_MIMETYPE (1UL << 0) -#define FATTR4_WORD1_MODE (1UL << 1) -#define FATTR4_WORD1_NO_TRUNC (1UL << 2) -#define FATTR4_WORD1_NUMLINKS (1UL << 3) -#define FATTR4_WORD1_OWNER (1UL << 4) -#define FATTR4_WORD1_OWNER_GROUP (1UL << 5) -#define FATTR4_WORD1_QUOTA_HARD (1UL << 6) -#define FATTR4_WORD1_QUOTA_SOFT (1UL << 7) -#define FATTR4_WORD1_QUOTA_USED (1UL << 8) -#define FATTR4_WORD1_RAWDEV (1UL << 9) -#define FATTR4_WORD1_SPACE_AVAIL (1UL << 10) -#define FATTR4_WORD1_SPACE_FREE (1UL << 11) -#define FATTR4_WORD1_SPACE_TOTAL (1UL << 12) -#define FATTR4_WORD1_SPACE_USED (1UL << 13) -#define FATTR4_WORD1_SYSTEM (1UL << 14) -#define FATTR4_WORD1_TIME_ACCESS (1UL << 15) -#define FATTR4_WORD1_TIME_ACCESS_SET (1UL << 16) -#define FATTR4_WORD1_TIME_BACKUP (1UL << 17) -#define FATTR4_WORD1_TIME_CREATE (1UL << 18) -#define FATTR4_WORD1_TIME_DELTA (1UL << 19) -#define FATTR4_WORD1_TIME_METADATA (1UL << 20) -#define FATTR4_WORD1_TIME_MODIFY (1UL << 21) -#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) -#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) -#define FATTR4_WORD1_DACL (1UL << 26) -#define FATTR4_WORD1_SACL (1UL << 27) -#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) -#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) -#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) -#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) -#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) -#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) -#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) -#define FATTR4_WORD2_MODE_UMASK (1UL << 17) -#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) +#define FATTR4_WORD0_ACL BIT(FATTR4_ACL) +#define FATTR4_WORD0_ACLSUPPORT BIT(FATTR4_ACLSUPPORT) +#define FATTR4_WORD0_ARCHIVE BIT(FATTR4_ARCHIVE) +#define FATTR4_WORD0_CANSETTIME BIT(FATTR4_CANSETTIME) +#define FATTR4_WORD0_CASE_INSENSITIVE BIT(FATTR4_CASE_INSENSITIVE) +#define FATTR4_WORD0_CASE_PRESERVING BIT(FATTR4_CASE_PRESERVING) +#define FATTR4_WORD0_CHOWN_RESTRICTED BIT(FATTR4_CHOWN_RESTRICTED) +#define FATTR4_WORD0_FILEHANDLE BIT(FATTR4_FILEHANDLE) +#define FATTR4_WORD0_FILEID BIT(FATTR4_FILEID) +#define FATTR4_WORD0_FILES_AVAIL BIT(FATTR4_FILES_AVAIL) +#define FATTR4_WORD0_FILES_FREE BIT(FATTR4_FILES_FREE) +#define FATTR4_WORD0_FILES_TOTAL BIT(FATTR4_FILES_TOTAL) +#define FATTR4_WORD0_FS_LOCATIONS BIT(FATTR4_FS_LOCATIONS) +#define FATTR4_WORD0_HIDDEN BIT(FATTR4_HIDDEN) +#define FATTR4_WORD0_HOMOGENEOUS BIT(FATTR4_HOMOGENEOUS) +#define FATTR4_WORD0_MAXFILESIZE BIT(FATTR4_MAXFILESIZE) +#define FATTR4_WORD0_MAXLINK BIT(FATTR4_MAXLINK) +#define FATTR4_WORD0_MAXNAME BIT(FATTR4_MAXNAME) +#define FATTR4_WORD0_MAXREAD BIT(FATTR4_MAXREAD) +#define FATTR4_WORD0_MAXWRITE BIT(FATTR4_MAXWRITE) + +#define FATTR4_WORD1_MIMETYPE BIT(FATTR4_MIMETYPE - 32) +#define FATTR4_WORD1_MODE BIT(FATTR4_MODE - 32) +#define FATTR4_WORD1_NO_TRUNC BIT(FATTR4_NO_TRUNC - 32) +#define FATTR4_WORD1_NUMLINKS BIT(FATTR4_NUMLINKS - 32) +#define FATTR4_WORD1_OWNER BIT(FATTR4_OWNER - 32) +#define FATTR4_WORD1_OWNER_GROUP BIT(FATTR4_OWNER_GROUP - 32) +#define FATTR4_WORD1_QUOTA_HARD BIT(FATTR4_QUOTA_AVAIL_HARD - 32) +#define FATTR4_WORD1_QUOTA_SOFT BIT(FATTR4_QUOTA_AVAIL_SOFT - 32) +#define FATTR4_WORD1_QUOTA_USED BIT(FATTR4_QUOTA_USED - 32) +#define FATTR4_WORD1_RAWDEV BIT(FATTR4_RAWDEV - 32) +#define FATTR4_WORD1_SPACE_AVAIL BIT(FATTR4_SPACE_AVAIL - 32) +#define FATTR4_WORD1_SPACE_FREE BIT(FATTR4_SPACE_FREE - 32) +#define FATTR4_WORD1_SPACE_TOTAL BIT(FATTR4_SPACE_TOTAL - 32) +#define FATTR4_WORD1_SPACE_USED BIT(FATTR4_SPACE_USED - 32) +#define FATTR4_WORD1_SYSTEM BIT(FATTR4_SYSTEM - 32) +#define FATTR4_WORD1_TIME_ACCESS BIT(FATTR4_TIME_ACCESS - 32) +#define FATTR4_WORD1_TIME_ACCESS_SET BIT(FATTR4_TIME_ACCESS_SET - 32) +#define FATTR4_WORD1_TIME_BACKUP BIT(FATTR4_TIME_BACKUP - 32) +#define FATTR4_WORD1_TIME_CREATE BIT(FATTR4_TIME_CREATE - 32) +#define FATTR4_WORD1_TIME_DELTA BIT(FATTR4_TIME_DELTA - 32) +#define FATTR4_WORD1_TIME_METADATA BIT(FATTR4_TIME_METADATA - 32) +#define FATTR4_WORD1_TIME_MODIFY BIT(FATTR4_TIME_MODIFY - 32) +#define FATTR4_WORD1_TIME_MODIFY_SET BIT(FATTR4_TIME_MODIFY_SET - 32) +#define FATTR4_WORD1_MOUNTED_ON_FILEID BIT(FATTR4_MOUNTED_ON_FILEID - 32) +#define FATTR4_WORD1_DACL BIT(FATTR4_DACL - 32) +#define FATTR4_WORD1_SACL BIT(FATTR4_SACL - 32) +#define FATTR4_WORD1_FS_LAYOUT_TYPES BIT(FATTR4_FS_LAYOUT_TYPES - 32) + +#define FATTR4_WORD2_LAYOUT_TYPES BIT(FATTR4_LAYOUT_TYPES - 64) +#define FATTR4_WORD2_LAYOUT_BLKSIZE BIT(FATTR4_LAYOUT_BLKSIZE - 64) +#define FATTR4_WORD2_MDSTHRESHOLD BIT(FATTR4_MDSTHRESHOLD - 64) +#define FATTR4_WORD2_CLONE_BLKSIZE BIT(FATTR4_CLONE_BLKSIZE - 64) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE BIT(FATTR4_CHANGE_ATTR_TYPE - 64) +#define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64) +#define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64) +#define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) @@ -745,4 +869,26 @@ enum { RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, }; +enum nfs_cb_opnum4 { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + + /* Callback operations new to NFSv4.1 */ + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, + + /* Callback operations new to NFSv4.2 */ + OP_CB_OFFLOAD = 15, + + OP_CB_ILLEGAL = 10044, +}; + #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 279262057a92..d59116ac8209 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -595,7 +595,6 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); * linux/fs/nfs/write.c */ extern int nfs_congestion_kb; -extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct folio *folio); extern int nfs_update_folio(struct file *file, struct folio *folio, @@ -612,6 +611,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline bool nfs_have_writebacks(const struct inode *inode) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd628c4b011e..92de074e63b9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -239,6 +240,7 @@ struct nfs_server { struct list_head delegations; struct list_head ss_copies; + unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) @@ -264,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 12bbb5c63664..d09b9773b20c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1772,7 +1772,7 @@ struct nfs_rpc_ops { void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct page *, + int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); @@ -1821,13 +1821,6 @@ struct nfs_rpc_ops { }; /* - * NFS_CALL(getattr, inode, (fattr)); - * into - * NFS_PROTO(inode)->getattr(fattr); - */ -#define NFS_CALL(op, inode, args) NFS_PROTO(inode)->op args - -/* * Function vectors etc. for the NFS client */ extern const struct nfs_rpc_ops nfs_v2_clientops; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e92e378df000..f53438eae815 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -216,13 +216,6 @@ void watchdog_update_hrtimer_threshold(u64 period); static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -struct ctl_table; -int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); - #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> #endif diff --git a/include/linux/node.h b/include/linux/node.h index 427a5975cf40..dfc004e4bee7 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -20,20 +20,32 @@ #include <linux/list.h> /** - * struct node_hmem_attrs - heterogeneous memory performance attributes + * struct access_coordinate - generic performance coordinates container * * @read_bandwidth: Read bandwidth in MB/s * @write_bandwidth: Write bandwidth in MB/s * @read_latency: Read latency in nanoseconds * @write_latency: Write latency in nanoseconds */ -struct node_hmem_attrs { +struct access_coordinate { unsigned int read_bandwidth; unsigned int write_bandwidth; unsigned int read_latency; unsigned int write_latency; }; +/* + * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0 + * - access_coordinate between target node and nearest initiator node + * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1 + * - access_coordinate between target node and nearest CPU node + */ +enum access_coordinate_class { + ACCESS_COORDINATE_LOCAL, + ACCESS_COORDINATE_CPU, + ACCESS_COORDINATE_MAX +}; + enum cache_indexing { NODE_CACHE_DIRECT_MAP, NODE_CACHE_INDEXED, @@ -65,8 +77,8 @@ struct node_cache_attrs { #ifdef CONFIG_HMEM_REPORTING void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); -void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, - unsigned access); +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -74,8 +86,8 @@ static inline void node_add_cache(unsigned int nid, } static inline void node_set_perf_attrs(unsigned int nid, - struct node_hmem_attrs *hmem_attrs, - unsigned access) + struct access_coordinate *coord, + enum access_coordinate_class access) { } #endif @@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, - unsigned access); + enum access_coordinate_class access); #else static inline void node_dev_init(void) { diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 8d07116caaf1..b61438313a73 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -93,10 +93,10 @@ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> +#include <linux/nodemask_types.h> #include <linux/numa.h> #include <linux/random.h> -typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; /** diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h new file mode 100644 index 000000000000..6b28d97ea6ed --- /dev/null +++ b/include/linux/nodemask_types.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_NODEMASK_TYPES_H +#define __LINUX_NODEMASK_TYPES_H + +#include <linux/bitops.h> +#include <linux/numa.h> + +typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; + +#endif /* __LINUX_NODEMASK_TYPES_H */ diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 0f1d024bd958..7d22ea50b098 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -7,7 +7,7 @@ struct proc_ns_operations; struct ns_common { - atomic_long_t stashed; + struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 771cb0285872..5601d14e2886 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -2,6 +2,7 @@ #ifndef _LINUX_NSPROXY_H #define _LINUX_NSPROXY_H +#include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/sched.h> diff --git a/include/linux/nubus.h b/include/linux/nubus.h index bdcd85e622d8..4d103ac8f5c7 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -89,8 +89,6 @@ struct nubus_driver { void (*remove)(struct nubus_board *board); }; -extern struct bus_type nubus_bus_type; - /* Generic NuBus interface functions, modelled after the PCI interface */ #ifdef CONFIG_PROC_FS extern bool nubus_populate_procfs; diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..915033a75731 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NUMA_H #define _LINUX_NUMA_H +#include <linux/init.h> #include <linux/types.h> #ifdef CONFIG_NODES_SHIFT @@ -12,6 +13,7 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) +#define NUMA_NO_MEMBLK (-1) /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO @@ -21,33 +23,32 @@ #endif #ifdef CONFIG_NUMA -#include <linux/printk.h> #include <asm/sparsemem.h> /* Generic implementation available */ -int numa_map_to_online_node(int node); +int numa_nearest_node(int node, unsigned int state); #ifndef memory_add_physaddr_to_nid -static inline int memory_add_physaddr_to_nid(u64 start) -{ - pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", - start); - return 0; -} +int memory_add_physaddr_to_nid(u64 start); #endif + #ifndef phys_to_target_node -static inline int phys_to_target_node(u64 start) +int phys_to_target_node(u64 start); +#endif + +#ifndef numa_fill_memblks +static inline int __init numa_fill_memblks(u64 start, u64 end) { - pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", - start); - return 0; + return NUMA_NO_MEMBLK; } #endif + #else /* !CONFIG_NUMA */ -static inline int numa_map_to_online_node(int node) +static inline int numa_nearest_node(int node, unsigned int state) { return NUMA_NO_NODE; } + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; @@ -58,6 +59,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) + #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP extern const struct attribute_group arch_node_dev_group; #endif diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index dcb8030062dd..c1d0bc5d9624 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -9,9 +9,9 @@ #include <crypto/kpp.h> struct nvme_dhchap_key { - u8 *key; size_t len; u8 hash; + u8 key[]; }; u32 nvme_auth_get_seqnum(void); @@ -24,10 +24,13 @@ const char *nvme_auth_digest_name(u8 hmac_id); size_t nvme_auth_hmac_hash_len(u8 hmac_id); u8 nvme_auth_hmac_id(const char *hmac_name); +u32 nvme_auth_key_struct_size(u32 key_len); struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, u8 key_hash); void nvme_auth_free_key(struct nvme_dhchap_key *key); -u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn); +struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash); +struct nvme_dhchap_key *nvme_auth_transform_key( + struct nvme_dhchap_key *key, char *nqn); int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key); int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h new file mode 100644 index 000000000000..e10333d78dbb --- /dev/null +++ b/include/linux/nvme-keyring.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#ifndef _NVME_KEYRING_H +#define _NVME_KEYRING_H + +#if IS_ENABLED(CONFIG_NVME_KEYRING) + +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn); + +key_serial_t nvme_keyring_id(void); + +#else + +static inline key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + return 0; +} +static inline key_serial_t nvme_keyring_id(void) +{ + return 0; +} +#endif /* !CONFIG_NVME_KEYRING */ +#endif /* _NVME_KEYRING_H */ diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 4dd7e6fe92fb..eb2f04d636c8 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,7 +6,11 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_IP_PORT 4420 + +#define NVME_RDMA_MAX_QUEUE_SIZE 256 +#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 +#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 57ebe1267f7f..e07e8978d691 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -18,6 +18,12 @@ enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, }; +enum nvme_tcp_tls_cipher { + NVME_TCP_TLS_CIPHER_INVALID = 0, + NVME_TCP_TLS_CIPHER_SHA256 = 1, + NVME_TCP_TLS_CIPHER_SHA384 = 2, +}; + enum nvme_tcp_fatal_error_status { NVME_TCP_FES_INVALID_PDU_HDR = 0x01, NVME_TCP_FES_PDU_SEQ_ERR = 0x02, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26dd3f859d9d..425573202295 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -20,12 +20,9 @@ #define NVMF_TRSVCID_SIZE 32 #define NVMF_TRADDR_SIZE 256 #define NVMF_TSAS_SIZE 256 -#define NVMF_AUTH_HASH_LEN 64 #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" -#define NVME_RDMA_IP_PORT 4420 - #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { @@ -108,6 +105,13 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; +/* TSAS SECTYPE for TCP transport */ +enum { + NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ + NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ +}; + #define NVME_AQ_DEPTH 32 #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) @@ -640,6 +644,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), @@ -810,12 +815,6 @@ struct nvme_reservation_status_ext { struct nvme_registered_ctrl_ext regctl_eds[]; }; -enum nvme_async_event_type { - NVME_AER_TYPE_ERROR = 0, - NVME_AER_TYPE_SMART = 1, - NVME_AER_TYPE_NOTICE = 2, -}; - /* I/O commands */ enum nvme_opcode { @@ -1493,6 +1492,9 @@ struct nvmf_disc_rsp_page_entry { __u16 pkey; __u8 resv10[246]; } rdma; + struct tcp { + __u8 sectype; + } tcp; } tsas; }; @@ -1722,7 +1724,7 @@ struct nvmf_auth_dhchap_success1_data { __u8 rsvd2; __u8 rvalid; __u8 rsvd3[7]; - /* 'hl' bytes of response value if 'rvalid' is set */ + /* 'hl' bytes of response value */ __u8 rval[]; }; @@ -1809,7 +1811,7 @@ struct nvme_command { }; }; -static inline bool nvme_is_fabrics(struct nvme_command *cmd) +static inline bool nvme_is_fabrics(const struct nvme_command *cmd) { return cmd->common.opcode == nvme_fabrics_command; } @@ -1828,7 +1830,7 @@ struct nvme_error_slot { __u8 resv2[24]; }; -static inline bool nvme_is_write(struct nvme_command *cmd) +static inline bool nvme_is_write(const struct nvme_command *cmd) { /* * What a mess... diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 4523e4e83319..34c0e58dfa26 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -81,6 +81,7 @@ int nvmem_device_cell_write(struct nvmem_device *nvmem, struct nvmem_cell_info *info, void *buf); const char *nvmem_dev_name(struct nvmem_device *nvmem); +size_t nvmem_dev_size(struct nvmem_device *nvmem); void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries); @@ -127,6 +128,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_u8(struct device *dev, + const char *cell_id, u8 *val) +{ + return -EOPNOTSUPP; +} + static inline int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val) { @@ -241,7 +248,6 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id); struct nvmem_device *of_nvmem_device_get(struct device_node *np, const char *name); -struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem); #else static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id) @@ -254,12 +260,6 @@ static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np, { return ERR_PTR(-EOPNOTSUPP); } - -static inline struct device_node * -of_nvmem_layout_get_container(struct nvmem_device *nvmem) -{ - return NULL; -} #endif /* CONFIG_NVMEM && CONFIG_OF */ #endif /* ifndef _LINUX_NVMEM_CONSUMER_H */ diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index dae26295e6be..f0ba0e03218f 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -9,6 +9,7 @@ #ifndef _LINUX_NVMEM_PROVIDER_H #define _LINUX_NVMEM_PROVIDER_H +#include <linux/device.h> #include <linux/device/driver.h> #include <linux/err.h> #include <linux/errno.h> @@ -82,13 +83,15 @@ struct nvmem_cell_info { * @owner: Pointer to exporter module. Used for refcounting. * @cells: Optional array of pre-defined NVMEM cells. * @ncells: Number of elements in cells. + * @add_legacy_fixed_of_cells: Read fixed NVMEM cells from old OF syntax. + * @fixup_dt_cell_info: Will be called before a cell is added. Can be + * used to modify the nvmem_cell_info. * @keepout: Optional array of keepout ranges (sorted ascending by start). * @nkeepout: Number of elements in the keepout array. * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. * @of_node: If given, this will be used instead of the parent's of_node. - * @no_of_node: Device should not use the parent's of_node even if it's !NULL. * @reg_read: Callback to read data. * @reg_write: Callback to write data. * @size: Device size. @@ -112,6 +115,9 @@ struct nvmem_config { struct module *owner; const struct nvmem_cell_info *cells; int ncells; + bool add_legacy_fixed_of_cells; + void (*fixup_dt_cell_info)(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell); const struct nvmem_keepout *keepout; unsigned int nkeepout; enum nvmem_type type; @@ -120,7 +126,6 @@ struct nvmem_config { bool ignore_wp; struct nvmem_layout *layout; struct device_node *of_node; - bool no_of_node; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; int size; @@ -154,15 +159,11 @@ struct nvmem_cell_table { /** * struct nvmem_layout - NVMEM layout definitions * - * @name: Layout name. - * @of_match_table: Open firmware match table. + * @dev: Device-model layout device. + * @nvmem: The underlying NVMEM device * @add_cells: Will be called if a nvmem device is found which * has this layout. The function will add layout * specific cells with nvmem_add_one_cell(). - * @fixup_cell_info: Will be called before a cell is added. Can be - * used to modify the nvmem_cell_info. - * @owner: Pointer to struct module. - * @node: List node. * * A nvmem device can hold a well defined structure which can just be * evaluated during runtime. For example a TLV list, or a list of "name=val" @@ -170,17 +171,15 @@ struct nvmem_cell_table { * cells. */ struct nvmem_layout { - const char *name; - const struct of_device_id *of_match_table; - int (*add_cells)(struct device *dev, struct nvmem_device *nvmem, - struct nvmem_layout *layout); - void (*fixup_cell_info)(struct nvmem_device *nvmem, - struct nvmem_layout *layout, - struct nvmem_cell_info *cell); + struct device dev; + struct nvmem_device *nvmem; + int (*add_cells)(struct nvmem_layout *layout); +}; - /* private */ - struct module *owner; - struct list_head node; +struct nvmem_layout_driver { + struct device_driver driver; + int (*probe)(struct nvmem_layout *layout); + void (*remove)(struct nvmem_layout *layout); }; #if IS_ENABLED(CONFIG_NVMEM) @@ -197,13 +196,14 @@ void nvmem_del_cell_table(struct nvmem_cell_table *table); int nvmem_add_one_cell(struct nvmem_device *nvmem, const struct nvmem_cell_info *info); -int __nvmem_layout_register(struct nvmem_layout *layout, struct module *owner); -#define nvmem_layout_register(layout) \ - __nvmem_layout_register(layout, THIS_MODULE) +int nvmem_layout_register(struct nvmem_layout *layout); void nvmem_layout_unregister(struct nvmem_layout *layout); -const void *nvmem_layout_get_match_data(struct nvmem_device *nvmem, - struct nvmem_layout *layout); +int nvmem_layout_driver_register(struct nvmem_layout_driver *drv); +void nvmem_layout_driver_unregister(struct nvmem_layout_driver *drv); +#define module_nvmem_layout_driver(__nvmem_layout_driver) \ + module_driver(__nvmem_layout_driver, nvmem_layout_driver_register, \ + nvmem_layout_driver_unregister) #else @@ -235,17 +235,27 @@ static inline int nvmem_layout_register(struct nvmem_layout *layout) static inline void nvmem_layout_unregister(struct nvmem_layout *layout) {} -static inline const void * -nvmem_layout_get_match_data(struct nvmem_device *nvmem, - struct nvmem_layout *layout) +#endif /* CONFIG_NVMEM */ + +#if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) + +/** + * of_nvmem_layout_get_container() - Get OF node of layout container + * + * @nvmem: nvmem device + * + * Return: a node pointer with refcount incremented or NULL if no + * container exists. Use of_node_put() on it when done. + */ +struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem); + +#else /* CONFIG_NVMEM && CONFIG_OF */ + +static inline struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem) { return NULL; } -#endif /* CONFIG_NVMEM */ - -#define module_nvmem_layout_driver(__layout_driver) \ - module_driver(__layout_driver, nvmem_layout_register, \ - nvmem_layout_unregister) +#endif /* CONFIG_NVMEM && CONFIG_OF */ #endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ diff --git a/include/linux/objpool.h b/include/linux/objpool.h new file mode 100644 index 000000000000..15aff4a17f0c --- /dev/null +++ b/include/linux/objpool.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_OBJPOOL_H +#define _LINUX_OBJPOOL_H + +#include <linux/types.h> +#include <linux/refcount.h> + +/* + * objpool: ring-array based lockless MPMC queue + * + * Copyright: wuqiang.matt@bytedance.com,mhiramat@kernel.org + * + * objpool is a scalable implementation of high performance queue for + * object allocation and reclamation, such as kretprobe instances. + * + * With leveraging percpu ring-array to mitigate hot spots of memory + * contention, it delivers near-linear scalability for high parallel + * scenarios. The objpool is best suited for the following cases: + * 1) Memory allocation or reclamation are prohibited or too expensive + * 2) Consumers are of different priorities, such as irqs and threads + * + * Limitations: + * 1) Maximum objects (capacity) is fixed after objpool creation + * 2) All pre-allocated objects are managed in percpu ring array, + * which consumes more memory than linked lists + */ + +/** + * struct objpool_slot - percpu ring array of objpool + * @head: head sequence of the local ring array (to retrieve at) + * @tail: tail sequence of the local ring array (to append at) + * @last: the last sequence number marked as ready for retrieve + * @mask: bits mask for modulo capacity to compute array indexes + * @entries: object entries on this slot + * + * Represents a cpu-local array-based ring buffer, its size is specialized + * during initialization of object pool. The percpu objpool node is to be + * allocated from local memory for NUMA system, and to be kept compact in + * continuous memory: CPU assigned number of objects are stored just after + * the body of objpool_node. + * + * Real size of the ring array is far too smaller than the value range of + * head and tail, typed as uint32_t: [0, 2^32), so only lower bits (mask) + * of head and tail are used as the actual position in the ring array. In + * general the ring array is acting like a small sliding window, which is + * always moving forward in the loop of [0, 2^32). + */ +struct objpool_slot { + uint32_t head; + uint32_t tail; + uint32_t last; + uint32_t mask; + void *entries[]; +} __packed; + +struct objpool_head; + +/* + * caller-specified callback for object initial setup, it's only called + * once for each object (just after the memory allocation of the object) + */ +typedef int (*objpool_init_obj_cb)(void *obj, void *context); + +/* caller-specified cleanup callback for objpool destruction */ +typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context); + +/** + * struct objpool_head - object pooling metadata + * @obj_size: object size, aligned to sizeof(void *) + * @nr_objs: total objs (to be pre-allocated with objpool) + * @nr_cpus: local copy of nr_cpu_ids + * @capacity: max objs can be managed by one objpool_slot + * @gfp: gfp flags for kmalloc & vmalloc + * @ref: refcount of objpool + * @flags: flags for objpool management + * @cpu_slots: pointer to the array of objpool_slot + * @release: resource cleanup callback + * @context: caller-provided context + */ +struct objpool_head { + int obj_size; + int nr_objs; + int nr_cpus; + int capacity; + gfp_t gfp; + refcount_t ref; + unsigned long flags; + struct objpool_slot **cpu_slots; + objpool_fini_cb release; + void *context; +}; + +#define OBJPOOL_NR_OBJECT_MAX (1UL << 24) /* maximum numbers of total objects */ +#define OBJPOOL_OBJECT_SIZE_MAX (1UL << 16) /* maximum size of an object */ + +/** + * objpool_init() - initialize objpool and pre-allocated objects + * @pool: the object pool to be initialized, declared by caller + * @nr_objs: total objects to be pre-allocated by this object pool + * @object_size: size of an object (should be > 0) + * @gfp: flags for memory allocation (via kmalloc or vmalloc) + * @context: user context for object initialization callback + * @objinit: object initialization callback for extra setup + * @release: cleanup callback for extra cleanup task + * + * return value: 0 for success, otherwise error code + * + * All pre-allocated objects are to be zeroed after memory allocation. + * Caller could do extra initialization in objinit callback. objinit() + * will be called just after slot allocation and called only once for + * each object. After that the objpool won't touch any content of the + * objects. It's caller's duty to perform reinitialization after each + * pop (object allocation) or do clearance before each push (object + * reclamation). + */ +int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, + gfp_t gfp, void *context, objpool_init_obj_cb objinit, + objpool_fini_cb release); + +/** + * objpool_pop() - allocate an object from objpool + * @pool: object pool + * + * return value: object ptr or NULL if failed + */ +void *objpool_pop(struct objpool_head *pool); + +/** + * objpool_push() - reclaim the object and return back to objpool + * @obj: object ptr to be pushed to objpool + * @pool: object pool + * + * return: 0 or error code (it fails only when user tries to push + * the same object multiple times or wrong "objects" into objpool) + */ +int objpool_push(void *obj, struct objpool_head *pool); + +/** + * objpool_drop() - discard the object and deref objpool + * @obj: object ptr to be discarded + * @pool: object pool + * + * return: 0 if objpool was released; -EAGAIN if there are still + * outstanding objects + * + * objpool_drop is normally for the release of outstanding objects + * after objpool cleanup (objpool_fini). Thinking of this example: + * kretprobe is unregistered and objpool_fini() is called to release + * all remained objects, but there are still objects being used by + * unfinished kretprobes (like blockable function: sys_accept). So + * only when the last outstanding object is dropped could the whole + * objpool be released along with the call of objpool_drop() + */ +int objpool_drop(void *obj, struct objpool_head *pool); + +/** + * objpool_free() - release objpool forcely (all objects to be freed) + * @pool: object pool to be released + */ +void objpool_free(struct objpool_head *pool); + +/** + * objpool_fini() - deref object pool (also releasing unused objects) + * @pool: object pool to be dereferenced + * + * objpool_fini() will try to release all remained free objects and + * then drop an extra reference of the objpool. If all objects are + * already returned to objpool (so called synchronous use cases), + * the objpool itself will be freed together. But if there are still + * outstanding objects (so called asynchronous use cases, such like + * blockable kretprobe), the objpool won't be released until all + * the outstanding objects are dropped, but the caller must assure + * there are no concurrent objpool_push() on the fly. Normally RCU + * is being required to make sure all ongoing objpool_push() must + * be finished before calling objpool_fini(), so does test_objpool, + * kretprobe or rethook + */ +void objpool_fini(struct objpool_head *pool); + +#endif /* _LINUX_OBJPOOL_H */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 03f82c2c2ebf..b3b8d3dab52d 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -48,13 +48,13 @@ #define ANNOTATE_NOENDBR \ "986: \n\t" \ ".pushsection .discard.noendbr\n\t" \ - ".long 986b - .\n\t" \ + ".long 986b\n\t" \ ".popsection\n\t" #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ + ".long 998b\n\t" \ ".popsection\n\t" #else /* __ASSEMBLY__ */ @@ -66,7 +66,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL \ 999: \ .pushsection .discard.intra_function_calls; \ - .long 999b - .; \ + .long 999b; \ .popsection; /* @@ -118,7 +118,7 @@ .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -130,7 +130,8 @@ * it will be ignored. */ .macro VALIDATE_UNRET_BEGIN -#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY) +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . @@ -141,7 +142,7 @@ .macro REACHABLE .Lhere_\@: .pushsection .discard.reachable - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm diff --git a/include/linux/of.h b/include/linux/of.h index 6a9ddf20e79a..a0bedd038a05 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -13,6 +13,7 @@ */ #include <linux/types.h> #include <linux/bitops.h> +#include <linux/cleanup.h> #include <linux/errno.h> #include <linux/kobject.h> #include <linux/mod_devicetable.h> @@ -134,6 +135,7 @@ static inline struct device_node *of_node_get(struct device_node *node) } static inline void of_node_put(struct device_node *node) { } #endif /* !CONFIG_OF_DYNAMIC */ +DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T)) /* Pointer for first entry in chain of all nodes. */ extern struct device_node *of_root; @@ -180,11 +182,6 @@ static inline bool is_of_node(const struct fwnode_handle *fwnode) &__of_fwnode_handle_node->fwnode : NULL; \ }) -static inline bool of_have_populated_dt(void) -{ - return of_root != NULL; -} - static inline bool of_node_is_root(const struct device_node *node) { return node && (node->parent == NULL); @@ -294,6 +291,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_next_reserved_child( + const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible); @@ -362,9 +361,6 @@ extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); -#define for_each_property_of_node(dn, pp) \ - for (pp = dn->properties; pp != NULL; pp = pp->next) - extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -402,7 +398,20 @@ extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); -extern int of_machine_is_compatible(const char *compat); +bool of_machine_compatible_match(const char *const *compats); + +/** + * of_machine_is_compatible - Test root of device tree for a given compatible value + * @compat: compatible string to look for in root node's compatible property. + * + * Return: true if the root node has the given value in its compatible property. + */ +static inline bool of_machine_is_compatible(const char *compat) +{ + const char *compats[] = { compat, NULL }; + + return of_machine_compatible_match(compats); +} extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); @@ -541,6 +550,12 @@ static inline struct device_node *of_get_next_available_child( return NULL; } +static inline struct device_node *of_get_next_reserved_child( + const struct device_node *node, struct device_node *prev) +{ + return NULL; +} + static inline struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name) { @@ -549,11 +564,6 @@ static inline struct device_node *of_find_node_with_property( #define of_fwnode_handle(node) NULL -static inline bool of_have_populated_dt(void) -{ - return false; -} - static inline struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { @@ -808,6 +818,11 @@ static inline int of_remove_property(struct device_node *np, struct property *pr return 0; } +static inline bool of_machine_compatible_match(const char *const *compats) +{ + return false; +} + static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { return false; @@ -892,6 +907,9 @@ static inline int of_prop_val_eq(struct property *p1, struct property *p2) !memcmp(p1->value, p2->value, (size_t)p1->length); } +#define for_each_property_of_node(dn, pp) \ + for (pp = dn->properties; pp != NULL; pp = pp->next) + #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else @@ -1066,6 +1084,22 @@ static inline int of_parse_phandle_with_optional_args(const struct device_node * } /** + * of_phandle_args_equal() - Compare two of_phandle_args + * @a1: First of_phandle_args to compare + * @a2: Second of_phandle_args to compare + * + * Return: True if a1 and a2 are the same (same node pointer, same phandle + * args), false otherwise. + */ +static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, + const struct of_phandle_args *a2) +{ + return a1->np == a2->np && + a1->args_count == a2->args_count && + !memcmp(a1->args, a2->args, sizeof(a1->args[0]) * a1->args_count); +} + +/** * of_property_count_u8_elems - Count the number of u8 elements in a property * * @np: device node from which the property value is to be read. @@ -1428,9 +1462,25 @@ static inline int of_property_read_s32(const struct device_node *np, #define for_each_child_of_node(parent, child) \ for (child = of_get_next_child(parent, NULL); child != NULL; \ child = of_get_next_child(parent, child)) + +#define for_each_child_of_node_scoped(parent, child) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_child(parent, NULL); \ + child != NULL; \ + child = of_get_next_child(parent, child)) + #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) +#define for_each_reserved_child_of_node(parent, child) \ + for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \ + child = of_get_next_reserved_child(parent, child)) + +#define for_each_available_child_of_node_scoped(parent, child) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_available_child(parent, NULL); \ + child != NULL; \ + child = of_get_next_available_child(parent, child)) #define for_each_of_cpu_node(cpu) \ for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \ @@ -1634,6 +1684,21 @@ static inline bool of_device_is_system_power_controller(const struct device_node return of_property_read_bool(np, "system-power-controller"); } +/** + * of_have_populated_dt() - Has DT been populated by bootloader + * + * Return: True if a DTB has been populated by the bootloader and it isn't the + * empty builtin one. False otherwise. + */ +static inline bool of_have_populated_dt(void) +{ +#ifdef CONFIG_OF + return of_property_present(of_root, "compatible"); +#else + return false; +#endif +} + /* * Overlay support */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 2c7a3d4bc775..9042bca5bb84 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -2,10 +2,7 @@ #ifndef _LINUX_OF_DEVICE_H #define _LINUX_OF_DEVICE_H -#include <linux/platform_device.h> -#include <linux/of_platform.h> /* temporary until merge */ - -#include <linux/of.h> +#include <linux/device/driver.h> struct device; struct of_device_id; @@ -40,6 +37,9 @@ static inline int of_dma_configure(struct device *dev, { return of_dma_configure_id(dev, np, force_dma, NULL); } + +void of_device_make_bus_id(struct device *dev); + #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -82,6 +82,9 @@ static inline int of_dma_configure(struct device *dev, { return 0; } + +static inline void of_device_make_bus_id(struct device *dev) {} + #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 4d7756087b6b..a4bea62bfa29 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -41,7 +41,7 @@ struct of_endpoint { bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); -int of_graph_get_endpoint_count(const struct device_node *np); +unsigned int of_graph_get_endpoint_count(const struct device_node *np); struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); @@ -68,7 +68,7 @@ static inline int of_graph_parse_endpoint(const struct device_node *node, return -ENOSYS; } -static inline int of_graph_get_endpoint_count(const struct device_node *np) +static inline unsigned int of_graph_get_endpoint_count(const struct device_node *np) { return 0; } diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 9a5e6b410dd2..e61cbbe12dac 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -8,20 +8,19 @@ struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id); +extern int of_iommu_configure(struct device *dev, struct device_node *master_np, + const u32 *id); extern void of_iommu_get_resv_regions(struct device *dev, struct list_head *list); #else -static inline const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id) +static inline int of_iommu_configure(struct device *dev, + struct device_node *master_np, + const u32 *id) { - return NULL; + return -ENODEV; } static inline void of_iommu_get_resv_regions(struct device *dev, diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index fadfea575485..a2ff1ad48f7f 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -7,11 +7,11 @@ */ #include <linux/mod_devicetable.h> -#include <linux/of_device.h> -#include <linux/platform_device.h> struct device; +struct device_node; struct of_device_id; +struct platform_device; /** * struct of_dev_auxdata - lookup table entry for device names & platform_data diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index f86a08ba0207..51421fdbb0ba 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -30,9 +30,6 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ - OID_md2WithRSAEncryption, /* 1.2.840.113549.1.1.2 */ - OID_md3WithRSAEncryption, /* 1.2.840.113549.1.1.3 */ - OID_md4WithRSAEncryption, /* 1.2.840.113549.1.1.4 */ OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ @@ -49,11 +46,6 @@ enum OID { OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */ OID_smimeAuthenticatedAttrs, /* 1.2.840.113549.1.9.16.2.11 */ - /* {iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2)} */ - OID_md2, /* 1.2.840.113549.2.2 */ - OID_md4, /* 1.2.840.113549.2.4 */ - OID_md5, /* 1.2.840.113549.2.5 */ - OID_mskrb5, /* 1.2.840.48018.1.2.2 */ OID_krb5, /* 1.2.840.113554.1.2.2 */ OID_krb5u2u, /* 1.2.840.113554.1.2.2.3 */ @@ -141,6 +133,17 @@ enum OID { OID_TPMImportableKey, /* 2.23.133.10.1.4 */ OID_TPMSealedData, /* 2.23.133.10.1.5 */ + /* CSOR FIPS-202 SHA-3 */ + OID_sha3_256, /* 2.16.840.1.101.3.4.2.8 */ + OID_sha3_384, /* 2.16.840.1.101.3.4.2.9 */ + OID_sha3_512, /* 2.16.840.1.101.3.4.2.10 */ + OID_id_ecdsa_with_sha3_256, /* 2.16.840.1.101.3.4.3.10 */ + OID_id_ecdsa_with_sha3_384, /* 2.16.840.1.101.3.4.3.11 */ + OID_id_ecdsa_with_sha3_512, /* 2.16.840.1.101.3.4.3.12 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_256, /* 2.16.840.1.101.3.4.3.14 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_384, /* 2.16.840.1.101.3.4.3.15 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_512, /* 2.16.840.1.101.3.4.3.16 */ + OID__NR }; diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 5581dbd3bd34..ea8fb31379e3 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -6,11 +6,6 @@ * An MCS like lock especially tailored for optimistic spinning for sleeping * lock implementations (mutex, rwsem, etc). */ -struct optimistic_spin_node { - struct optimistic_spin_node *next, *prev; - int locked; /* 1 if lock acquired */ - int cpu; /* encoded CPU # + 1 value */ -}; struct optimistic_spin_queue { /* diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f9b60313eaea..0c7e3dcfe867 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -31,8 +31,10 @@ * credit to Christian Biere. */ #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) -#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) -#define type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_max(t) __type_max(typeof(t)) +#define __type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define type_min(t) __type_min(typeof(t)) /* * Avoids triggering -Wtype-limits compilation warning, @@ -57,46 +59,123 @@ static inline bool __must_check __must_check_overflow(bool overflow) * @b: second addend * @d: pointer to store sum * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted addition, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * sum has overflowed or been truncated. + * *@d holds the results of the attempted addition, regardless of whether + * wrap-around occurred. */ #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) /** + * wrapping_add() - Intentionally perform a wrapping addition + * @type: type for result of calculation + * @a: first addend + * @b: second addend + * + * Return the potentially wrapped-around addition without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_add(type, a, b) \ + ({ \ + type __val; \ + __builtin_add_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_add() - Intentionally perform a wrapping increment assignment + * @var: variable to be incremented + * @offset: amount to add + * + * Increments @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_add(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_add(typeof(var), *__ptr, offset); \ + }) + +/** * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted subtraction, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * difference has underflowed or been truncated. + * *@d holds the results of the attempted subtraction, regardless of whether + * wrap-around occurred. */ #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) /** + * wrapping_sub() - Intentionally perform a wrapping subtraction + * @type: type for result of calculation + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * + * Return the potentially wrapped-around subtraction without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_sub(type, a, b) \ + ({ \ + type __val; \ + __builtin_sub_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign + * @var: variable to be decremented + * @offset: amount to subtract + * + * Decrements @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_sub(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \ + }) + +/** * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted multiplication, but is not - * considered "safe for use" on a non-zero return value, which indicates - * that the product has overflowed or been truncated. + * *@d holds the results of the attempted multiplication, regardless of whether + * wrap-around occurred. */ #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** + * wrapping_mul() - Intentionally perform a wrapping multiplication + * @type: type for result of calculation + * @a: first factor + * @b: second factor + * + * Return the potentially wrapped-around multiplication without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_mul(type, a, b) \ + ({ \ + type __val; \ + __builtin_mul_overflow(a, b, &__val); \ + __val; \ + }) + +/** * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift @@ -120,7 +199,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ - u64 _a_full = _a; \ + unsigned long long _a_full = _a; \ unsigned int _to_shift = \ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ @@ -130,10 +209,10 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define __overflows_type_constexpr(x, T) ( \ is_unsigned_type(typeof(x)) ? \ - (x) > type_max(typeof(T)) : \ + (x) > type_max(T) : \ is_unsigned_type(typeof(T)) ? \ - (x) < 0 || (x) > type_max(typeof(T)) : \ - (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + (x) < 0 || (x) > type_max(T) : \ + (x) < type_min(T) || (x) > type_max(T)) #define __overflows_type(x, T) ({ \ typeof(T) v = 0; \ @@ -309,4 +388,56 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) #define struct_size_t(type, member, count) \ struct_size((type *)NULL, member, count) +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: initializer expression (could be empty for no init). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + _Static_assert(__builtin_constant_p(count), \ + "onstack flex array members require compile-time const count"); \ + union { \ + u8 bytes[struct_size_t(type, member, count)]; \ + type obj; \ + } name##_u initializer; \ + type *name = (type *)&name##_u + +/** + * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member, when it does not have a __counted_by annotation. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @type structure with a trailing + * flexible array member. + * Use __struct_size(@name) to get compile-time size of it afterwards. + */ +#define DEFINE_RAW_FLEX(type, name, member, count) \ + _DEFINE_FLEX(type, name, member, count, = {}) + +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @TYPE: structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @MEMBER: Name of the array member. + * @COUNTER: Name of the __counted_by member. + * @COUNT: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @TYPE structure with a trailing + * flexible array member. + * Use __struct_size(@NAME) to get compile-time size of it afterwards. + */ +#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/padata.h b/include/linux/padata.h index 495b16b6b4d7..0146daf34430 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -137,6 +137,7 @@ struct padata_shell { * appropriate for one worker thread to do at once. * @max_threads: Max threads to use for the job, actual number may be less * depending on task size and minimum chunk size. + * @numa_aware: Distribute jobs to different nodes with CPU in a round robin fashion. */ struct padata_mt_job { void (*thread_fn)(unsigned long start, unsigned long end, void *arg); @@ -146,6 +147,7 @@ struct padata_mt_job { unsigned long align; unsigned long min_chunk; int max_threads; + bool numa_aware; }; /** @@ -178,10 +180,6 @@ struct padata_instance { #ifdef CONFIG_PADATA extern void __init padata_init(void); -#else -static inline void __init padata_init(void) {} -#endif - extern struct padata_instance *padata_alloc(const char *name); extern void padata_free(struct padata_instance *pinst); extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); @@ -192,4 +190,12 @@ extern void padata_do_serial(struct padata_priv *padata); extern void __init padata_do_multithreaded(struct padata_mt_job *job); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, cpumask_var_t cpumask); +#else +static inline void __init padata_init(void) {} +static inline void __init padata_do_multithreaded(struct padata_mt_job *job) +{ + job->thread_fn(job->start, job->start + job->size, job->fn_arg); +} +#endif + #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5c02720c53a5..4bf1c25fd1dc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,7 +190,6 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, - PG_hugetlb = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -237,7 +236,7 @@ static inline const struct page *page_fixed_fake_head(const struct page *page) } #endif -static __always_inline int page_is_fake_head(struct page *page) +static __always_inline int page_is_fake_head(const struct page *page) { return page_fixed_fake_head(page) != page; } @@ -281,12 +280,12 @@ static inline unsigned long _compound_head(const struct page *page) */ #define folio_page(folio, n) nth_page(&(folio)->page, n) -static __always_inline int PageTail(struct page *page) +static __always_inline int PageTail(const struct page *page) { return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } -static __always_inline int PageCompound(struct page *page) +static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags) || READ_ONCE(page->compound_head) & 1; @@ -306,6 +305,16 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static const unsigned long *const_folio_flags(const struct folio *folio, + unsigned n) +{ + const struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; @@ -328,9 +337,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) * for compound page all operations related to the page flag applied to * head page. * - * PF_ONLY_HEAD: - * for compound page, callers only ever operate on the head page. - * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. @@ -346,9 +352,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) page; }) #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) -#define PF_ONLY_HEAD(page, enforce) ({ \ - VM_BUG_ON_PGFLAGS(PageTail(page), page); \ - PF_POISONED_CHECK(page); }) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ PF_POISONED_CHECK(compound_head(page)); }) @@ -362,59 +365,81 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) /* Which page is the flag stored in */ #define FOLIO_PF_ANY 0 #define FOLIO_PF_HEAD 0 -#define FOLIO_PF_ONLY_HEAD 0 #define FOLIO_PF_NO_TAIL 0 #define FOLIO_PF_NO_COMPOUND 0 #define FOLIO_PF_SECOND 1 +#define FOLIO_HEAD_PAGE 0 +#define FOLIO_SECOND_PAGE 1 + /* * Macros to create function definitions for page flags */ +#define FOLIO_TEST_FLAG(name, page) \ +static __always_inline bool folio_test_##name(const struct folio *folio) \ +{ return test_bit(PG_##name, const_folio_flags(folio, page)); } + +#define FOLIO_SET_FLAG(name, page) \ +static __always_inline void folio_set_##name(struct folio *folio) \ +{ set_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_CLEAR_FLAG(name, page) \ +static __always_inline void folio_clear_##name(struct folio *folio) \ +{ clear_bit(PG_##name, folio_flags(folio, page)); } + +#define __FOLIO_SET_FLAG(name, page) \ +static __always_inline void __folio_set_##name(struct folio *folio) \ +{ __set_bit(PG_##name, folio_flags(folio, page)); } + +#define __FOLIO_CLEAR_FLAG(name, page) \ +static __always_inline void __folio_clear_##name(struct folio *folio) \ +{ __clear_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_TEST_SET_FLAG(name, page) \ +static __always_inline bool folio_test_set_##name(struct folio *folio) \ +{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_TEST_CLEAR_FLAG(name, page) \ +static __always_inline bool folio_test_clear_##name(struct folio *folio) \ +{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_FLAG(name, page) \ +FOLIO_TEST_FLAG(name, page) \ +FOLIO_SET_FLAG(name, page) \ +FOLIO_CLEAR_FLAG(name, page) + #define TESTPAGEFLAG(uname, lname, policy) \ -static __always_inline bool folio_test_##lname(struct folio *folio) \ -{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ -static __always_inline int Page##uname(struct page *page) \ +FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ +static __always_inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void folio_set_##lname(struct folio *folio) \ -{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void folio_clear_##lname(struct folio *folio) \ -{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void __folio_set_##lname(struct folio *folio) \ -{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +__FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void __folio_clear_##lname(struct folio *folio) \ -{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ -static __always_inline \ -bool folio_test_set_##lname(struct folio *folio) \ -{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ -static __always_inline \ -bool folio_test_clear_##lname(struct folio *folio) \ -{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } @@ -432,30 +457,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) +#define FOLIO_TEST_FLAG_FALSE(name) \ +static inline bool folio_test_##name(const struct folio *folio) \ +{ return false; } +#define FOLIO_SET_FLAG_NOOP(name) \ +static inline void folio_set_##name(struct folio *folio) { } +#define FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void folio_clear_##name(struct folio *folio) { } +#define __FOLIO_SET_FLAG_NOOP(name) \ +static inline void __folio_set_##name(struct folio *folio) { } +#define __FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void __folio_clear_##name(struct folio *folio) { } +#define FOLIO_TEST_SET_FLAG_FALSE(name) \ +static inline bool folio_test_set_##name(struct folio *folio) \ +{ return false; } +#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ +static inline bool folio_test_clear_##name(struct folio *folio) \ +{ return false; } + +#define FOLIO_FLAG_FALSE(name) \ +FOLIO_TEST_FLAG_FALSE(name) \ +FOLIO_SET_FLAG_NOOP(name) \ +FOLIO_CLEAR_FLAG_NOOP(name) + #define TESTPAGEFLAG_FALSE(uname, lname) \ -static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ +FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_set_##lname(struct folio *folio) { } \ +FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_clear_##lname(struct folio *folio) { } \ +FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void __folio_clear_##lname(struct folio *folio) { } \ +__FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ -static inline bool folio_test_set_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ -static inline bool folio_test_clear_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ @@ -465,7 +511,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) +FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) @@ -532,13 +578,13 @@ PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline bool folio_test_swapcache(struct folio *folio) +static __always_inline bool folio_test_swapcache(const struct folio *folio) { return folio_test_swapbacked(folio) && - test_bit(PG_swapcache, folio_flags(folio, 0)); + test_bit(PG_swapcache, const_folio_flags(folio, 0)); } -static __always_inline bool PageSwapCache(struct page *page) +static __always_inline bool PageSwapCache(const struct page *page) { return folio_test_swapcache(page_folio(page)); } @@ -583,10 +629,10 @@ PAGEFLAG_FALSE(HWPoison, hwpoison) #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) -TESTPAGEFLAG(Young, young, PF_ANY) -SETPAGEFLAG(Young, young, PF_ANY) -TESTCLEARFLAG(Young, young, PF_ANY) -PAGEFLAG(Idle, idle, PF_ANY) +FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_FLAG(idle, FOLIO_HEAD_PAGE) #endif /* @@ -637,22 +683,22 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) */ #define PAGE_MAPPING_DAX_SHARED ((void *)0x1) -static __always_inline bool folio_mapping_flags(struct folio *folio) +static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageMappingFlags(struct page *page) +static __always_inline int PageMappingFlags(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline bool folio_test_anon(struct folio *folio) +static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } -static __always_inline bool PageAnon(struct page *page) +static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } @@ -663,7 +709,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio) PAGE_MAPPING_MOVABLE; } -static __always_inline int __PageMovable(struct page *page) +static __always_inline int __PageMovable(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; @@ -676,13 +722,13 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline bool folio_test_ksm(struct folio *folio) +static __always_inline bool folio_test_ksm(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } -static __always_inline bool PageKsm(struct page *page) +static __always_inline bool PageKsm(const struct page *page) { return folio_test_ksm(page_folio(page)); } @@ -693,6 +739,25 @@ TESTPAGEFLAG_FALSE(Ksm, ksm) u64 stable_page_flags(struct page *page); /** + * folio_xor_flags_has_waiters - Change some folio flags. + * @folio: The folio. + * @mask: Bits set in this word will be changed. + * + * This must only be used for flags which are changed with the folio + * lock held. For example, it is unsafe to use for PG_dirty as that + * can be set without the folio lock held. It can also only be used + * on flags which are in the range 0-6 as some of the implementations + * only affect those bits. + * + * Return: Whether there are tasks waiting on the folio. + */ +static inline bool folio_xor_flags_has_waiters(struct folio *folio, + unsigned long mask) +{ + return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); +} + +/** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. * @@ -702,9 +767,9 @@ u64 stable_page_flags(struct page *page); * some of the bytes in it may be; see the is_partially_uptodate() * address_space operation. */ -static inline bool folio_test_uptodate(struct folio *folio) +static inline bool folio_test_uptodate(const struct folio *folio) { - bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); + bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0)); /* * Must ensure that the data we read out of the folio is loaded * _after_ we've loaded folio->flags to check the uptodate bit. @@ -719,7 +784,7 @@ static inline bool folio_test_uptodate(struct folio *folio) return ret; } -static inline int PageUptodate(struct page *page) +static inline int PageUptodate(const struct page *page) { return folio_test_uptodate(page_folio(page)); } @@ -753,25 +818,20 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -bool __folio_start_writeback(struct folio *folio, bool keep_write); -bool set_page_writeback(struct page *page); +void __folio_start_writeback(struct folio *folio, bool keep_write); +void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) #define folio_start_writeback_keepwrite(folio) \ __folio_start_writeback(folio, true) -static inline bool test_set_page_writeback(struct page *page) +static __always_inline bool folio_test_head(const struct folio *folio) { - return set_page_writeback(page); + return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY)); } -static __always_inline bool folio_test_head(struct folio *folio) -{ - return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY)); -} - -static __always_inline int PageHead(struct page *page) +static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); @@ -787,7 +847,7 @@ CLEARPAGEFLAG(Head, head, PF_ANY) * * Return: True if the folio is larger than one page. */ -static inline bool folio_test_large(struct folio *folio) +static inline bool folio_test_large(const struct folio *folio) { return folio_test_head(folio); } @@ -815,29 +875,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE -int PageHuge(struct page *page); -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) - -/** - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs - * @folio: The folio to test. - * - * Context: Any context. Caller should have a reference on the folio to - * prevent it from being turned into a tail page. - * Return: True for hugetlbfs folios, false for anon folios or folios - * belonging to other filesystems. - */ -static inline bool folio_test_hugetlb(struct folio *folio) -{ - return folio_test_large(folio) && - test_bit(PG_hugetlb, folio_flags(folio, 1)); -} -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -847,7 +884,7 @@ TESTPAGEFLAG_FALSE(Huge, hugetlb) * hugetlbfs pages, but not normal pages. PageTransHuge() can only be * called only in the core VM paths where hugetlbfs pages can't exist. */ -static inline int PageTransHuge(struct page *page) +static inline int PageTransHuge(const struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); @@ -858,7 +895,7 @@ static inline int PageTransHuge(struct page *page) * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ -static inline int PageTransCompound(struct page *page) +static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } @@ -868,7 +905,7 @@ static inline int PageTransCompound(struct page *page) * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ -static inline int PageTransTail(struct page *page) +static inline int PageTransTail(const struct page *page) { return PageTail(page); } @@ -894,33 +931,22 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - -/* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of page_mapcount() won't be + * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value. */ #define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of page_mapcount */ +/* Reserve 0x0000007f to catch underflows of _mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -932,40 +958,43 @@ static inline int page_type_has_type(unsigned int page_type) return (int)page_type < PAGE_MAPCOUNT_RESERVE; } -static inline int page_has_type(struct page *page) +static inline int page_has_type(const struct page *page) { return page_type_has_type(page->page_type); } +#define FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline bool folio_test_##fname(const struct folio *folio)\ +{ \ + return folio_test_type(folio, PG_##lname); \ +} \ +static __always_inline void __folio_set_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ + folio->page.page_type &= ~PG_##lname; \ +} \ +static __always_inline void __folio_clear_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ + folio->page.page_type |= PG_##lname; \ +} + #define PAGE_TYPE_OPS(uname, lname, fname) \ +FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ -static __always_inline int folio_test_##fname(const struct folio *folio)\ -{ \ - return folio_test_type(folio, PG_##lname); \ -} \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ -static __always_inline void __folio_set_##fname(struct folio *folio) \ -{ \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ -} \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ -} \ -static __always_inline void __folio_clear_##fname(struct folio *folio) \ -{ \ - VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ -} \ +} /* * PageBuddy() indicates that the page is free and in the buddy system @@ -1012,11 +1041,42 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +#ifdef CONFIG_HUGETLB_PAGE +FOLIO_TYPE_OPS(hugetlb, hugetlb) +#else +FOLIO_TEST_FLAG_FALSE(hugetlb) +#endif + +/** + * PageHuge - Determine if the page belongs to hugetlbfs + * @page: The page to test. + * + * Context: Any context. + * Return: True for hugetlbfs pages, false for anon pages or pages + * belonging to other filesystems. + */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); -static __always_inline int PageAnonExclusive(struct page *page) +static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); @@ -1078,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) @@ -1089,19 +1149,18 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) * Determine if a page has private stuff, indicating that release routines * should be invoked upon it. */ -static inline int page_has_private(struct page *page) +static inline int page_has_private(const struct page *page) { return !!(page->flags & PAGE_FLAGS_PRIVATE); } -static inline bool folio_has_private(struct folio *folio) +static inline bool folio_has_private(const struct folio *folio) { return page_has_private(&folio->page); } #undef PF_ANY #undef PF_HEAD -#undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #undef PF_SECOND diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index c141ea9a95ef..8cd858d912c4 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -4,7 +4,7 @@ #include <linux/atomic.h> #include <linux/cache.h> -#include <linux/kernel.h> +#include <linux/limits.h> #include <asm/page.h> struct page_counter { diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 119a0c9d2a8b..debdc25f08b9 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -11,7 +11,8 @@ extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned short order); extern void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask); -extern void __split_page_owner(struct page *page, unsigned int nr); +extern void __split_page_owner(struct page *page, int old_order, + int new_order); extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(const struct page *page); @@ -31,10 +32,11 @@ static inline void set_page_owner(struct page *page, __set_page_owner(page, order, gfp_mask); } -static inline void split_page_owner(struct page *page, unsigned int nr) +static inline void split_page_owner(struct page *page, int old_order, + int new_order) { if (static_branch_unlikely(&page_owner_inited)) - __split_page_owner(page, nr); + __split_page_owner(page, old_order, new_order); } static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) { @@ -56,11 +58,11 @@ static inline void reset_page_owner(struct page *page, unsigned short order) { } static inline void set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { } -static inline void split_page_owner(struct page *page, - unsigned short order) +static inline void split_page_owner(struct page *page, int old_order, + int new_order) { } static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e83c4c095041..3f2409b968ec 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,14 +41,14 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER) +#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #else /* CONFIG_HUGETLB_PAGE */ /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_ORDER +#define pageblock_order MAX_PAGE_ORDER #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 351c3b7f93a1..2df35e65557d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -204,6 +204,9 @@ enum mapping_flags { AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES, /* must wait for writeback before modifying + folio contents */ + AS_UNMOVABLE, /* The mapping cannot be moved, ever */ }; /** @@ -289,6 +292,37 @@ static inline void mapping_clear_release_always(struct address_space *mapping) clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); } +static inline bool mapping_stable_writes(const struct address_space *mapping) +{ + return test_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_stable_writes(struct address_space *mapping) +{ + set_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_clear_stable_writes(struct address_space *mapping) +{ + clear_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_unmovable(struct address_space *mapping) +{ + /* + * It's expected unmovable mappings are also unevictable. Compaction + * migrate scanner (isolate_migratepages_block()) relies on this to + * reduce page locking. + */ + set_bit(AS_UNEVICTABLE, &mapping->flags); + set_bit(AS_UNMOVABLE, &mapping->flags); +} + +static inline bool mapping_unmovable(struct address_space *mapping) +{ + return test_bit(AS_UNMOVABLE, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; @@ -789,9 +823,6 @@ static inline pgoff_t folio_next_index(struct folio *folio) */ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) { - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (folio_test_hugetlb(folio)) - return &folio->page; return folio_page(folio, index & (folio_nr_pages(folio) - 1)); } @@ -807,9 +838,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) */ static inline bool folio_contains(struct folio *folio, pgoff_t index) { - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (folio_test_hugetlb(folio)) - return folio->index == index; return index - folio_index(folio) < folio_nr_pages(folio); } @@ -867,10 +895,9 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping, } /* - * Get index of the page within radix-tree (but not for hugetlb pages). - * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) + * Get the offset in PAGE_SIZE (even for hugetlb pages). */ -static inline pgoff_t page_to_index(struct page *page) +static inline pgoff_t page_to_pgoff(struct page *page) { struct page *head; @@ -885,19 +912,6 @@ static inline pgoff_t page_to_index(struct page *page) return head->index + page - head; } -extern pgoff_t hugetlb_basepage_index(struct page *page); - -/* - * Get the offset in PAGE_SIZE (even for hugetlb pages). - * (TODO: hugetlb pages should have ->index in PAGE_SIZE) - */ -static inline pgoff_t page_to_pgoff(struct page *page) -{ - if (unlikely(PageHuge(page))) - return hugetlb_basepage_index(page); - return page_to_index(page); -} - /* * Return byte-offset into filesystem object for page. */ @@ -934,24 +948,16 @@ static inline loff_t folio_file_pos(struct folio *folio) /* * Get the offset in PAGE_SIZE (even for hugetlb folios). - * (TODO: hugetlb folios should have ->index in PAGE_SIZE) */ static inline pgoff_t folio_pgoff(struct folio *folio) { - if (unlikely(folio_test_hugetlb(folio))) - return hugetlb_basepage_index(&folio->page); return folio->index; } -extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, - unsigned long address); - static inline pgoff_t linear_page_index(struct vm_area_struct *vma, unsigned long address) { pgoff_t pgoff; - if (unlikely(is_vm_hugetlb_page(vma))) - return linear_hugepage_index(vma, address); pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; return pgoff; @@ -1129,6 +1135,7 @@ static inline void wait_on_page_locked(struct page *page) folio_wait_locked(page_folio(page)); } +void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 87cc678adc85..5d3a0cccc6bf 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -11,8 +11,8 @@ #include <linux/types.h> -/* 15 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 15 +/* 31 pointers + header align the folio_batch structure to a power of two */ +#define PAGEVEC_SIZE 31 struct folio; @@ -27,6 +27,7 @@ struct folio; */ struct folio_batch { unsigned char nr; + unsigned char i; bool percpu_pvec_drained; struct folio *folios[PAGEVEC_SIZE]; }; @@ -40,12 +41,14 @@ struct folio_batch { static inline void folio_batch_init(struct folio_batch *fbatch) { fbatch->nr = 0; + fbatch->i = 0; fbatch->percpu_pvec_drained = false; } static inline void folio_batch_reinit(struct folio_batch *fbatch) { fbatch->nr = 0; + fbatch->i = 0; } static inline unsigned int folio_batch_count(struct folio_batch *fbatch) @@ -75,6 +78,21 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch, return folio_batch_space(fbatch); } +/** + * folio_batch_next - Return the next folio to process. + * @fbatch: The folio batch being processed. + * + * Use this function to implement a queue of folios. + * + * Return: The next folio in the queue, or NULL if the queue is empty. + */ +static inline struct folio *folio_batch_next(struct folio_batch *fbatch) +{ + if (fbatch->i == fbatch->nr) + return NULL; + return fbatch->folios[fbatch->i++]; +} + void __folio_batch_release(struct folio_batch *pvec); static inline void folio_batch_release(struct folio_batch *fbatch) diff --git a/include/linux/parport.h b/include/linux/parport.h index 999eddd619b7..fff39bc30629 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -180,8 +180,6 @@ struct ieee1284_info { struct semaphore irq; }; -#define PARPORT_NAME_MAX_LEN 15 - /* A parallel port */ struct parport { unsigned long base; /* base address */ diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 6b1301e2498e..3a4860bd2758 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -93,6 +93,6 @@ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) /* for DT-based PCI controllers that support ECAM */ int pci_host_common_probe(struct platform_device *pdev); -int pci_host_common_remove(struct platform_device *pdev); +void pci_host_common_remove(struct platform_device *pdev); #endif #endif diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 5cb694031072..cc2f70d061c8 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -19,13 +19,6 @@ enum pci_epc_interface_type { SECONDARY_INTERFACE, }; -enum pci_epc_irq_type { - PCI_EPC_IRQ_UNKNOWN, - PCI_EPC_IRQ_LEGACY, - PCI_EPC_IRQ_MSI, - PCI_EPC_IRQ_MSIX, -}; - static inline const char * pci_epc_interface_string(enum pci_epc_interface_type type) { @@ -79,7 +72,7 @@ struct pci_epc_ops { u16 interrupts, enum pci_barno, u32 offset); int (*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - enum pci_epc_irq_type type, u16 interrupt_num); + unsigned int type, u16 interrupt_num); int (*map_msi_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr, u8 interrupt_num, u32 entry_size, u32 *msi_data, @@ -122,7 +115,7 @@ struct pci_epc_mem { * struct pci_epc - represents the PCI EPC device * @dev: PCI EPC device * @pci_epf: list of endpoint functions present in this EPC device - * list_lock: Mutex for protecting pci_epf list + * @list_lock: Mutex for protecting pci_epf list * @ops: function pointers for performing endpoint operations * @windows: array of address space of the endpoint controller * @mem: first window of the endpoint controller, which corresponds to @@ -153,15 +146,44 @@ struct pci_epc { }; /** + * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. + * @BAR_FIXED: The BAR mask is fixed by the hardware. + * @BAR_RESERVED: The BAR should not be touched by an EPF driver. + */ +enum pci_epc_bar_type { + BAR_PROGRAMMABLE = 0, + BAR_FIXED, + BAR_RESERVED, +}; + +/** + * struct pci_epc_bar_desc - hardware description for a BAR + * @type: the type of the BAR + * @fixed_size: the fixed size, only applicable if type is BAR_FIXED_MASK. + * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR + * should be configured as 32-bit or 64-bit, the EPF driver must + * configure this BAR as 64-bit. Additionally, the BAR succeeding + * this BAR must be set to type BAR_RESERVED. + * + * only_64bit should not be set on a BAR of type BAR_RESERVED. + * (If BARx is a 64-bit BAR that an EPF driver is not allowed to + * touch, then both BARx and BARx+1 must be set to type + * BAR_RESERVED.) + */ +struct pci_epc_bar_desc { + enum pci_epc_bar_type type; + u64 fixed_size; + bool only_64bit; +}; + +/** * struct pci_epc_features - features supported by a EPC device per function * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up * @core_init_notifier: indicate cores that can notify about their availability * for initialization * @msi_capable: indicate if the endpoint function has MSI capability * @msix_capable: indicate if the endpoint function has MSI-X capability - * @reserved_bar: bitmap to indicate reserved BAR unavailable to function driver - * @bar_fixed_64bit: bitmap to indicate fixed 64bit BARs - * @bar_fixed_size: Array specifying the size supported by each BAR + * @bar: array specifying the hardware description for each BAR * @align: alignment size required for BAR buffer allocation */ struct pci_epc_features { @@ -169,9 +191,7 @@ struct pci_epc_features { unsigned int core_init_notifier : 1; unsigned int msi_capable : 1; unsigned int msix_capable : 1; - u8 reserved_bar; - u8 bar_fixed_64bit; - u64 bar_fixed_size[PCI_STD_NUM_BARS]; + struct pci_epc_bar_desc bar[PCI_STD_NUM_BARS]; size_t align; }; @@ -229,7 +249,7 @@ int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr, u8 interrupt_num, u32 entry_size, u32 *msi_data, u32 *msi_addr_offset); int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - enum pci_epc_irq_type type, u16 interrupt_num); + unsigned int type, u16 interrupt_num); int pci_epc_start(struct pci_epc *epc); void pci_epc_stop(struct pci_epc *epc); const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 3f44b6aec477..adee6a1b35db 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -15,6 +15,7 @@ #include <linux/pci.h> struct pci_epf; +struct pci_epc_features; enum pci_epc_interface_type; enum pci_barno { @@ -68,7 +69,7 @@ struct pci_epf_ops { }; /** - * struct pci_epf_event_ops - Callbacks for capturing the EPC events + * struct pci_epc_event_ops - Callbacks for capturing the EPC events * @core_init: Callback for the EPC initialization complete event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event @@ -98,7 +99,7 @@ struct pci_epf_driver { void (*remove)(struct pci_epf *epf); struct device_driver driver; - struct pci_epf_ops *ops; + const struct pci_epf_ops *ops; struct module *owner; struct list_head epf_group; const struct pci_epf_device_id *id_table; @@ -216,7 +217,8 @@ int __pci_epf_register_driver(struct pci_epf_driver *driver, struct module *owner); void pci_epf_unregister_driver(struct pci_epf_driver *driver); void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, - size_t align, enum pci_epc_interface_type type); + const struct pci_epc_features *epc_features, + enum pci_epc_interface_type type); void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); int pci_epf_bind(struct pci_epf *epf); diff --git a/include/linux/pci.h b/include/linux/pci.h index 8c7c2c3c6c65..16493426a04f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -390,9 +390,9 @@ struct pci_dev { unsigned int d3hot_delay; /* D3hot->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ + u16 l1ss; /* L1SS Capability pointer */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ - u16 l1ss; /* L1SS Capability pointer */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ #endif @@ -713,6 +713,31 @@ static inline bool pci_is_bridge(struct pci_dev *dev) dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; } +/** + * pci_is_vga - check if the PCI device is a VGA device + * @pdev: PCI device + * + * The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define + * VGA Base Class and Sub-Classes: + * + * 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible + * 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) + * + * Return true if the PCI device is a VGA device and uses the legacy VGA + * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and + * aliases). + */ +static inline bool pci_is_vga(struct pci_dev *pdev) +{ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + return true; + + if ((pdev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA) + return true; + + return false; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -861,7 +886,6 @@ struct module; /** * struct pci_driver - PCI driver structure - * @node: List of driver structures. * @name: Driver name. * @id_table: Pointer to table of device IDs the driver is * interested in. Most drivers should export this @@ -916,7 +940,6 @@ struct module; * own I/O address space. */ struct pci_driver { - struct list_head node; const char *name; const struct pci_device_id *id_table; /* Must be non-NULL for probe to be called */ int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ @@ -1049,11 +1072,13 @@ enum { PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* Scan all, not just dev 0 */ }; -#define PCI_IRQ_LEGACY (1 << 0) /* Allow legacy interrupts */ +#define PCI_IRQ_INTX (1 << 0) /* Allow INTx interrupts */ #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ +#define PCI_IRQ_LEGACY PCI_IRQ_INTX /* Deprecated! Use PCI_IRQ_INTX */ + /* These external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI @@ -1146,6 +1171,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); struct pci_dev *pci_dev_get(struct pci_dev *dev); void pci_dev_put(struct pci_dev *dev); +DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) void pci_remove_bus(struct pci_bus *b); void pci_stop_and_remove_bus_device(struct pci_dev *dev); void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); @@ -1181,6 +1207,8 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from); + int pci_dev_present(const struct pci_device_id *ids); int pci_bus_read_config_byte(struct pci_bus *bus, unsigned int devfn, @@ -1213,6 +1241,8 @@ int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val); int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val); int pci_write_config_word(const struct pci_dev *dev, int where, u16 val); int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val); +void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos, + u32 clear, u32 set); int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val); @@ -1338,6 +1368,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps); u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +int pcie_link_speed_mbps(struct pci_dev *pdev); void pcie_print_link_status(struct pci_dev *dev); int pcie_reset_flr(struct pci_dev *dev, bool probe); int pcie_flr(struct pci_dev *dev); @@ -1391,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1594,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -1624,6 +1658,8 @@ struct msix_entry { u16 entry; /* Driver uses to specify entry, OS writes */ }; +struct msi_domain_template; + #ifdef CONFIG_PCI_MSI int pci_msi_vec_count(struct pci_dev *dev); void pci_disable_msi(struct pci_dev *dev); @@ -1656,6 +1692,11 @@ void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data); +struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc); +void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1719,6 +1760,25 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline bool pci_create_ims_domain(struct pci_dev *pdev, + const struct msi_domain_template *template, + unsigned int hwsize, void *data) +{ return false; } + +static inline struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, + union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc) +{ + struct msi_map map = { .index = -ENOSYS, }; + + return map; +} + +static inline void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map) +{ +} + #endif /** @@ -1777,6 +1837,7 @@ extern bool pcie_ports_native; int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); int pci_enable_link_state(struct pci_dev *pdev, int state); +int pci_enable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1787,6 +1848,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } static inline int pci_enable_link_state(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } @@ -1819,6 +1882,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); void pci_dev_lock(struct pci_dev *dev); int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); +DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), @@ -1924,6 +1988,9 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } +static inline struct pci_dev *pci_get_base_class(unsigned int class, + struct pci_dev *from) +{ return NULL; } static inline int pci_dev_present(const struct pci_device_id *ids) { return 0; } @@ -1961,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, @@ -2072,14 +2141,14 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma); (pci_resource_end((dev), (bar)) ? \ resource_size(pci_resource_n((dev), (bar))) : 0) -#define __pci_dev_for_each_res0(dev, res, ...) \ - for (unsigned int __b = 0; \ - res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES; \ +#define __pci_dev_for_each_res0(dev, res, ...) \ + for (unsigned int __b = 0; \ + __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b)); \ __b++) -#define __pci_dev_for_each_res1(dev, res, __b) \ - for (__b = 0; \ - res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES; \ +#define __pci_dev_for_each_res1(dev, res, __b) \ + for (__b = 0; \ + __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b)); \ __b++) #define pci_dev_for_each_resource(dev, res, ...) \ @@ -2448,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, @@ -2616,14 +2690,6 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -struct msi_domain_template; - -bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, - unsigned int hwsize, void *data); -struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, - const struct irq_affinity_desc *affdesc); -void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); - #include <linux/dma-mapping.h> #define pci_printk(level, pdev, fmt, arg...) \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5fb3d4c393a9..a0c75e467df3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -180,6 +180,8 @@ #define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 #define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 +#define PCI_VENDOR_ID_ITTIM 0x0b48 + #define PCI_VENDOR_ID_COMPAQ 0x0e11 #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc @@ -579,6 +581,8 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b +#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 @@ -2601,6 +2605,8 @@ #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 +#define PCI_VENDOR_ID_ALIBABA 0x1ded + #define PCI_VENDOR_ID_TEHUTI 0x1fc9 #define PCI_DEVICE_ID_TEHUTI_3009 0x3009 #define PCI_DEVICE_ID_TEHUTI_3010 0x3010 @@ -3061,6 +3067,7 @@ #define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0 #define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2 #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_HDA_ARL 0x7728 #define PCI_DEVICE_ID_INTEL_HDA_RPL_S 0x7a50 #define PCI_DEVICE_ID_INTEL_HDA_ADL_S 0x7ad0 #define PCI_DEVICE_ID_INTEL_HDA_MTL 0x7e28 diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** diff --git a/include/linux/peci.h b/include/linux/peci.h index 9b3d36aff431..90e241458ef6 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -58,7 +58,6 @@ static inline struct peci_controller *to_peci_controller(void *d) /** * struct peci_device - PECI device * @dev: device object to register PECI device to the device model - * @controller: manages the bus segment hosting this PECI device * @info: PECI device characteristics * @info.family: device family * @info.model: device model diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 68fac2e7cbe6..8c677f185901 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -132,6 +132,7 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); +extern size_t pcpu_alloc_size(void __percpu *__pdata); DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index d01351b1526f..3a44dd1e33d2 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -57,6 +57,8 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); +bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, + s64 amount, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) @@ -69,6 +71,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } +static inline bool +percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) +{ + return __percpu_counter_limited_add(fbc, limit, amount, + percpu_counter_batch); +} + /* * With percpu_counter_add_local() and percpu_counter_sub_local(), counts * are accumulated in local per cpu counter and not in fbc->count until @@ -185,6 +194,27 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) local_irq_restore(flags); } +static inline bool +percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) +{ + unsigned long flags; + bool good = false; + s64 count; + + if (amount == 0) + return true; + + local_irq_save(flags); + count = fbc->count + amount; + if ((amount > 0 && count <= limit) || + (amount < 0 && count >= limit)) { + fbc->count = count; + good = true; + } + local_irq_restore(flags); + return good; +} + /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 143fbc10ecfe..b3b34f6670cf 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -60,12 +60,6 @@ struct pmu_hw_events { DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; - - /* * When using percpu IRQs, we need a percpu dev_id. Place it here as we * already have to allocate this struct per cpu. */ @@ -189,4 +183,26 @@ void armpmu_free_irq(int irq, int cpu); #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" #define ARMV8_TRBE_PDEV_NAME "arm,trbe" +/* Why does everything I do descend into this? */ +#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi + +#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) + +#define GEN_PMU_FORMAT_ATTR(name) \ + PMU_FORMAT_ATTR(name, \ + _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI)) + +#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ + ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0)) + +#define ATTR_CFG_GET_FLD(attr, name) \ + _ATTR_CFG_GET_FLD(attr, \ + ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI) + #endif /* __ARM_PMU_H__ */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index e3899bd77f5c..46377e134d67 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -215,45 +215,54 @@ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */ +/* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \ + ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \ + ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ + ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP) /* * PMOVSR: counters overflow flag status reg */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK +#define ARMV8_PMU_OVSR_P GENMASK(30, 0) +#define ARMV8_PMU_OVSR_C BIT(31) +/* Mask for writable bits is both P and C fields */ +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_TH GENMASK_ULL(43, 32) /* arm64 only */ +#define ARMV8_PMU_EVTYPE_TC GENMASK_ULL(63, 61) /* arm64 only */ /* * Event filters for PMUv3 */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29) +#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26) /* * PMUSERENR: user enable reg */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ #define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* Mask for writable bits */ +#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ + ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) /* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf +#define ARMV8_PMU_SLOTS GENMASK(7, 0) +#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8) +#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16) +#define ARMV8_PMU_THWIDTH GENMASK(23, 20) /* * This code is really good diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7b5406e3288d..d2a15c0c6f8a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -843,11 +843,11 @@ struct perf_event { }; /* - * ,-----------------------[1:n]----------------------. - * V V - * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event - * ^ ^ | | - * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * ,-----------------------[1:n]------------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event + * | | + * `--[n:1]-> pmu <-[1:n]--' * * * struct perf_event_pmu_context lifetime is refcount based and RCU freed @@ -865,6 +865,9 @@ struct perf_event { * ctx->mutex pinning the configuration. Since we hold a reference on * group_leader (through the filedesc) it can't go away, therefore it's * associated pmu_ctx must exist and cannot change due to ctx->mutex. + * + * perf_event holds a refcount on perf_event_context + * perf_event holds a refcount on perf_event_pmu_context */ struct perf_event_pmu_context { struct pmu *pmu; @@ -879,6 +882,7 @@ struct perf_event_pmu_context { unsigned int embedded : 1; unsigned int nr_events; + unsigned int nr_cgroups; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; @@ -1139,6 +1143,15 @@ static inline bool branch_sample_priv(const struct perf_event *event) return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; } +static inline bool branch_sample_counters(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; +} + +static inline bool branch_sample_call_stack(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} struct perf_sample_data { /* @@ -1173,6 +1186,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + u64 *br_stack_cntr; union perf_sample_weight weight; union perf_mem_data_src data_src; u64 txn; @@ -1250,7 +1264,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, - struct perf_branch_stack *brs) + struct perf_branch_stack *brs, + u64 *brs_cntr) { int size = sizeof(u64); /* nr */ @@ -1258,7 +1273,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, size += sizeof(u64); size += brs->nr * sizeof(struct perf_branch_entry); + /* + * The extension space for counters is appended after the + * struct perf_branch_stack. It is used to store the occurrences + * of events of each branch. + */ + if (brs_cntr) + size += brs->nr * sizeof(u64); + data->br_stack = brs; + data->br_stack_cntr = brs_cntr; data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } @@ -1574,7 +1598,7 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_proc_update_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index af7639c3b0a3..85fc7554cd52 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -184,6 +184,13 @@ static inline int pmd_young(pmd_t pmd) } #endif +#ifndef pmd_dirty +static inline int pmd_dirty(pmd_t pmd) +{ + return 0; +} +#endif + /* * A facility to provide lazy MMU batching. This allows PTE updates and * page invalidations to be delayed until a call to leave lazy MMU mode @@ -205,15 +212,37 @@ static inline int pmd_young(pmd_t pmd) #define arch_flush_lazy_mmu_mode() do {} while (0) #endif -#ifndef set_ptes +#ifndef pte_batch_hint +/** + * pte_batch_hint - Number of pages that can be added to batch without scanning. + * @ptep: Page table pointer for the entry. + * @pte: Page table entry. + * + * Some architectures know that a set of contiguous ptes all map the same + * contiguous memory with the same permissions. In this case, it can provide a + * hint to aid pte batching without the core code needing to scan every pte. + * + * An architecture implementation may ignore the PTE accessed state. Further, + * the dirty state must apply atomically to all the PTEs described by the hint. + * + * May be overridden by the architecture, else pte_batch_hint is always 1. + */ +static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) +{ + return 1; +} +#endif -#ifndef pte_next_pfn -static inline pte_t pte_next_pfn(pte_t pte) +#ifndef pte_advance_pfn +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { - return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } #endif +#define pte_next_pfn(pte) pte_advance_pfn(pte, 1) + +#ifndef set_ptes /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. @@ -222,6 +251,10 @@ static inline pte_t pte_next_pfn(pte_t pte) * @pte: Page table entry for the first page. * @nr: Number of pages to map. * + * When nr==1, initial state of pte may be present or not present, and new state + * may be present or not present. When nr>1, initial state of all ptes must be + * not present, and new state must be present. + * * May be overridden by the architecture, or the architecture can define * set_pte() and PFN_PTE_SHIFT. * @@ -292,6 +325,27 @@ static inline pmd_t pmdp_get(pmd_t *pmdp) } #endif +#ifndef pudp_get +static inline pud_t pudp_get(pud_t *pudp) +{ + return READ_ONCE(*pudp); +} +#endif + +#ifndef p4dp_get +static inline p4d_t p4dp_get(p4d_t *p4dp) +{ + return READ_ONCE(*p4dp); +} +#endif + +#ifndef pgdp_get +static inline pgd_t pgdp_get(pgd_t *pgdp) +{ + return READ_ONCE(*pgdp); +} +#endif + #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, @@ -375,7 +429,7 @@ static inline bool arch_has_hw_nonleaf_pmd_young(void) */ static inline bool arch_has_hw_pte_young(void) { - return false; + return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG); } #endif @@ -552,6 +606,76 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, } #endif +#ifndef get_and_clear_full_ptes +/** + * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the + * returned PTE. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr, int full) +{ + pte_t pte, tmp_pte; + + pte = ptep_get_and_clear_full(mm, addr, ptep, full); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} +#endif + +#ifndef clear_full_ptes +/** + * clear_full_ptes - Clear present PTEs that map consecutive pages of the same + * folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_get_and_clear_full(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + ptep_get_and_clear_full(mm, addr, ptep, full); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif /* * If two threads concurrently fault at the same page, the thread that @@ -622,6 +746,37 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres } #endif +#ifndef wrprotect_ptes +/** + * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same + * folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to write-protect. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_set_wrprotect(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + for (;;) { + ptep_set_wrprotect(mm, addr, ptep); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + /* * On some architectures hardware does not set page access bit when accessing * memory page, it is responsibility of software setting this bit. It brings @@ -1622,16 +1777,16 @@ typedef unsigned int pgtbl_mod_mask; * Only meaningful when called on a valid entry. */ #ifndef pgd_leaf -#define pgd_leaf(x) 0 +#define pgd_leaf(x) false #endif #ifndef p4d_leaf -#define p4d_leaf(x) 0 +#define p4d_leaf(x) false #endif #ifndef pud_leaf -#define pud_leaf(x) 0 +#define pud_leaf(x) false #endif #ifndef pmd_leaf -#define pmd_leaf(x) 0 +#define pmd_leaf(x) false #endif #ifndef pgd_leaf_size diff --git a/include/linux/phy.h b/include/linux/phy.h index 1351b802ffcf..3f68b8239bb1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -30,6 +30,7 @@ #include <linux/refcount.h> #include <linux/atomic.h> +#include <net/eee.h> #define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ SUPPORTED_TP | \ @@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) @@ -65,6 +67,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; #define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) +#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; @@ -327,7 +330,9 @@ struct mdio_bus_stats { /** * struct phy_package_shared - Shared information in PHY packages - * @addr: Common PHY address used to combine PHYs in one package + * @base_addr: Base PHY address of PHY package used to combine PHYs + * in one package and for offset calculation of phy_package_read/write + * @np: Pointer to the Device Node if PHY package defined in DT * @refcnt: Number of PHYs connected to this shared data * @flags: Initialization of PHY package * @priv_size: Size of the shared private data @priv @@ -338,7 +343,9 @@ struct mdio_bus_stats { * phy_package_leave(). */ struct phy_package_shared { - int addr; + u8 base_addr; + /* With PHY package defined in DT this points to the PHY package node */ + struct device_node *np; refcount_t refcnt; unsigned long flags; size_t priv_size; @@ -568,7 +575,6 @@ struct macsec_ops; * - Bits [31:24] are reserved for defining generic * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) - * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY * @sfp_bus_attached: Flag indicating whether the SFP bus has been attached * @sfp_bus: SFP bus attached to this PHY's fiber port @@ -589,6 +595,8 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @eee_enabled: Flag indicating whether the EEE feature is enabled + * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -605,6 +613,8 @@ struct macsec_ops; * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, * requiring a rerun of the interrupt handler after resume * @interface: enum phy_interface_t value + * @possible_interfaces: bitmap if interface modes that the attached PHY + * will switch between depending on media speed. * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics * @ehdr: nNtlink header for cable diagnostics @@ -636,7 +646,7 @@ struct phy_device { /* Information about the PHY type */ /* And management functions */ - struct phy_driver *drv; + const struct phy_driver *drv; struct device_link *devlink; @@ -674,6 +684,7 @@ struct phy_device { u32 dev_flags; phy_interface_t interface; + DECLARE_PHY_INTERFACE_MASK(possible_interfaces); /* * forced speed & duplex (no autoneg) @@ -695,7 +706,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); - /* used for eee validation */ + /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); bool eee_enabled; @@ -705,6 +716,8 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; + bool enable_tx_lpi; + struct eee_config eee_cfg; #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; @@ -849,6 +862,15 @@ struct phy_plca_status { bool pst; }; +/* Modes for PHY LED configuration */ +enum phy_led_modes { + PHY_LED_ACTIVE_LOW = 0, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + + /* keep it last */ + __PHY_LED_MODES_NUM, +}; + /** * struct phy_led: An LED driven by the PHY * @@ -1142,6 +1164,19 @@ struct phy_driver { int (*led_hw_control_get)(struct phy_device *dev, u8 index, unsigned long *rules); + /** + * @led_polarity_set: Set the LED polarity modes + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @modes: bitmap of LED polarity modes + * + * Configure LED with all the required polarity modes in @modes + * to make it correctly turn ON or OFF. + * + * Returns 0, or an error code. + */ + int (*led_polarity_set)(struct phy_device *dev, int index, + unsigned long modes); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1560,9 +1595,11 @@ static inline bool phy_has_txtstamp(struct phy_device *phydev) return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp; } -static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr) +static inline int phy_hwtstamp(struct phy_device *phydev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); + return phydev->mii_ts->hwtstamp(phydev->mii_ts, cfg, extack); } static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, @@ -1736,6 +1773,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_config_aneg(struct phy_device *phydev); +int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); @@ -1845,7 +1883,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 37 */ int genphy_c37_config_aneg(struct phy_device *phydev); -int genphy_c37_read_status(struct phy_device *phydev); +int genphy_c37_read_status(struct phy_device *phydev, bool *changed); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); @@ -1860,6 +1898,7 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); +int genphy_c45_pma_read_ext_abilities(struct phy_device *phydev); int genphy_c45_pma_baset1_read_abilities(struct phy_device *phydev); int genphy_c45_read_eee_abilities(struct phy_device *phydev); int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); @@ -1879,9 +1918,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev, int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, unsigned long *lp, bool *is_enabled); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); @@ -1931,8 +1970,10 @@ int phy_get_rate_matching(struct phy_device *phydev, void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); +void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_support_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); @@ -1959,8 +2000,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); -int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); -int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); +int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); @@ -1969,10 +2010,13 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); -int phy_package_join(struct phy_device *phydev, int addr, size_t priv_size); +int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size); +int of_phy_package_join(struct phy_device *phydev, size_t priv_size); void phy_package_leave(struct phy_device *phydev); int devm_phy_package_join(struct device *dev, struct phy_device *phydev, - int addr, size_t priv_size); + int base_addr, size_t priv_size); +int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, + size_t priv_size); int __init mdio_bus_init(void); void mdio_bus_exit(void); @@ -1995,48 +2039,83 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); -static inline int phy_package_read(struct phy_device *phydev, u32 regnum) +static inline int phy_package_address(struct phy_device *phydev, + unsigned int addr_offset) { struct phy_package_shared *shared = phydev->shared; + u8 base_addr = shared->base_addr; - if (!shared) + if (addr_offset >= PHY_MAX_ADDR - base_addr) return -EIO; - return mdiobus_read(phydev->mdio.bus, shared->addr, regnum); + /* we know that addr will be in the range 0..31 and thus the + * implicit cast to a signed int is not a problem. + */ + return base_addr + addr_offset; } -static inline int __phy_package_read(struct phy_device *phydev, u32 regnum) +static inline int phy_package_read(struct phy_device *phydev, + unsigned int addr_offset, u32 regnum) { - struct phy_package_shared *shared = phydev->shared; + int addr = phy_package_address(phydev, addr_offset); - if (!shared) - return -EIO; + if (addr < 0) + return addr; - return __mdiobus_read(phydev->mdio.bus, shared->addr, regnum); + return mdiobus_read(phydev->mdio.bus, addr, regnum); +} + +static inline int __phy_package_read(struct phy_device *phydev, + unsigned int addr_offset, u32 regnum) +{ + int addr = phy_package_address(phydev, addr_offset); + + if (addr < 0) + return addr; + + return __mdiobus_read(phydev->mdio.bus, addr, regnum); } static inline int phy_package_write(struct phy_device *phydev, - u32 regnum, u16 val) + unsigned int addr_offset, u32 regnum, + u16 val) { - struct phy_package_shared *shared = phydev->shared; + int addr = phy_package_address(phydev, addr_offset); - if (!shared) - return -EIO; + if (addr < 0) + return addr; - return mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); + return mdiobus_write(phydev->mdio.bus, addr, regnum, val); } static inline int __phy_package_write(struct phy_device *phydev, - u32 regnum, u16 val) + unsigned int addr_offset, u32 regnum, + u16 val) { - struct phy_package_shared *shared = phydev->shared; + int addr = phy_package_address(phydev, addr_offset); - if (!shared) - return -EIO; + if (addr < 0) + return addr; - return __mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); + return __mdiobus_write(phydev->mdio.bus, addr, regnum, val); } +int __phy_package_read_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum); + +int phy_package_read_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum); + +int __phy_package_write_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum, u16 val); + +int phy_package_write_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum, u16 val); + static inline bool __phy_package_set_once(struct phy_device *phydev, unsigned int b) { @@ -2058,7 +2137,7 @@ static inline bool phy_package_probe_once(struct phy_device *phydev) return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); } -extern struct bus_type mdio_bus_type; +extern const struct bus_type mdio_bus_type; struct mdio_board_info { const char *bus_id; diff --git a/include/linux/phy/phy-dp.h b/include/linux/phy/phy-dp.h index 18cad23642cd..9cce5766bc0b 100644 --- a/include/linux/phy/phy-dp.h +++ b/include/linux/phy/phy-dp.h @@ -8,6 +8,9 @@ #include <linux/types.h> +#define PHY_SUBMODE_DP 0 +#define PHY_SUBMODE_EDP 1 + /** * struct phy_configure_opts_dp - DisplayPort PHY configuration set * diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index f6d607ef0e80..03cd5bae92d3 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -122,6 +122,11 @@ struct phy_ops { union phy_configure_opts *opts); int (*reset)(struct phy *phy); int (*calibrate)(struct phy *phy); + + /* notify phy connect status change */ + int (*connect)(struct phy *phy, int port); + int (*disconnect)(struct phy *phy, int port); + void (*release)(struct phy *phy); struct module *owner; }; @@ -176,7 +181,7 @@ struct phy_provider { struct module *owner; struct list_head list; struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); }; /** @@ -243,6 +248,8 @@ static inline enum phy_mode phy_get_mode(struct phy *phy) } int phy_reset(struct phy *phy); int phy_calibrate(struct phy *phy); +int phy_notify_connect(struct phy *phy, int port); +int phy_notify_disconnect(struct phy *phy, int port); static inline int phy_get_bus_width(struct phy *phy) { return phy->attrs.bus_width; @@ -265,7 +272,7 @@ void phy_put(struct device *dev, struct phy *phy); void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_get(struct device_node *np, const char *con_id); struct phy *of_phy_simple_xlate(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); struct phy *phy_create(struct device *dev, struct device_node *node, const struct phy_ops *ops); struct phy *devm_phy_create(struct device *dev, struct device_node *node, @@ -275,11 +282,11 @@ void devm_phy_destroy(struct device *dev, struct phy *phy); struct phy_provider *__of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); struct phy_provider *__devm_of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void of_phy_provider_unregister(struct phy_provider *phy_provider); void devm_of_phy_provider_unregister(struct device *dev, struct phy_provider *phy_provider); @@ -396,6 +403,20 @@ static inline int phy_calibrate(struct phy *phy) return -ENOSYS; } +static inline int phy_notify_connect(struct phy *phy, int index) +{ + if (!phy) + return 0; + return -ENOSYS; +} + +static inline int phy_notify_disconnect(struct phy *phy, int index) +{ + if (!phy) + return 0; + return -ENOSYS; +} + static inline int phy_configure(struct phy *phy, union phy_configure_opts *opts) { @@ -479,7 +500,7 @@ static inline struct phy *of_phy_get(struct device_node *np, const char *con_id) } static inline struct phy *of_phy_simple_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { return ERR_PTR(-ENOSYS); } @@ -509,7 +530,7 @@ static inline void devm_phy_destroy(struct device *dev, struct phy *phy) static inline struct phy_provider *__of_phy_provider_register( struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } @@ -517,7 +538,7 @@ static inline struct phy_provider *__of_phy_provider_register( static inline struct phy_provider *__devm_of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 70998e6dd6fd..6ca51e0080ec 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2b886ea654bb..9a57deefcb07 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -99,72 +99,6 @@ static inline bool phylink_autoneg_inband(unsigned int mode) } /** - * phylink_pcs_neg_mode() - helper to determine PCS inband mode - * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. - * @interface: interface mode to be used - * @advertising: adertisement ethtool link mode mask - * - * Determines the negotiation mode to be used by the PCS, and returns - * one of: - * - * - %PHYLINK_PCS_NEG_NONE: interface mode does not support inband - * - %PHYLINK_PCS_NEG_OUTBAND: an out of band mode (e.g. reading the PHY) - * will be used. - * - %PHYLINK_PCS_NEG_INBAND_DISABLED: inband mode selected but autoneg - * disabled - * - %PHYLINK_PCS_NEG_INBAND_ENABLED: inband mode selected and autoneg enabled - * - * Note: this is for cases where the PCS itself is involved in negotiation - * (e.g. Clause 37, SGMII and similar) not Clause 73. - */ -static inline unsigned int phylink_pcs_neg_mode(unsigned int mode, - phy_interface_t interface, - const unsigned long *advertising) -{ - unsigned int neg_mode; - - switch (interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: - case PHY_INTERFACE_MODE_USXGMII: - /* These protocols are designed for use with a PHY which - * communicates its negotiation result back to the MAC via - * inband communication. Note: there exist PHYs that run - * with SGMII but do not send the inband data. - */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; - break; - - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* 1000base-X is designed for use media-side for Fibre - * connections, and thus the Autoneg bit needs to be - * taken into account. We also do this for 2500base-X - * as well, but drivers may not support this, so may - * need to override this. - */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising)) - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED; - break; - - default: - neg_mode = PHYLINK_PCS_NEG_NONE; - break; - } - - return neg_mode; -} - -/** * struct phylink_link_state - link state structure * @advertising: ethtool bitmask containing advertised link modes * @lp_advertising: ethtool bitmask containing link partner advertised link @@ -227,7 +161,7 @@ void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); /** * struct phylink_mac_ops - MAC operations structure. - * @validate: Validate and update the link configuration. + * @mac_get_caps: Get MAC capabilities for interface mode. * @mac_select_pcs: Select a PCS for the interface mode. * @mac_prepare: prepare for a major reconfiguration of the interface. * @mac_config: configure the MAC for the selected mode and state. @@ -238,9 +172,8 @@ void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); * The individual methods are described more fully below. */ struct phylink_mac_ops { - void (*validate)(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); + unsigned long (*mac_get_caps)(struct phylink_config *config, + phy_interface_t interface); struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, phy_interface_t interface); int (*mac_prepare)(struct phylink_config *config, unsigned int mode, @@ -259,39 +192,17 @@ struct phylink_mac_ops { #if 0 /* For kernel-doc purposes only. */ /** - * validate - Validate and update the link configuration + * mac_get_caps: Get MAC capabilities for interface mode. * @config: a pointer to a &struct phylink_config. - * @supported: ethtool bitmask for supported link modes. - * @state: a pointer to a &struct phylink_link_state. + * @interface: PHY interface mode. * - * Clear bits in the @supported and @state->advertising masks that - * are not supportable by the MAC. - * - * Note that the PHY may be able to transform from one connection - * technology to another, so, eg, don't clear 1000BaseX just - * because the MAC is unable to BaseX mode. This is more about - * clearing unsupported speeds and duplex settings. The port modes - * should not be cleared; phylink_set_port_modes() will help with this. - * - * When @config->supported_interfaces has been set, phylink will iterate - * over the supported interfaces to determine the full capability of the - * MAC. The validation function must not print errors if @state->interface - * is set to an unexpected value. - * - * When @config->supported_interfaces is empty, phylink will call this - * function with @state->interface set to %PHY_INTERFACE_MODE_NA, and - * expects the MAC driver to return all supported link modes. - * - * If the @state->interface mode is not supported, then the @supported - * mask must be cleared. - * - * This member is optional; if not set, the generic validator will be - * used making use of @config->mac_capabilities and - * @config->supported_interfaces to determine which link modes are - * supported. + * Optional method. When not provided, config->mac_capabilities will be used. + * When implemented, this returns the MAC capabilities for the specified + * interface mode where there is some special handling required by the MAC + * driver (e.g. not supporting half-duplex in certain interface modes.) */ -void validate(struct phylink_config *config, unsigned long *supported, - struct phylink_link_state *state); +unsigned long mac_get_caps(struct phylink_config *config, + phy_interface_t interface); /** * mac_select_pcs: Select a PCS for the interface mode. * @config: a pointer to a &struct phylink_config. @@ -569,9 +480,6 @@ void pcs_disable(struct phylink_pcs *pcs); * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. - * - * When present, this overrides pcs_get_state() in &struct - * phylink_pcs_ops. */ void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); @@ -636,17 +544,6 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); #endif -void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); -unsigned long phylink_get_capabilities(phy_interface_t interface, - unsigned long mac_capabilities, - int rate_matching); -void phylink_validate_mask_caps(unsigned long *supported, - struct phylink_link_state *state, - unsigned long caps); -void phylink_generic_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); - struct phylink *phylink_create(struct phylink_config *, const struct fwnode_handle *, phy_interface_t, @@ -684,8 +581,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); int phylink_init_eee(struct phylink *, bool); -int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); +int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); int phylink_speed_down(struct phylink *pl, bool sync); int phylink_speed_up(struct phylink *pl); diff --git a/include/linux/pid.h b/include/linux/pid.h index 653a527574c4..a3aad9b4074c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,18 +2,12 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H +#include <linux/pid_types.h> #include <linux/rculist.h> -#include <linux/wait.h> +#include <linux/rcupdate.h> #include <linux/refcount.h> - -enum pid_type -{ - PIDTYPE_PID, - PIDTYPE_TGID, - PIDTYPE_PGID, - PIDTYPE_SID, - PIDTYPE_MAX, -}; +#include <linux/sched.h> +#include <linux/wait.h> /* * What is struct pid? @@ -51,6 +45,8 @@ enum pid_type * find_pid_ns() using the int nr and struct pid_namespace *ns. */ +#define RESERVED_PIDS 300 + struct upid { int nr; struct pid_namespace *ns; @@ -61,6 +57,8 @@ struct pid refcount_t count; unsigned int level; spinlock_t lock; + struct dentry *stashed; + u64 ino; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -72,15 +70,13 @@ struct pid extern struct pid init_struct_pid; -extern const struct file_operations pidfd_fops; - struct file; -extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_create(struct pid *pid, unsigned int flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) { @@ -110,9 +106,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); -struct pid_namespace; -extern struct pid_namespace init_pid_ns; - extern int pid_max; extern int pid_max_min, pid_max_max; @@ -215,4 +208,127 @@ pid_t pid_vnr(struct pid *pid); } \ task = tg___; \ } while_each_pid_task(pid, type, task) + +static inline struct pid *task_pid(struct task_struct *task) +{ + return task->thread_pid; +} + +/* + * the helpers to get the task's different pids as they are seen + * from various namespaces + * + * task_xid_nr() : global id, i.e. the id seen from the init namespace; + * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of + * current. + * task_xid_nr_ns() : id seen from the ns specified; + * + * see also pid_nr() etc in include/linux/pid.h + */ +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); + +static inline pid_t task_pid_nr(struct task_struct *tsk) +{ + return tsk->pid; +} + +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); +} + +static inline pid_t task_pid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); +} + + +static inline pid_t task_tgid_nr(struct task_struct *tsk) +{ + return tsk->tgid; +} + +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->thread_pid != NULL; +} + +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); +} + +static inline pid_t task_pgrp_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); +} + + +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); +} + +static inline pid_t task_session_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); +} + +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + +/* Obsolete, do not use: */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} + +/** + * is_global_init - check if a task structure is init. Since init + * is free to have sub-threads we need to check tgid. + * @tsk: Task structure to be checked. + * + * Check if a task structure is the first user space task the kernel created. + * + * Return: 1 if the task structure is init. 0 otherwise. + */ +static inline int is_global_init(struct task_struct *tsk) +{ + return task_tgid_nr(tsk) == 1; +} + #endif /* _LINUX_PID_H */ diff --git a/include/linux/pid_types.h b/include/linux/pid_types.h new file mode 100644 index 000000000000..c2aee1d91dcf --- /dev/null +++ b/include/linux/pid_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PID_TYPES_H +#define _LINUX_PID_TYPES_H + +enum pid_type { + PIDTYPE_PID, + PIDTYPE_TGID, + PIDTYPE_PGID, + PIDTYPE_SID, + PIDTYPE_MAX, +}; + +struct pid_namespace; +extern struct pid_namespace init_pid_ns; + +#endif /* _LINUX_PID_TYPES_H */ diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h new file mode 100644 index 000000000000..75bdf9807802 --- /dev/null +++ b/include/linux/pidfs.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PID_FS_H +#define _LINUX_PID_FS_H + +struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); +void __init pidfs_init(void); + +#endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 4729d54e8995..73de70362b98 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -17,6 +17,7 @@ #include <linux/pinctrl/pinctrl-state.h> struct device; +struct gpio_chip; /* This struct is private to the core and should be regarded as a cookie */ struct pinctrl; @@ -25,27 +26,30 @@ struct pinctrl_state; #ifdef CONFIG_PINCTRL /* External interface to pin control */ -extern bool pinctrl_gpio_can_use_line(unsigned gpio); -extern int pinctrl_gpio_request(unsigned gpio); -extern void pinctrl_gpio_free(unsigned gpio); -extern int pinctrl_gpio_direction_input(unsigned gpio); -extern int pinctrl_gpio_direction_output(unsigned gpio); -extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset); +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_direction_input(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_direction_output(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config); -extern struct pinctrl * __must_check pinctrl_get(struct device *dev); -extern void pinctrl_put(struct pinctrl *p); -extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, - const char *name); -extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); +struct pinctrl * __must_check pinctrl_get(struct device *dev); +void pinctrl_put(struct pinctrl *p); +struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name); +int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); -extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); -extern void devm_pinctrl_put(struct pinctrl *p); -extern int pinctrl_select_default_state(struct device *dev); +struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); +void devm_pinctrl_put(struct pinctrl *p); +int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM -extern int pinctrl_pm_select_default_state(struct device *dev); -extern int pinctrl_pm_select_sleep_state(struct device *dev); -extern int pinctrl_pm_select_idle_state(struct device *dev); +int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_sleep_state(struct device *dev); +int pinctrl_pm_select_idle_state(struct device *dev); #else static inline int pinctrl_pm_select_default_state(struct device *dev) { @@ -63,31 +67,38 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #else /* !CONFIG_PINCTRL */ -static inline bool pinctrl_gpio_can_use_line(unsigned gpio) +static inline bool +pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { return true; } -static inline int pinctrl_gpio_request(unsigned gpio) +static inline int +pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline void pinctrl_gpio_free(unsigned gpio) +static inline void +pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { } -static inline int pinctrl_gpio_direction_input(unsigned gpio) +static inline int +pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_direction_output(unsigned gpio) +static inline int +pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +static inline int +pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { return 0; } diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 0639b36f43c5..673e96df453b 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -11,7 +11,7 @@ #ifndef __LINUX_PINCTRL_MACHINE_H #define __LINUX_PINCTRL_MACHINE_H -#include <linux/kernel.h> /* ARRAY_SIZE() */ +#include <linux/array_size.h> #include <linux/pinctrl/pinctrl-state.h> @@ -47,7 +47,7 @@ struct pinctrl_map_mux { struct pinctrl_map_configs { const char *group_or_pin; unsigned long *configs; - unsigned num_configs; + unsigned int num_configs; }; /** @@ -154,13 +154,13 @@ struct pinctrl_map; #ifdef CONFIG_PINCTRL extern int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned num_maps); + unsigned int num_maps); extern void pinctrl_unregister_mappings(const struct pinctrl_map *map); extern void pinctrl_provide_dummies(void); #else static inline int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned num_maps) + unsigned int num_maps) { return 0; } diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index d74b7a4ea154..a65d3d078e58 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -193,17 +193,17 @@ struct pinconf_generic_params { int pinconf_generic_dt_subnode_to_map(struct pinctrl_dev *pctldev, struct device_node *np, struct pinctrl_map **map, - unsigned *reserved_maps, unsigned *num_maps, + unsigned int *reserved_maps, unsigned int *num_maps, enum pinctrl_map_type type); int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, - unsigned *num_maps, enum pinctrl_map_type type); + unsigned int *num_maps, enum pinctrl_map_type type); void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev, - struct pinctrl_map *map, unsigned num_maps); + struct pinctrl_map *map, unsigned int num_maps); static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, - unsigned *num_maps) + unsigned int *num_maps) { return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_CONFIGS_GROUP); @@ -211,7 +211,7 @@ static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctld static inline int pinconf_generic_dt_node_to_map_pin(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, - unsigned *num_maps) + unsigned int *num_maps) { return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_CONFIGS_PIN); diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index f8a8215e9021..770ec2221156 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -40,25 +40,25 @@ struct pinconf_ops { bool is_generic; #endif int (*pin_config_get) (struct pinctrl_dev *pctldev, - unsigned pin, + unsigned int pin, unsigned long *config); int (*pin_config_set) (struct pinctrl_dev *pctldev, - unsigned pin, + unsigned int pin, unsigned long *configs, - unsigned num_configs); + unsigned int num_configs); int (*pin_config_group_get) (struct pinctrl_dev *pctldev, - unsigned selector, + unsigned int selector, unsigned long *config); int (*pin_config_group_set) (struct pinctrl_dev *pctldev, - unsigned selector, + unsigned int selector, unsigned long *configs, - unsigned num_configs); + unsigned int num_configs); void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, - unsigned offset); + unsigned int offset); void (*pin_config_group_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, - unsigned selector); + unsigned int selector); void (*pin_config_config_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, unsigned long config); diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 4d252ea00ed1..9a8189ffd0f2 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -54,7 +54,7 @@ struct pingroup { * @drv_data: driver-defined per-pin data. pinctrl core does not touch this */ struct pinctrl_pin_desc { - unsigned number; + unsigned int number; const char *name; void *drv_data; }; @@ -82,7 +82,7 @@ struct pinctrl_gpio_range { unsigned int base; unsigned int pin_base; unsigned int npins; - unsigned const *pins; + unsigned int const *pins; struct gpio_chip *gc; }; @@ -108,18 +108,18 @@ struct pinctrl_gpio_range { struct pinctrl_ops { int (*get_groups_count) (struct pinctrl_dev *pctldev); const char *(*get_group_name) (struct pinctrl_dev *pctldev, - unsigned selector); + unsigned int selector); int (*get_group_pins) (struct pinctrl_dev *pctldev, - unsigned selector, - const unsigned **pins, - unsigned *num_pins); + unsigned int selector, + const unsigned int **pins, + unsigned int *num_pins); void (*pin_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, - unsigned offset); + unsigned int offset); int (*dt_node_to_map) (struct pinctrl_dev *pctldev, struct device_node *np_config, - struct pinctrl_map **map, unsigned *num_maps); + struct pinctrl_map **map, unsigned int *num_maps); void (*dt_free_map) (struct pinctrl_dev *pctldev, - struct pinctrl_map *map, unsigned num_maps); + struct pinctrl_map *map, unsigned int num_maps); }; /** @@ -193,7 +193,7 @@ extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); extern void pinctrl_add_gpio_ranges(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *ranges, - unsigned nranges); + unsigned int nranges); extern void pinctrl_remove_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); @@ -203,8 +203,8 @@ extern struct pinctrl_gpio_range * pinctrl_find_gpio_range_from_pin(struct pinctrl_dev *pctldev, unsigned int pin); extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, - const char *pin_group, const unsigned **pins, - unsigned *num_pins); + const char *pin_group, const unsigned int **pins, + unsigned int *num_pins); /** * struct pinfunction - Description about a function diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index a7e370965c53..d6f7b58d6ad0 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -57,26 +57,26 @@ struct pinctrl_gpio_range; * the pin request. */ struct pinmux_ops { - int (*request) (struct pinctrl_dev *pctldev, unsigned offset); - int (*free) (struct pinctrl_dev *pctldev, unsigned offset); + int (*request) (struct pinctrl_dev *pctldev, unsigned int offset); + int (*free) (struct pinctrl_dev *pctldev, unsigned int offset); int (*get_functions_count) (struct pinctrl_dev *pctldev); const char *(*get_function_name) (struct pinctrl_dev *pctldev, - unsigned selector); + unsigned int selector); int (*get_function_groups) (struct pinctrl_dev *pctldev, - unsigned selector, - const char * const **groups, - unsigned *num_groups); - int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); + unsigned int selector, + const char * const **groups, + unsigned int *num_groups); + int (*set_mux) (struct pinctrl_dev *pctldev, unsigned int func_selector, + unsigned int group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, - unsigned offset); + unsigned int offset); void (*gpio_disable_free) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, - unsigned offset); + unsigned int offset); int (*gpio_set_direction) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, - unsigned offset, + unsigned int offset, bool input); bool strict; }; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 608a9eb86bff..8ff23bf5a819 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -62,9 +62,6 @@ struct pipe_inode_info { unsigned int tail; unsigned int max_usage; unsigned int ring_size; -#ifdef CONFIG_WATCH_QUEUE - bool note_loss; -#endif unsigned int nr_accounted; unsigned int readers; unsigned int writers; @@ -72,6 +69,9 @@ struct pipe_inode_info { unsigned int r_counter; unsigned int w_counter; bool poll_usage; +#ifdef CONFIG_WATCH_QUEUE + bool note_loss; +#endif struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; @@ -125,6 +125,22 @@ struct pipe_buf_operations { }; /** + * pipe_has_watch_queue - Check whether the pipe is a watch_queue, + * i.e. it was created with O_NOTIFICATION_PIPE + * @pipe: The pipe to check + * + * Return: true if pipe is a watch queue, false otherwise. + */ +static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) +{ +#ifdef CONFIG_WATCH_QUEUE + return pipe->watch_queue != NULL; +#else + return false; +#endif +} + +/** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 80cb00db42a4..2f1b952d596a 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -154,7 +154,9 @@ struct packet_stacked_data struct pktcdvd_device { - struct block_device *bdev; /* dev attached */ + struct file *bdev_file; /* dev attached */ + /* handle acquired for bdev during pkt_open_dev() */ + struct file *f_open_bdev; dev_t pkt_dev; /* our dev */ struct packet_settings settings; struct packet_stats stats; diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index f922a192fe58..ec99b7b73d1d 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 201 Broadcom Corporation + * Copyright (c) 2016 Broadcom Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ab721cf13a98..ecc47d5fe239 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3961,60 +3961,52 @@ struct ec_response_i2c_passthru { } __ec_align1; /*****************************************************************************/ -/* Power button hang detect */ - +/* AP hang detect */ #define EC_CMD_HANG_DETECT 0x009F -/* Reasons to start hang detection timer */ -/* Power button pressed */ -#define EC_HANG_START_ON_POWER_PRESS BIT(0) - -/* Lid closed */ -#define EC_HANG_START_ON_LID_CLOSE BIT(1) - - /* Lid opened */ -#define EC_HANG_START_ON_LID_OPEN BIT(2) +#define EC_HANG_DETECT_MIN_TIMEOUT 5 +#define EC_HANG_DETECT_MAX_TIMEOUT 65535 -/* Start of AP S3->S0 transition (booting or resuming from suspend) */ -#define EC_HANG_START_ON_RESUME BIT(3) +/* EC hang detect commands */ +enum ec_hang_detect_cmds { + /* Reload AP hang detect timer. */ + EC_HANG_DETECT_CMD_RELOAD = 0x0, -/* Reasons to cancel hang detection */ + /* Stop AP hang detect timer. */ + EC_HANG_DETECT_CMD_CANCEL = 0x1, -/* Power button released */ -#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8) - -/* Any host command from AP received */ -#define EC_HANG_STOP_ON_HOST_COMMAND BIT(9) - -/* Stop on end of AP S0->S3 transition (suspending or shutting down) */ -#define EC_HANG_STOP_ON_SUSPEND BIT(10) + /* Configure watchdog with given reboot timeout and + * cancel currently running AP hang detect timer. + */ + EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2, -/* - * If this flag is set, all the other fields are ignored, and the hang detect - * timer is started. This provides the AP a way to start the hang timer - * without reconfiguring any of the other hang detect settings. Note that - * you must previously have configured the timeouts. - */ -#define EC_HANG_START_NOW BIT(30) + /* Get last hang status - whether the AP boot was clear or not */ + EC_HANG_DETECT_CMD_GET_STATUS = 0x3, -/* - * If this flag is set, all the other fields are ignored (including - * EC_HANG_START_NOW). This provides the AP a way to stop the hang timer - * without reconfiguring any of the other hang detect settings. - */ -#define EC_HANG_STOP_NOW BIT(31) + /* Clear last hang status. Called when AP is rebooting/shutting down + * gracefully. + */ + EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4 +}; struct ec_params_hang_detect { - /* Flags; see EC_HANG_* */ - uint32_t flags; - - /* Timeout in msec before generating host event, if enabled */ - uint16_t host_event_timeout_msec; + uint16_t command; /* enum ec_hang_detect_cmds */ + /* Timeout in seconds before generating reboot */ + uint16_t reboot_timeout_sec; +} __ec_align2; - /* Timeout in msec before generating warm reboot, if enabled */ - uint16_t warm_reboot_timeout_msec; -} __ec_align4; +/* Status codes that describe whether AP has boot normally or the hang has been + * detected and EC has reset AP + */ +enum ec_hang_detect_status { + EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0, + EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1, + EC_HANG_DETECT_AP_BOOT_COUNT, +}; +struct ec_response_hang_detect { + uint8_t status; /* enum ec_hang_detect_status */ +} __ec_align1; /*****************************************************************************/ /* Commands for battery charging */ @@ -4436,8 +4428,20 @@ struct ec_response_i2c_passthru_protect { * These commands are for sending and receiving message via HDMI CEC */ +#define EC_CEC_MAX_PORTS 16 + #define MAX_CEC_MSG_LEN 16 +/* + * Helper macros for packing/unpacking cec_events. + * bits[27:0] : bitmask of events from enum mkbp_cec_event + * bits[31:28]: port number + */ +#define EC_MKBP_EVENT_CEC_PACK(events, port) \ + (((events) & GENMASK(27, 0)) | (((port) & 0xf) << 28)) +#define EC_MKBP_EVENT_CEC_GET_EVENTS(event) ((event) & GENMASK(27, 0)) +#define EC_MKBP_EVENT_CEC_GET_PORT(event) (((event) >> 28) & 0xf) + /* CEC message from the AP to be written on the CEC bus */ #define EC_CMD_CEC_WRITE_MSG 0x00B8 @@ -4449,19 +4453,54 @@ struct ec_params_cec_write { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/** + * struct ec_params_cec_write_v1 - Message to write to the CEC bus + * @port: CEC port to write the message on + * @msg_len: length of msg in bytes + * @msg: message content to write to the CEC bus + */ +struct ec_params_cec_write_v1 { + uint8_t port; + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + +/* CEC message read from a CEC bus reported back to the AP */ +#define EC_CMD_CEC_READ_MSG 0x00B9 + +/** + * struct ec_params_cec_read - Read a message from the CEC bus + * @port: CEC port to read a message on + */ +struct ec_params_cec_read { + uint8_t port; +} __ec_align1; + +/** + * struct ec_response_cec_read - Message read from the CEC bus + * @msg_len: length of msg in bytes + * @msg: message content read from the CEC bus + */ +struct ec_response_cec_read { + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA /** * struct ec_params_cec_set - CEC parameters set * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to set the parameter on * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC * or 1 to enable CEC functionality, in case cmd is * CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical * address between 0 and 15 or 0xff to unregister */ struct ec_params_cec_set { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; uint8_t val; } __ec_align1; @@ -4471,9 +4510,11 @@ struct ec_params_cec_set { /** * struct ec_params_cec_get - CEC parameters get * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to get the parameter on */ struct ec_params_cec_get { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; } __ec_align1; /** @@ -4487,6 +4528,17 @@ struct ec_response_cec_get { uint8_t val; } __ec_align1; +/* Get the number of CEC ports */ +#define EC_CMD_CEC_PORT_COUNT 0x00C1 + +/** + * struct ec_response_cec_port_count - CEC port count response + * @port_count: number of CEC ports + */ +struct ec_response_cec_port_count { + uint8_t port_count; +} __ec_align1; + /* CEC parameters command */ enum cec_command { /* CEC reading, writing and events enable */ @@ -4501,6 +4553,8 @@ enum mkbp_cec_event { EC_MKBP_CEC_SEND_OK = BIT(0), /* Outgoing message was not acknowledged */ EC_MKBP_CEC_SEND_FAILED = BIT(1), + /* Incoming message can be read out by AP */ + EC_MKBP_CEC_HAVE_DATA = BIT(2), }; /*****************************************************************************/ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4f9f756bc17c..8865e350c12a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -258,7 +258,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); -int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, +int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); /** diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index f377817ce75c..cdd8cfb424f5 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -144,9 +144,6 @@ #define OMAP_MAX_GPIO_LINES 192 -#define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) -#define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) - #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index f2781aa7eff8..70e8a6bec0f6 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -40,6 +40,6 @@ struct gsc_hwmon_platform_data { unsigned int resolution; unsigned int vreference; unsigned int fan_base; - struct gsc_hwmon_channel channels[]; + struct gsc_hwmon_channel channels[] __counted_by(nchannels); }; #endif diff --git a/include/linux/platform_data/i2c-mux-reg.h b/include/linux/platform_data/i2c-mux-reg.h index 2543c2a1c9ae..e2e895768311 100644 --- a/include/linux/platform_data/i2c-mux-reg.h +++ b/include/linux/platform_data/i2c-mux-reg.h @@ -17,7 +17,6 @@ * @n_values: Number of multiplexer channels * @little_endian: Indicating if the register is in little endian * @write_only: Reading the register is not allowed by hardware - * @classes: Optional I2C auto-detection classes * @idle: Value to write to mux when idle * @idle_in_use: indicate if idle value is in use * @reg: Virtual address of the register to switch channel @@ -30,7 +29,6 @@ struct i2c_mux_reg_platform_data { int n_values; bool little_endian; bool write_only; - const unsigned int *classes; u32 idle; bool idle_in_use; void __iomem *reg; diff --git a/include/linux/platform_data/keypad-omap.h b/include/linux/platform_data/keypad-omap.h index 3e7c64c854f4..f3f1311cdf3a 100644 --- a/include/linux/platform_data/keypad-omap.h +++ b/include/linux/platform_data/keypad-omap.h @@ -19,9 +19,6 @@ struct omap_kp_platform_data { bool rep; unsigned long delay; bool dbounce; - /* specific to OMAP242x*/ - unsigned int *row_gpios; - unsigned int *col_gpios; }; /* Group (0..3) -- when multiple keys are pressed, only the diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h index 8a5f9f0b2c52..724e1f57b81f 100644 --- a/include/linux/platform_data/mdio-bcm-unimac.h +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -1,11 +1,14 @@ #ifndef __MDIO_BCM_UNIMAC_PDATA_H #define __MDIO_BCM_UNIMAC_PDATA_H +struct clk; + struct unimac_mdio_pdata { u32 phy_mask; int (*wait_func)(void *data); void *wait_func_data; const char *bus_name; + struct clk *clk; }; #define UNIMAC_MDIO_DRV_NAME "unimac-mdio" diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index ea1cc6d829e9..8c659db4da6b 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -20,10 +20,32 @@ #define __MICROCHIP_KSZ_H #include <linux/types.h> +#include <linux/platform_data/dsa.h> + +enum ksz_chip_id { + KSZ8563_CHIP_ID = 0x8563, + KSZ8795_CHIP_ID = 0x8795, + KSZ8794_CHIP_ID = 0x8794, + KSZ8765_CHIP_ID = 0x8765, + KSZ8830_CHIP_ID = 0x8830, + KSZ9477_CHIP_ID = 0x00947700, + KSZ9896_CHIP_ID = 0x00989600, + KSZ9897_CHIP_ID = 0x00989700, + KSZ9893_CHIP_ID = 0x00989300, + KSZ9563_CHIP_ID = 0x00956300, + KSZ8567_CHIP_ID = 0x00856700, + KSZ9567_CHIP_ID = 0x00956700, + LAN9370_CHIP_ID = 0x00937000, + LAN9371_CHIP_ID = 0x00937100, + LAN9372_CHIP_ID = 0x00937200, + LAN9373_CHIP_ID = 0x00937300, + LAN9374_CHIP_ID = 0x00937400, +}; struct ksz_platform_data { + /* Must be first such that dsa_register_switch() can access it */ + struct dsa_chip_data cd; u32 chip_id; - u16 enabled_ports; }; #endif diff --git a/include/linux/platform_data/net-cw1200.h b/include/linux/platform_data/net-cw1200.h index c510734405bb..89d0ec6f7d46 100644 --- a/include/linux/platform_data/net-cw1200.h +++ b/include/linux/platform_data/net-cw1200.h @@ -14,8 +14,6 @@ struct cw1200_platform_data_spi { /* All others are optional */ bool have_5ghz; - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int (*power_ctrl)(const struct cw1200_platform_data_spi *pdata, bool enable); /* Control 3v3 / 1v8 supply */ int (*clk_ctrl)(const struct cw1200_platform_data_spi *pdata, @@ -30,8 +28,6 @@ struct cw1200_platform_data_sdio { /* All others are optional */ bool have_5ghz; bool no_nptb; /* SDIO hardware does not support non-power-of-2-blocksizes */ - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int irq; /* IRQ line or 0 to use SDIO IRQ */ int (*power_ctrl)(const struct cw1200_platform_data_sdio *pdata, bool enable); /* Control 3v3 / 1v8 supply */ diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 0dd851ea1c72..7fcb55fe21c9 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -37,9 +37,6 @@ struct omap_tw4030_pdata { bool has_digimic0; bool has_digimic1; u8 has_linein; - - /* Jack detect GPIO or <= 0 if it is not implemented */ - int jack_detect; }; #endif /* _OMAP_TWL4030_H_ */ diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h index 96c1a14ab365..3c3787c4d96c 100644 --- a/include/linux/platform_data/pca953x.h +++ b/include/linux/platform_data/pca953x.h @@ -11,21 +11,8 @@ struct pca953x_platform_data { /* number of the first GPIO */ unsigned gpio_base; - /* initial polarity inversion setting */ - u32 invert; - /* interrupt base */ int irq_base; - - void *context; /* param to setup/teardown */ - - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - const char *const *names; }; #endif /* _LINUX_PCA953X_H */ diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..6c19d4fbbe39 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,7 +10,7 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h> enum shmob_drm_clk_source { SHMOB_DRM_CLK_BUS, @@ -18,72 +18,21 @@ enum shmob_drm_clk_source { SHMOB_DRM_CLK_EXTERNAL, }; -enum shmob_drm_interface { - SHMOB_DRM_IFACE_RGB8, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_RGB9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_RGB12A, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_RGB12B, /* 12bpp */ - SHMOB_DRM_IFACE_RGB16, /* 16bpp */ - SHMOB_DRM_IFACE_RGB18, /* 18bpp */ - SHMOB_DRM_IFACE_RGB24, /* 24bpp */ - SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ -}; - -struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ - struct drm_mode_modeinfo mode; + struct videomode mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - -#define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ -#define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ -#define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ -#define SHMOB_DRM_IFACE_FL_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */ -#define SHMOB_DRM_IFACE_FL_DWCNT (1 << 4) /* Disable dotclock during blanking */ - struct shmob_drm_interface_data { - enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; + unsigned int bus_fmt; /* MEDIA_BUS_FMT_* */ unsigned int clk_div; - unsigned int flags; }; struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h index c71a2dd66143..5f412a615532 100644 --- a/include/linux/platform_data/si5351.h +++ b/include/linux/platform_data/si5351.h @@ -105,10 +105,12 @@ struct si5351_clkout_config { * @clk_xtal: xtal input clock * @clk_clkin: clkin input clock * @pll_src: array of pll source clock setting + * @pll_reset: array indicating if plls should be reset after setting the rate * @clkout: array of clkout configuration */ struct si5351_platform_data { enum si5351_pll_src pll_src[2]; + bool pll_reset[2]; struct si5351_clkout_config clkout[8]; }; diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 16e99a1c37fc..ab1c7deff118 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -58,6 +58,10 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +/* This can only be used to disable the screen, not re-enable */ +#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 +/* Writing a brightness re-enables the screen if disabled */ +#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 @@ -110,6 +114,9 @@ /* Charging mode - 1=Barrel, 2=USB */ #define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C +/* MCU powersave mode */ +#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2 + /* epu is connected? 1 == true */ #define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018 /* egpu on/off */ diff --git a/include/linux/platform_data/x86/clk-lpss.h b/include/linux/platform_data/x86/clk-lpss.h index 41df326583f9..7f132029316a 100644 --- a/include/linux/platform_data/x86/clk-lpss.h +++ b/include/linux/platform_data/x86/clk-lpss.h @@ -15,6 +15,6 @@ struct lpss_clk_data { struct clk *clk; }; -extern int lpss_atom_clk_init(void); +int lpss_atom_clk_init(void); #endif /* __CLK_LPSS_H */ diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index b8a701c77fd0..161e4bc1c9ee 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -43,6 +43,19 @@ BIT_ORED_DEDICATED_IRQ_GPSC | \ BIT_SHARED_IRQ_GPSS) +/* External clk generator settings */ +#define PMC_CLK_CTL_OFFSET 0x60 +#define PMC_CLK_CTL_SIZE 4 +#define PMC_CLK_NUM 6 +#define PMC_CLK_CTL_GATED_ON_D3 0x0 +#define PMC_CLK_CTL_FORCE_ON 0x1 +#define PMC_CLK_CTL_FORCE_OFF 0x2 +#define PMC_CLK_CTL_RESERVED 0x3 +#define PMC_MASK_CLK_CTL GENMASK(1, 0) +#define PMC_MASK_CLK_FREQ BIT(2) +#define PMC_CLK_FREQ_XTAL (0 << 2) /* 25 MHz */ +#define PMC_CLK_FREQ_PLL (1 << 2) /* 19.2 MHz */ + /* The timers accumulate time spent in sleep state */ #define PMC_S0IR_TMR 0x80 #define PMC_S0I1_TMR 0x84 @@ -104,14 +117,14 @@ #define BIT_SCC_SDIO BIT(9) #define BIT_SCC_SDCARD BIT(10) #define BIT_SCC_MIPI BIT(11) -#define BIT_HDA BIT(12) +#define BIT_HDA BIT(12) /* CHT datasheet: reserved */ #define BIT_LPE BIT(13) #define BIT_OTG BIT(14) -#define BIT_USH BIT(15) -#define BIT_GBE BIT(16) -#define BIT_SATA BIT(17) -#define BIT_USB_EHCI BIT(18) -#define BIT_SEC BIT(19) +#define BIT_USH BIT(15) /* CHT datasheet: reserved */ +#define BIT_GBE BIT(16) /* CHT datasheet: reserved */ +#define BIT_SATA BIT(17) /* CHT datasheet: reserved */ +#define BIT_USB_EHCI BIT(18) /* CHT datasheet: XHCI! */ +#define BIT_SEC BIT(19) /* BYT datasheet: reserved */ #define BIT_PCIE_PORT0 BIT(20) #define BIT_PCIE_PORT1 BIT(21) #define BIT_PCIE_PORT2 BIT(22) diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h index c852fe24fe2a..752c06b47cc8 100644 --- a/include/linux/platform_data/x86/pwm-lpss.h +++ b/include/linux/platform_data/x86/pwm-lpss.h @@ -27,7 +27,7 @@ struct pwm_lpss_boardinfo { bool other_devices_aml_touches_pwm_regs; }; -struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, - const struct pwm_lpss_boardinfo *info); +struct pwm_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, + const struct pwm_lpss_boardinfo *info); #endif /* __PLATFORM_DATA_X86_PWM_LPSS_H */ diff --git a/include/linux/plist.h b/include/linux/plist.h index 0f352c1d3c80..8c1c8adf7fe9 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -75,20 +75,10 @@ #include <linux/container_of.h> #include <linux/list.h> -#include <linux/types.h> +#include <linux/plist_types.h> #include <asm/bug.h> -struct plist_head { - struct list_head node_list; -}; - -struct plist_node { - int prio; - struct list_head prio_list; - struct list_head node_list; -}; - /** * PLIST_HEAD_INIT - static struct plist_head initializer * @head: struct plist_head variable name diff --git a/include/linux/plist_types.h b/include/linux/plist_types.h new file mode 100644 index 000000000000..c37e784330af --- /dev/null +++ b/include/linux/plist_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_PLIST_TYPES_H +#define _LINUX_PLIST_TYPES_H + +#include <linux/types.h> + +struct plist_head { + struct list_head node_list; +}; + +struct plist_node { + int prio; + struct list_head prio_list; + struct list_head node_list; +}; + +#endif /* _LINUX_PLIST_TYPES_H */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 1400c37b29c7..97b0e23363c8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -374,24 +374,39 @@ const struct dev_pm_ops name = { \ RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ +#define _EXPORT_PM_OPS(name, license, ns) \ const struct dev_pm_ops name; \ __EXPORT_SYMBOL(name, license, ns); \ const struct dev_pm_ops name -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) -#else -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ + +#define _DISCARD_PM_OPS(name, license, ns) \ static __maybe_unused const struct dev_pm_ops __static_##name + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) +#else +#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) #define EXPORT_PM_FN_NS_GPL(name, ns) #endif -#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") -#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") -#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) -#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) +#ifdef CONFIG_PM_SLEEP +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#else +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) +#endif + +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) + +#define EXPORT_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", #ns) /* * Use this if you want to use the same suspend and resume callbacks for suspend @@ -404,19 +419,19 @@ const struct dev_pm_ops name = { \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_DEV_PM_OPS(name) = { \ + EXPORT_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_GPL_DEV_PM_OPS(name) = { \ + EXPORT_GPL_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } @@ -647,8 +662,8 @@ struct pm_subsys_data { struct dev_pm_info { pm_message_t power_state; - unsigned int can_wakeup:1; - unsigned int async_suspend:1; + bool can_wakeup:1; + bool async_suspend:1; bool in_dpm_list:1; /* Owned by the PM core */ bool is_prepared:1; /* Owned by the PM core */ bool is_suspended:1; /* Ditto */ @@ -666,10 +681,11 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ - unsigned int must_resume:1; /* Owned by the PM core */ - unsigned int may_skip_resume:1; /* Set by subsystems */ + bool async_in_progress:1; /* Owned by the PM core */ + bool must_resume:1; /* Owned by the PM core */ + bool may_skip_resume:1; /* Set by subsystems */ #else - unsigned int should_wakeup:1; + bool should_wakeup:1; #endif #ifdef CONFIG_PM struct hrtimer suspend_timer; @@ -680,17 +696,17 @@ struct dev_pm_info { atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; - unsigned int idle_notification:1; - unsigned int request_pending:1; - unsigned int deferred_resume:1; - unsigned int needs_force_resume:1; - unsigned int runtime_auto:1; + bool idle_notification:1; + bool request_pending:1; + bool deferred_resume:1; + bool needs_force_resume:1; + bool runtime_auto:1; bool ignore_children:1; - unsigned int no_callbacks:1; - unsigned int irq_safe:1; - unsigned int use_autosuspend:1; - unsigned int timer_autosuspends:1; - unsigned int memalloc_noio:1; + bool no_callbacks:1; + bool irq_safe:1; + bool use_autosuspend:1; + bool timer_autosuspends:1; + bool memalloc_noio:1; unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; @@ -719,6 +735,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. * @dismiss: Called after unsuccessful driver probe and after driver removal. + * @set_performance_state: Called to request a new performance state. * * Power domains provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions instead of @@ -731,6 +748,7 @@ struct dev_pm_domain { int (*activate)(struct device *dev); void (*sync)(struct device *dev); void (*dismiss)(struct device *dev); + int (*set_performance_state)(struct device *dev, unsigned int state); }; /* diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index ada3a0ab10bf..68669ce18720 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -91,10 +91,10 @@ static inline int devm_pm_clk_create(struct device *dev) #endif #ifdef CONFIG_HAVE_CLK -extern void pm_clk_add_notifier(struct bus_type *bus, +extern void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb); #else -static inline void pm_clk_add_notifier(struct bus_type *bus, +static inline void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb) { } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..772d3280d35f 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -20,6 +20,33 @@ #include <linux/time64.h> /* + * Flags to control the behaviour when attaching a device to its PM domains. + * + * PD_FLAG_NO_DEV_LINK: As the default behaviour creates a device-link + * for every PM domain that gets attached, this + * flag can be used to skip that. + * + * PD_FLAG_DEV_LINK_ON: Add the DL_FLAG_RPM_ACTIVE to power-on the + * supplier and its PM domain when creating the + * device-links. + * + */ +#define PD_FLAG_NO_DEV_LINK BIT(0) +#define PD_FLAG_DEV_LINK_ON BIT(1) + +struct dev_pm_domain_attach_data { + const char * const *pd_names; + const u32 num_pd_names; + const u32 pd_flags; +}; + +struct dev_pm_domain_list { + struct device **pd_devs; + struct device_link **pd_links; + u32 num_pds; +}; + +/* * Flags to control the behaviour of a genpd. * * These flags may be set in the struct generic_pm_domain's flags field by a @@ -61,6 +88,10 @@ * GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its * components' next wakeup when determining the * optimal idle state. + * + * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, + * but its corresponding OPP tables are not + * described in DT, but are given directly by FW. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -69,6 +100,7 @@ #define GENPD_FLAG_CPU_DOMAIN (1U << 4) #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) +#define GENPD_FLAG_OPP_TABLE_FW (1U << 7) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ @@ -113,7 +145,6 @@ struct genpd_power_state { }; struct genpd_lock_ops; -struct dev_pm_opp; struct opp_table; struct generic_pm_domain { @@ -141,8 +172,6 @@ struct generic_pm_domain { int (*power_on)(struct generic_pm_domain *domain); struct raw_notifier_head power_notifiers; /* Power on/off notifiers */ struct opp_table *opp_table; /* OPP table of the genpd */ - unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd, - struct dev_pm_opp *opp); int (*set_performance_state)(struct generic_pm_domain *genpd, unsigned int state); struct gpd_dev_ops dev_ops; @@ -320,7 +349,7 @@ static inline void dev_pm_genpd_resume(struct device *dev) {} /* OF PM domain providers */ struct of_device_id; -typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, +typedef struct generic_pm_domain *(*genpd_xlate_t)(const struct of_phandle_args *args, void *data); struct genpd_onecell_data { @@ -335,16 +364,14 @@ int of_genpd_add_provider_simple(struct device_node *np, int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); -int of_genpd_add_device(struct of_phandle_args *args, struct device *dev); -int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec); -int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec); +int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev); +int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec); +int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec); struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); -unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, - struct dev_pm_opp *opp); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -366,20 +393,20 @@ static inline int of_genpd_add_provider_onecell(struct device_node *np, static inline void of_genpd_del_provider(struct device_node *np) {} -static inline int of_genpd_add_device(struct of_phandle_args *args, +static inline int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev) { return -ENODEV; } -static inline int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) +static inline int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec) { return -ENODEV; } -static inline int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) +static inline int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec) { return -ENODEV; } @@ -390,13 +417,6 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, return -ENODEV; } -static inline unsigned int -pm_genpd_opp_to_performance_state(struct device *genpd_dev, - struct dev_pm_opp *opp) -{ - return 0; -} - static inline int genpd_dev_pm_attach(struct device *dev) { return 0; @@ -427,9 +447,14 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev, unsigned int index); struct device *dev_pm_domain_attach_by_name(struct device *dev, const char *name); +int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list); void dev_pm_domain_detach(struct device *dev, bool power_off); +void dev_pm_domain_detach_list(struct dev_pm_domain_list *list); int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { @@ -445,13 +470,25 @@ static inline struct device *dev_pm_domain_attach_by_name(struct device *dev, { return NULL; } +static inline int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + return 0; +} static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} +static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {} static inline int dev_pm_domain_start(struct device *dev) { return 0; } static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} +static inline int dev_pm_domain_set_performance_state(struct device *dev, + unsigned int state) +{ + return 0; +} #endif #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 91f87d7e807c..065a47382302 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -16,6 +16,7 @@ #include <linux/notifier.h> struct clk; +struct cpufreq_frequency_table; struct regulator; struct dev_pm_opp; struct device; @@ -45,18 +46,6 @@ struct dev_pm_opp_supply { unsigned long u_watt; }; -/** - * struct dev_pm_opp_icc_bw - Interconnect bandwidth values - * @avg: Average bandwidth corresponding to this OPP (in icc units) - * @peak: Peak bandwidth corresponding to this OPP (in icc units) - * - * This structure stores the bandwidth values for a single interconnect path. - */ -struct dev_pm_opp_icc_bw { - u32 avg; - u32 peak; -}; - typedef int (*config_regulators_t)(struct device *dev, struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp, struct regulator **regulators, unsigned int count); @@ -74,8 +63,10 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. - * @virt_devs: Pointer to return the array of virtual devices. + * attach. Mutually exclusive with required_devs. + * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually + * exclusive with required_devs. + * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. * * This structure contains platform specific OPP configurations for the device. */ @@ -90,6 +81,24 @@ struct dev_pm_opp_config { const char * const *regulator_names; const char * const *genpd_names; struct device ***virt_devs; + struct device **required_devs; +}; + +#define OPP_LEVEL_UNSET U32_MAX + +/** + * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @turbo: Flag to indicate whether the OPP is to be marked turbo or not. + * @level: The performance level for the OPP. Set level to OPP_LEVEL_UNSET if + * level field isn't used. + * @freq: The clock rate in Hz for the OPP. + * @u_volt: The voltage in uV for the OPP. + */ +struct dev_pm_opp_data { + bool turbo; + unsigned int level; + unsigned long freq; + unsigned long u_volt; }; #if defined(CONFIG_PM_OPP) @@ -144,6 +153,9 @@ struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level); +struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned int *level); + struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -152,8 +164,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, void dev_pm_opp_put(struct dev_pm_opp *opp); -int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt); +int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); + void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); @@ -308,6 +320,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned int *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index) { @@ -322,8 +340,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} -static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt) +static inline int +dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp) { return -EOPNOTSUPP; } @@ -429,6 +447,21 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev) #endif /* CONFIG_PM_OPP */ +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); +void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); +#else +static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) +{ +} +#endif + + #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); @@ -519,6 +552,17 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta /* OPP Configuration helpers */ +static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + struct dev_pm_opp_data data = { + .freq = freq, + .u_volt = u_volt, + }; + + return dev_pm_opp_add_dynamic(dev, &data); +} + /* Regulators helpers */ static inline int dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7c9b35448563..d39dc863f612 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -72,7 +72,8 @@ extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); -extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); +extern int pm_runtime_get_if_active(struct device *dev); +extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); @@ -95,18 +96,6 @@ extern void pm_runtime_release_supplier(struct device_link *link); extern int devm_pm_runtime_enable(struct device *dev); /** - * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. - * @dev: Target device. - * - * Increment the runtime PM usage counter of @dev if its runtime PM status is - * %RPM_ACTIVE and its runtime PM usage counter is greater than 0. - */ -static inline int pm_runtime_get_if_in_use(struct device *dev) -{ - return pm_runtime_get_if_active(dev, false); -} - -/** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. @@ -275,8 +264,7 @@ static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } -static inline int pm_runtime_get_if_active(struct device *dev, - bool ign_usage_count) +static inline int pm_runtime_get_if_active(struct device *dev) { return -EINVAL; } @@ -461,6 +449,18 @@ static inline int pm_runtime_put(struct device *dev) } /** + * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. + * @dev: Target device. + * + * Decrement the runtime PM usage counter of @dev and if it turns out to be + * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + */ +static inline int __pm_runtime_put_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); +} + +/** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * diff --git a/include/linux/pnp.h b/include/linux/pnp.h index c2a7cfbca713..ddbe7c3ca4ce 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -291,7 +291,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) struct pnp_fixup { char id[7]; - void (*quirk_function) (struct pnp_dev * dev); /* fixup function */ + void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; /* config parameters */ @@ -419,8 +419,8 @@ struct pnp_protocol { /* protocol specific suspend/resume */ bool (*can_wakeup) (struct pnp_dev *dev); - int (*suspend) (struct pnp_dev * dev, pm_message_t state); - int (*resume) (struct pnp_dev * dev); + int (*suspend) (struct pnp_dev *dev, pm_message_t state); + int (*resume) (struct pnp_dev *dev); /* used by pnp layer only (look but don't touch) */ unsigned char number; /* protocol number */ @@ -435,7 +435,7 @@ struct pnp_protocol { #define protocol_for_each_dev(protocol, dev) \ list_for_each_entry(dev, &(protocol)->devices, protocol_list) -extern struct bus_type pnp_bus_type; +extern const struct bus_type pnp_bus_type; #if defined(CONFIG_PNP) @@ -492,7 +492,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0; } /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } diff --git a/include/linux/poison.h b/include/linux/poison.h index 851a855d3868..1f0ee2459f2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -83,6 +83,8 @@ /********** net/core/skbuff.c **********/ #define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA)) +/********** net/ **********/ +#define NET_PTR_POISON ((void *)(0x801 + POISON_POINTER_DELTA)) /********** kernel/bpf/ **********/ #define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) @@ -90,4 +92,7 @@ /********** VFS **********/ #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) +/********** lib/stackdepot.c **********/ +#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2..d1ea4f3714a8 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 468328b1e1dd..ef8619f48920 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -14,6 +14,7 @@ #include <linux/rwsem.h> struct posix_clock; +struct posix_clock_context; /** * struct posix_clock_operations - functional interface to the clock @@ -50,18 +51,18 @@ struct posix_clock_operations { /* * Optional character device methods: */ - long (*ioctl) (struct posix_clock *pc, - unsigned int cmd, unsigned long arg); + long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); - int (*open) (struct posix_clock *pc, fmode_t f_mode); + int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode); - __poll_t (*poll) (struct posix_clock *pc, - struct file *file, poll_table *wait); + __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file, + poll_table *wait); - int (*release) (struct posix_clock *pc); + int (*release)(struct posix_clock_context *pccontext); - ssize_t (*read) (struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); + ssize_t (*read)(struct posix_clock_context *pccontext, uint flags, + char __user *buf, size_t cnt); }; /** @@ -91,6 +92,24 @@ struct posix_clock { }; /** + * struct posix_clock_context - represents clock file operations context + * + * @clk: Pointer to the clock + * @private_clkdata: Pointer to user data + * + * Drivers should use struct posix_clock_context during specific character + * device file operation methods to access the posix clock. + * + * Drivers can store a private data structure during the open operation + * if they have specific information that is required in other file + * operations. + */ +struct posix_clock_context { + struct posix_clock *clk; + void *private_clkdata; +}; + +/** * posix_clock_register() - register a new clock * @clk: Pointer to the clock. Caller must provide 'ops' field * @dev: Pointer to the initialized device. Caller must provide diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index d607f51404fc..dc7b738de299 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -2,40 +2,16 @@ #ifndef _linux_POSIX_TIMERS_H #define _linux_POSIX_TIMERS_H -#include <linux/spinlock.h> +#include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> -#include <linux/alarmtimer.h> +#include <linux/posix-timers_types.h> +#include <linux/spinlock.h> #include <linux/timerqueue.h> struct kernel_siginfo; struct task_struct; -/* - * Bit fields within a clockid: - * - * The most significant 29 bits hold either a pid or a file descriptor. - * - * Bit 2 indicates whether a cpu clock refers to a thread or a process. - * - * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. - * - * A clockid is invalid if bits 2, 1, and 0 are all set. - */ -#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) -#define CPUCLOCK_PERTHREAD(clock) \ - (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) - -#define CPUCLOCK_PERTHREAD_MASK 4 -#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) -#define CPUCLOCK_CLOCK_MASK 3 -#define CPUCLOCK_PROF 0 -#define CPUCLOCK_VIRT 1 -#define CPUCLOCK_SCHED 2 -#define CPUCLOCK_MAX 3 -#define CLOCKFD CPUCLOCK_MAX -#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) - static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) { @@ -109,44 +85,6 @@ static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp) ctmr->node.expires = exp; } -/** - * posix_cputimer_base - Container per posix CPU clock - * @nextevt: Earliest-expiration cache - * @tqhead: timerqueue head for cpu_timers - */ -struct posix_cputimer_base { - u64 nextevt; - struct timerqueue_head tqhead; -}; - -/** - * posix_cputimers - Container for posix CPU timer related data - * @bases: Base container for posix CPU clocks - * @timers_active: Timers are queued. - * @expiry_active: Timer expiry is active. Used for - * process wide timers to avoid multiple - * task trying to handle expiry concurrently - * - * Used in task_struct and signal_struct - */ -struct posix_cputimers { - struct posix_cputimer_base bases[CPUCLOCK_MAX]; - unsigned int timers_active; - unsigned int expiry_active; -}; - -/** - * posix_cputimers_work - Container for task work based posix CPU timer expiry - * @work: The task work to be scheduled - * @mutex: Mutex held around expiry in context of this task work - * @scheduled: @work has been scheduled already, no further processing - */ -struct posix_cputimers_work { - struct callback_head work; - struct mutex mutex; - unsigned int scheduled; -}; - static inline void posix_cputimers_init(struct posix_cputimers *pct) { memset(pct, 0, sizeof(*pct)); @@ -179,7 +117,6 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, .bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \ }, #else -struct posix_cputimers { }; struct cpu_timer { }; #define INIT_CPU_TIMERS(s) static inline void posix_cputimers_init(struct posix_cputimers *pct) { } diff --git a/include/linux/posix-timers_types.h b/include/linux/posix-timers_types.h new file mode 100644 index 000000000000..a4712c1008c9 --- /dev/null +++ b/include/linux/posix-timers_types.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _linux_POSIX_TIMERS_TYPES_H +#define _linux_POSIX_TIMERS_TYPES_H + +#include <linux/mutex_types.h> +#include <linux/timerqueue_types.h> +#include <linux/types.h> + +/* + * Bit fields within a clockid: + * + * The most significant 29 bits hold either a pid or a file descriptor. + * + * Bit 2 indicates whether a cpu clock refers to a thread or a process. + * + * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. + * + * A clockid is invalid if bits 2, 1, and 0 are all set. + */ +#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) +#define CPUCLOCK_PERTHREAD(clock) \ + (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) + +#define CPUCLOCK_PERTHREAD_MASK 4 +#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) +#define CPUCLOCK_CLOCK_MASK 3 +#define CPUCLOCK_PROF 0 +#define CPUCLOCK_VIRT 1 +#define CPUCLOCK_SCHED 2 +#define CPUCLOCK_MAX 3 +#define CLOCKFD CPUCLOCK_MAX +#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) + +#ifdef CONFIG_POSIX_TIMERS + +/** + * posix_cputimer_base - Container per posix CPU clock + * @nextevt: Earliest-expiration cache + * @tqhead: timerqueue head for cpu_timers + */ +struct posix_cputimer_base { + u64 nextevt; + struct timerqueue_head tqhead; +}; + +/** + * posix_cputimers - Container for posix CPU timer related data + * @bases: Base container for posix CPU clocks + * @timers_active: Timers are queued. + * @expiry_active: Timer expiry is active. Used for + * process wide timers to avoid multiple + * task trying to handle expiry concurrently + * + * Used in task_struct and signal_struct + */ +struct posix_cputimers { + struct posix_cputimer_base bases[CPUCLOCK_MAX]; + unsigned int timers_active; + unsigned int expiry_active; +}; + +/** + * posix_cputimers_work - Container for task work based posix CPU timer expiry + * @work: The task work to be scheduled + * @mutex: Mutex held around expiry in context of this task work + * @scheduled: @work has been scheduled already, no further processing + */ +struct posix_cputimers_work { + struct callback_head work; + struct mutex mutex; + unsigned int scheduled; +}; + +#else /* CONFIG_POSIX_TIMERS */ + +struct posix_cputimers { }; + +#endif /* CONFIG_POSIX_TIMERS */ + +#endif /* _linux_POSIX_TIMERS_TYPES_H */ diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 7c8d65414a70..b9e5bd2b42d3 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -61,7 +61,6 @@ struct bq27xxx_reg_cache { struct bq27xxx_device_info { struct device *dev; - int id; enum bq27xxx_chip chip; u32 opts; const char *name; @@ -83,5 +82,6 @@ struct bq27xxx_device_info { void bq27xxx_battery_update(struct bq27xxx_device_info *di); int bq27xxx_battery_setup(struct bq27xxx_device_info *di); void bq27xxx_battery_teardown(struct bq27xxx_device_info *di); +extern const struct dev_pm_ops bq27xxx_battery_battery_pm_ops; #endif diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a427f13c757f..8e5705a56b85 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -242,6 +242,7 @@ struct power_supply_config { struct power_supply_desc { const char *name; enum power_supply_type type; + u8 charge_behaviours; const enum power_supply_usb_type *usb_types; size_t num_usb_types; const enum power_supply_property *properties; @@ -767,7 +768,6 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct atomic_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) @@ -895,8 +895,7 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev); #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -/* For APM emulation, think legacy userspace. */ -extern struct class *power_supply_class; +extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) { diff --git a/include/linux/prandom.h b/include/linux/prandom.h index f2ed5b72b3d6..f7f1e5251c67 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/once.h> -#include <linux/percpu.h> #include <linux/random.h> struct rnd_state { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1424670df161..7233e9cf1bab 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -9,7 +9,7 @@ #include <linux/linkage.h> #include <linux/cleanup.h> -#include <linux/list.h> +#include <linux/types.h> /* * We put the hardirq and softirq counter into the preemption @@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void) return level; } +/* + * These macro definitions avoid redundant invocations of preempt_count() + * because such invocations would result in redundant loads given that + * preempt_count() is commonly implemented with READ_ONCE(). + */ + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif -#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* * Macros to retrieve the current execution context: @@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void) #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) +#ifdef CONFIG_PREEMPT_RT +# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) +#else +# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif /* * The following macros are deprecated and should not be used in new code: @@ -349,7 +360,9 @@ void preempt_notifier_unregister(struct preempt_notifier *notifier); static inline void preempt_notifier_init(struct preempt_notifier *notifier, struct preempt_ops *ops) { - INIT_HLIST_NODE(¬ifier->link); + /* INIT_HLIST_NODE() open coded, to avoid dependency on list.h */ + notifier->link.next = NULL; + notifier->link.pprev = NULL; notifier->ops = ops; } diff --git a/include/linux/printk.h b/include/linux/printk.h index 8ef499ab3c1e..955e31860095 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -273,6 +273,8 @@ static inline void printk_trigger_flush(void) } #endif +bool this_cpu_in_panic(void); + #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de407e7c3b55..0b2a89854440 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 49539bc416ce..5ea470eb4d76 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,7 +66,7 @@ static inline void proc_free_inum(unsigned int inum) {} static inline int ns_alloc_inum(struct ns_common *ns) { - atomic_long_set(&ns->stashed, 0); + WRITE_ONCE(ns->stashed, NULL); return proc_alloc_inum(&ns->inum); } diff --git a/include/linux/profile.h b/include/linux/profile.h index 11db1ec516e2..04ae5ebcb637 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,13 +18,8 @@ struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(void); int create_proc_profile(void); #else -static inline void create_prof_cpu_mask(void) -{ -} - static inline int create_proc_profile(void) { return 0; diff --git a/include/linux/property.h b/include/linux/property.h index 8c3c6685a2ae..3a1045eb786c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -10,6 +10,8 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include <linux/args.h> +#include <linux/array_size.h> #include <linux/bits.h> #include <linux/fwnode.h> #include <linux/stddef.h> @@ -26,12 +28,6 @@ enum dev_prop_type { DEV_PROP_REF, }; -enum dev_dma_attr { - DEV_DMA_NOT_SUPPORTED, - DEV_DMA_NON_COHERENT, - DEV_DMA_COHERENT, -}; - const struct fwnode_handle *__dev_fwnode_const(const struct device *dev); struct fwnode_handle *__dev_fwnode(struct device *dev); #define dev_fwnode(dev) \ @@ -79,6 +75,16 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, bool fwnode_device_is_available(const struct fwnode_handle *fwnode); +static inline bool fwnode_device_is_big_endian(const struct fwnode_handle *fwnode) +{ + if (fwnode_property_present(fwnode, "big-endian")) + return true; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && + fwnode_property_present(fwnode, "native-endian")) + return true; + return false; +} + static inline bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat) { @@ -86,6 +92,22 @@ bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char } /** + * device_is_big_endian - check if a device has BE registers + * @dev: Pointer to the struct device + * + * Returns: true if the device has a "big-endian" property, or if the kernel + * was compiled for BE *and* the device has a "native-endian" property. + * Returns false otherwise. + * + * Callers would nominally use ioread32be/iowrite32be if + * device_is_big_endian() == true, or readl/writel otherwise. + */ +static inline bool device_is_big_endian(const struct device *dev) +{ + return fwnode_device_is_big_endian(dev_fwnode(dev)); +} + +/** * device_is_compatible - match 'compatible' property of the device with a given string * @dev: Pointer to the struct device * @compat: The string to match 'compatible' property with @@ -97,6 +119,18 @@ static inline bool device_is_compatible(const struct device *dev, const char *co return fwnode_device_is_compatible(dev_fwnode(dev), compat); } +int fwnode_property_match_property_string(const struct fwnode_handle *fwnode, + const char *propname, + const char * const *array, size_t n); + +static inline +int device_property_match_property_string(const struct device *dev, + const char *propname, + const char * const *array, size_t n) +{ + return fwnode_property_match_property_string(dev_fwnode(dev), propname, array, n); +} + int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, @@ -108,6 +142,7 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *fwnode_get_name(const struct fwnode_handle *fwnode); const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode); +bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); @@ -116,11 +151,9 @@ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); for (parent = fwnode_get_parent(fwnode); parent; \ parent = fwnode_get_next_parent(parent)) -struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode); unsigned int fwnode_count_parents(const struct fwnode_handle *fwn); struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn, unsigned int depth); -bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, const struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_available_child_node( @@ -288,7 +321,7 @@ struct software_node_ref_args { #define SOFTWARE_NODE_REFERENCE(_ref_, ...) \ (const struct software_node_ref_args) { \ .node = _ref_, \ - .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ + .nargs = COUNT_ARGS(__VA_ARGS__), \ .args = { __VA_ARGS__ }, \ } @@ -488,6 +521,13 @@ struct software_node { const struct property_entry *properties; }; +#define SOFTWARE_NODE(_name_, _properties_, _parent_) \ + (struct software_node) { \ + .name = _name_, \ + .properties = _properties_, \ + .parent = _parent_, \ + } + bool is_software_node(const struct fwnode_handle *fwnode); const struct software_node * to_software_node(const struct fwnode_handle *fwnode); diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index eceda1d1407a..730f77381d55 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,7 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; - const struct xattr_handler **xattr; + const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; }; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fd17e82bab4..3705c2044fc0 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -78,6 +78,36 @@ enum sev_cmd { SEV_CMD_DBG_DECRYPT = 0x060, SEV_CMD_DBG_ENCRYPT = 0x061, + /* SNP specific commands */ + SEV_CMD_SNP_INIT = 0x081, + SEV_CMD_SNP_SHUTDOWN = 0x082, + SEV_CMD_SNP_PLATFORM_STATUS = 0x083, + SEV_CMD_SNP_DF_FLUSH = 0x084, + SEV_CMD_SNP_INIT_EX = 0x085, + SEV_CMD_SNP_SHUTDOWN_EX = 0x086, + SEV_CMD_SNP_DECOMMISSION = 0x090, + SEV_CMD_SNP_ACTIVATE = 0x091, + SEV_CMD_SNP_GUEST_STATUS = 0x092, + SEV_CMD_SNP_GCTX_CREATE = 0x093, + SEV_CMD_SNP_GUEST_REQUEST = 0x094, + SEV_CMD_SNP_ACTIVATE_EX = 0x095, + SEV_CMD_SNP_LAUNCH_START = 0x0A0, + SEV_CMD_SNP_LAUNCH_UPDATE = 0x0A1, + SEV_CMD_SNP_LAUNCH_FINISH = 0x0A2, + SEV_CMD_SNP_DBG_DECRYPT = 0x0B0, + SEV_CMD_SNP_DBG_ENCRYPT = 0x0B1, + SEV_CMD_SNP_PAGE_SWAP_OUT = 0x0C0, + SEV_CMD_SNP_PAGE_SWAP_IN = 0x0C1, + SEV_CMD_SNP_PAGE_MOVE = 0x0C2, + SEV_CMD_SNP_PAGE_MD_INIT = 0x0C3, + SEV_CMD_SNP_PAGE_SET_STATE = 0x0C6, + SEV_CMD_SNP_PAGE_RECLAIM = 0x0C7, + SEV_CMD_SNP_PAGE_UNSMASH = 0x0C8, + SEV_CMD_SNP_CONFIG = 0x0C9, + SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, + SEV_CMD_SNP_COMMIT = 0x0CB, + SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_MAX, }; @@ -523,12 +553,269 @@ struct sev_data_attestation_report { u32 len; /* In/Out */ } __packed; +/** + * struct sev_data_snp_download_firmware - SNP_DOWNLOAD_FIRMWARE command params + * + * @address: physical address of firmware image + * @len: length of the firmware image + */ +struct sev_data_snp_download_firmware { + u64 address; /* In */ + u32 len; /* In */ +} __packed; + +/** + * struct sev_data_snp_activate - SNP_ACTIVATE command params + * + * @gctx_paddr: system physical address guest context page + * @asid: ASID to bind to the guest + */ +struct sev_data_snp_activate { + u64 gctx_paddr; /* In */ + u32 asid; /* In */ +} __packed; + +/** + * struct sev_data_snp_addr - generic SNP command params + * + * @address: physical address of generic data param + */ +struct sev_data_snp_addr { + u64 address; /* In/Out */ +} __packed; + +/** + * struct sev_data_snp_launch_start - SNP_LAUNCH_START command params + * + * @gctx_paddr: system physical address of guest context page + * @policy: guest policy + * @ma_gctx_paddr: system physical address of migration agent + * @ma_en: the guest is associated with a migration agent + * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the + * purpose of guest-assisted migration. + * @rsvd: reserved + * @gosvw: guest OS-visible workarounds, as defined by hypervisor + */ +struct sev_data_snp_launch_start { + u64 gctx_paddr; /* In */ + u64 policy; /* In */ + u64 ma_gctx_paddr; /* In */ + u32 ma_en:1; /* In */ + u32 imi_en:1; /* In */ + u32 rsvd:30; + u8 gosvw[16]; /* In */ +} __packed; + +/* SNP support page type */ +enum { + SNP_PAGE_TYPE_NORMAL = 0x1, + SNP_PAGE_TYPE_VMSA = 0x2, + SNP_PAGE_TYPE_ZERO = 0x3, + SNP_PAGE_TYPE_UNMEASURED = 0x4, + SNP_PAGE_TYPE_SECRET = 0x5, + SNP_PAGE_TYPE_CPUID = 0x6, + + SNP_PAGE_TYPE_MAX +}; + +/** + * struct sev_data_snp_launch_update - SNP_LAUNCH_UPDATE command params + * + * @gctx_paddr: system physical address of guest context page + * @page_size: page size 0 indicates 4K and 1 indicates 2MB page + * @page_type: encoded page type + * @imi_page: indicates that this page is part of the IMI (Incoming Migration + * Image) of the guest + * @rsvd: reserved + * @rsvd2: reserved + * @address: system physical address of destination page to encrypt + * @rsvd3: reserved + * @vmpl1_perms: VMPL permission mask for VMPL1 + * @vmpl2_perms: VMPL permission mask for VMPL2 + * @vmpl3_perms: VMPL permission mask for VMPL3 + * @rsvd4: reserved + */ +struct sev_data_snp_launch_update { + u64 gctx_paddr; /* In */ + u32 page_size:1; /* In */ + u32 page_type:3; /* In */ + u32 imi_page:1; /* In */ + u32 rsvd:27; + u32 rsvd2; + u64 address; /* In */ + u32 rsvd3:8; + u32 vmpl1_perms:8; /* In */ + u32 vmpl2_perms:8; /* In */ + u32 vmpl3_perms:8; /* In */ + u32 rsvd4; +} __packed; + +/** + * struct sev_data_snp_launch_finish - SNP_LAUNCH_FINISH command params + * + * @gctx_paddr: system physical address of guest context page + * @id_block_paddr: system physical address of ID block + * @id_auth_paddr: system physical address of ID block authentication structure + * @id_block_en: indicates whether ID block is present + * @auth_key_en: indicates whether author key is present in authentication structure + * @rsvd: reserved + * @host_data: host-supplied data for guest, not interpreted by firmware + */ +struct sev_data_snp_launch_finish { + u64 gctx_paddr; + u64 id_block_paddr; + u64 id_auth_paddr; + u8 id_block_en:1; + u8 auth_key_en:1; + u64 rsvd:62; + u8 host_data[32]; +} __packed; + +/** + * struct sev_data_snp_guest_status - SNP_GUEST_STATUS command params + * + * @gctx_paddr: system physical address of guest context page + * @address: system physical address of guest status page + */ +struct sev_data_snp_guest_status { + u64 gctx_paddr; + u64 address; +} __packed; + +/** + * struct sev_data_snp_page_reclaim - SNP_PAGE_RECLAIM command params + * + * @paddr: system physical address of page to be claimed. The 0th bit in the + * address indicates the page size. 0h indicates 4KB and 1h indicates + * 2MB page. + */ +struct sev_data_snp_page_reclaim { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_page_unsmash - SNP_PAGE_UNSMASH command params + * + * @paddr: system physical address of page to be unsmashed. The 0th bit in the + * address indicates the page size. 0h indicates 4 KB and 1h indicates + * 2 MB page. + */ +struct sev_data_snp_page_unsmash { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters + * + * @gctx_paddr: system physical address of guest context page + * @src_addr: source address of data to operate on + * @dst_addr: destination address of data to operate on + */ +struct sev_data_snp_dbg { + u64 gctx_paddr; /* In */ + u64 src_addr; /* In */ + u64 dst_addr; /* In */ +} __packed; + +/** + * struct sev_data_snp_guest_request - SNP_GUEST_REQUEST command params + * + * @gctx_paddr: system physical address of guest context page + * @req_paddr: system physical address of request page + * @res_paddr: system physical address of response page + */ +struct sev_data_snp_guest_request { + u64 gctx_paddr; /* In */ + u64 req_paddr; /* In */ + u64 res_paddr; /* In */ +} __packed; + +/** + * struct sev_data_snp_init_ex - SNP_INIT_EX structure + * + * @init_rmp: indicate that the RMP should be initialized. + * @list_paddr_en: indicate that list_paddr is valid + * @rsvd: reserved + * @rsvd1: reserved + * @list_paddr: system physical address of range list + * @rsvd2: reserved + */ +struct sev_data_snp_init_ex { + u32 init_rmp:1; + u32 list_paddr_en:1; + u32 rsvd:30; + u32 rsvd1; + u64 list_paddr; + u8 rsvd2[48]; +} __packed; + +/** + * struct sev_data_range - RANGE structure + * + * @base: system physical address of first byte of range + * @page_count: number of 4KB pages in this range + * @rsvd: reserved + */ +struct sev_data_range { + u64 base; + u32 page_count; + u32 rsvd; +} __packed; + +/** + * struct sev_data_range_list - RANGE_LIST structure + * + * @num_elements: number of elements in RANGE_ARRAY + * @rsvd: reserved + * @ranges: array of num_elements of type RANGE + */ +struct sev_data_range_list { + u32 num_elements; + u32 rsvd; + struct sev_data_range ranges[]; +} __packed; + +/** + * struct sev_data_snp_shutdown_ex - SNP_SHUTDOWN_EX structure + * + * @len: length of the command buffer read by the PSP + * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU + * @rsvd1: reserved + */ +struct sev_data_snp_shutdown_ex { + u32 len; + u32 iommu_snp_shutdown:1; + u32 rsvd1:31; +} __packed; + +/** + * struct sev_platform_init_args + * + * @error: SEV firmware error code + * @probe: True if this is being called as part of CCP module probe, which + * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed + * unless psp_init_on_probe module param is set + */ +struct sev_platform_init_args { + int error; + bool probe; +}; + +/** + * struct sev_data_snp_commit - SNP_COMMIT structure + * + * @len: length of the command buffer read by the PSP + */ +struct sev_data_snp_commit { + u32 len; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** * sev_platform_init - perform SEV INIT command * - * @error: SEV command return code + * @args: struct sev_platform_init_args to pass in arguments * * Returns: * 0 if the SEV successfully processed the command @@ -537,7 +824,7 @@ struct sev_data_attestation_report { * -%ETIMEDOUT if the SEV command timed out * -%EIO if the SEV returned a non-zero return code */ -int sev_platform_init(int *error); +int sev_platform_init(struct sev_platform_init_args *args); /** * sev_platform_status - perform SEV PLATFORM_STATUS command @@ -637,14 +924,32 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); +/** + * sev_do_cmd - issue an SEV or an SEV-SNP command + * + * @cmd: SEV or SEV-SNP firmware command to issue + * @data: arguments for firmware command + * @psp_ret: SEV command return code + * + * Returns: + * 0 if the SEV device successfully processed the command + * -%ENODEV if the PSP device is not available + * -%ENOTSUPP if PSP device does not support SEV + * -%ETIMEDOUT if the SEV command timed out + * -%EIO if PSP device returned a non-zero return code + */ +int sev_do_cmd(int cmd, void *data, int *psp_ret); + void *psp_copy_user_blob(u64 uaddr, u32 len); +void *snp_alloc_firmware_page(gfp_t mask); +void snp_free_firmware_page(void *addr); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ static inline int sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; } -static inline int sev_platform_init(int *error) { return -ENODEV; } +static inline int sev_platform_init(struct sev_platform_init_args *args) { return -ENODEV; } static inline int sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; } @@ -653,6 +958,9 @@ static inline int sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; } static inline int +sev_do_cmd(int cmd, void *data, int *psp_ret) { return -ENODEV; } + +static inline int sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; } static inline int sev_guest_df_flush(int *error) { return -ENODEV; } @@ -662,6 +970,13 @@ sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); } +static inline void *snp_alloc_firmware_page(gfp_t mask) +{ + return NULL; +} + +static inline void snp_free_firmware_page(void *addr) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 2a3a95586425..8dbd51ea8626 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -18,6 +18,16 @@ struct ptdump_state { const struct ptdump_range *range; }; +bool ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, + bool checkwx, bool dmesg); void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd); +bool ptdump_check_wx(void); + +static inline void debug_checkwx(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_WX)) + ptdump_check_wx(); +} #endif /* _LINUX_PTDUMP_H */ diff --git a/include/linux/pti.h b/include/linux/pti.h index 1a941efcaa62..1fbf9d6c20ef 100644 --- a/include/linux/pti.h +++ b/include/linux/pti.h @@ -2,7 +2,7 @@ #ifndef _INCLUDE_PTI_H #define _INCLUDE_PTI_H -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include <asm/pti.h> #else static inline void pti_init(void) { } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 1ef4e0f9bd2a..6e4b8206c7d0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -200,6 +200,7 @@ struct ptp_clock; enum ptp_clock_events { PTP_CLOCK_ALARM, PTP_CLOCK_EXTTS, + PTP_CLOCK_EXTOFF, PTP_CLOCK_PPS, PTP_CLOCK_PPSUSR, }; @@ -210,6 +211,7 @@ enum ptp_clock_events { * @type: One of the ptp_clock_events enumeration values. * @index: Identifies the source of the event. * @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only). + * @offset: When the event occurred (%PTP_CLOCK_EXTOFF only). * @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only). */ @@ -218,6 +220,7 @@ struct ptp_clock_event { int index; union { u64 timestamp; + s64 offset; struct pps_event_time pps_times; }; }; diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index 746fd67c3480..e8c74fa3f455 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,15 +8,15 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include <linux/clocksource_ids.h> #include <linux/types.h> struct timespec64; -struct clocksource; int kvm_arch_ptp_init(void); void kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, - struct timespec64 *tspec, struct clocksource **cs); + struct timespec64 *tspec, enum clocksource_ids *cs_id); #endif /* _PTP_KVM_H_ */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eaaef3ffec22..90507d4afcd6 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif +#ifndef exception_ip +#define exception_ip(x) instruction_pointer(x) +#endif + extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2f9f690a9c1..4a6568dfdf3f 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* @@ -69,9 +69,7 @@ struct pwm_state { * @label: name of the PWM device * @flags: flags associated with the PWM device * @hwpwm: per-chip relative index of the PWM device - * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -80,9 +78,7 @@ struct pwm_device { const char *label; unsigned long flags; unsigned int hwpwm; - unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -95,8 +91,8 @@ struct pwm_device { * @state: state to fill with the current PWM state * * The returned PWM state represents the state that was applied by a previous call to - * pwm_apply_state(). Drivers may have to slightly tweak that state before programming it to - * hardware. If pwm_apply_state() was never called, this returns either the current hardware + * pwm_apply_might_sleep(). Drivers may have to slightly tweak that state before programming it to + * hardware. If pwm_apply_might_sleep() was never called, this returns either the current hardware * state (if supported) or the default settings. */ static inline void pwm_get_state(const struct pwm_device *pwm, @@ -114,12 +110,6 @@ static inline bool pwm_is_enabled(const struct pwm_device *pwm) return state.enabled; } -static inline void pwm_set_period(struct pwm_device *pwm, u64 period) -{ - if (pwm) - pwm->state.period = period; -} - static inline u64 pwm_get_period(const struct pwm_device *pwm) { struct pwm_state state; @@ -129,12 +119,6 @@ static inline u64 pwm_get_period(const struct pwm_device *pwm) return state.period; } -static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) -{ - if (pwm) - pwm->state.duty_cycle = duty; -} - static inline u64 pwm_get_duty_cycle(const struct pwm_device *pwm) { struct pwm_state state; @@ -160,20 +144,20 @@ static inline void pwm_get_args(const struct pwm_device *pwm, } /** - * pwm_init_state() - prepare a new state to be applied with pwm_apply_state() + * pwm_init_state() - prepare a new state to be applied with pwm_apply_might_sleep() * @pwm: PWM device * @state: state to fill with the prepared PWM state * * This functions prepares a state that can later be tweaked and applied - * to the PWM device with pwm_apply_state(). This is a convenient function + * to the PWM device with pwm_apply_might_sleep(). This is a convenient function * that first retrieves the current PWM state and the replaces the period * and polarity fields with the reference values defined in pwm->args. * Once the function returns, you can adjust the ->enabled and ->duty_cycle - * fields according to your needs before calling pwm_apply_state(). + * fields according to your needs before calling pwm_apply_might_sleep(). * * ->duty_cycle is initially set to zero to avoid cases where the current * ->duty_cycle value exceed the pwm_args->period one, which would trigger - * an error if the user calls pwm_apply_state() without adjusting ->duty_cycle + * an error if the user calls pwm_apply_might_sleep() without adjusting ->duty_cycle * first. */ static inline void pwm_init_state(const struct pwm_device *pwm, @@ -229,7 +213,7 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) * * pwm_init_state(pwm, &state); * pwm_set_relative_duty_cycle(&state, 50, 100); - * pwm_apply_state(pwm, &state); + * pwm_apply_might_sleep(pwm, &state); * * This functions returns -EINVAL if @duty_cycle and/or @scale are * inconsistent (@scale == 0 or @duty_cycle > @scale). @@ -267,7 +251,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,38 +261,63 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller - * @base: number of first PWM controlled by this chip + * @owner: module providing this chip + * @id: unique number of this PWM chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier - * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier - * @list: list node for internal use + * @atomic: can the driver's ->apply() be called in atomic context + * @driver_data: Private pointer for driver specific info * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { struct device *dev; const struct pwm_ops *ops; - int base; + struct module *owner; + unsigned int id; unsigned int npwm; struct pwm_device * (*of_xlate)(struct pwm_chip *chip, const struct of_phandle_args *args); - unsigned int of_pwm_n_cells; + bool atomic; /* only used internally by the PWM framework */ - struct list_head list; + void *driver_data; struct pwm_device *pwms; }; +static inline struct device *pwmchip_parent(const struct pwm_chip *chip) +{ + return chip->dev; +} + +static inline void *pwmchip_get_drvdata(struct pwm_chip *chip) +{ + /* + * After pwm_chip got a dedicated struct device, this can be replaced by + * dev_get_drvdata(&chip->dev); + */ + return chip->driver_data; +} + +static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) +{ + /* + * After pwm_chip got a dedicated struct device, this can be replaced by + * dev_set_drvdata(&chip->dev, data); + */ + chip->driver_data = data; +} + #if IS_ENABLED(CONFIG_PWM) /* PWM user APIs */ -int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** @@ -337,7 +345,7 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, state.duty_cycle = duty_ns; state.period = period_ns; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /** @@ -358,7 +366,7 @@ static inline int pwm_enable(struct pwm_device *pwm) return 0; state.enabled = true; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /** @@ -377,19 +385,34 @@ static inline void pwm_disable(struct pwm_device *pwm) return; state.enabled = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); +} + +/** + * pwm_might_sleep() - is pwm_apply_atomic() supported? + * @pwm: PWM device + * + * Returns: false if pwm_apply_atomic() can be called from atomic context. + */ +static inline bool pwm_might_sleep(struct pwm_device *pwm) +{ + return !pwm->chip->atomic; } /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +void pwmchip_put(struct pwm_chip *chip); +struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); +struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); + +int __pwmchip_add(struct pwm_chip *chip, struct module *owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, @@ -408,16 +431,27 @@ struct pwm_device *devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, const char *con_id); #else -static inline int pwm_apply_state(struct pwm_device *pwm, - const struct pwm_state *state) +static inline bool pwm_might_sleep(struct pwm_device *pwm) +{ + return true; +} + +static inline int pwm_apply_might_sleep(struct pwm_device *pwm, + const struct pwm_state *state) { might_sleep(); - return -ENOTSUPP; + return -EOPNOTSUPP; +} + +static inline int pwm_apply_atomic(struct pwm_device *pwm, + const struct pwm_state *state) +{ + return -EOPNOTSUPP; } static inline int pwm_adjust_config(struct pwm_device *pwm) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pwm_config(struct pwm_device *pwm, int duty_ns, @@ -445,14 +479,22 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) +static inline void pwmchip_put(struct pwm_chip *chip) { - return -EINVAL; } -static inline void *pwm_get_chip_data(struct pwm_device *pwm) +static inline struct pwm_chip *pwmchip_alloc(struct device *parent, + unsigned int npwm, + size_t sizeof_priv) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct pwm_chip *devm_pwmchip_alloc(struct device *parent, + unsigned int npwm, + size_t sizeof_priv) { - return NULL; + return pwmchip_alloc(parent, npwm, sizeof_priv); } static inline int pwmchip_add(struct pwm_chip *chip) @@ -536,7 +578,14 @@ static inline void pwm_apply_args(struct pwm_device *pwm) state.period = pwm->args.period; state.usage_power = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); +} + +/* only for backwards-compatibility, new code should not use this */ +static inline int pwm_apply_state(struct pwm_device *pwm, + const struct pwm_state *state) +{ + return pwm_apply_might_sleep(pwm, state); } struct pwm_lookup { diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 4fa4ef0a173a..06cc8888199e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -74,7 +74,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags); int dquot_alloc_inode(struct inode *inode); -int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); +void dquot_claim_space_nodirty(struct inode *inode, qsize_t number); void dquot_free_inode(struct inode *inode); void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number); @@ -257,10 +257,9 @@ static inline void __dquot_free_space(struct inode *inode, qsize_t number, inode_sub_bytes(inode, number); } -static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) +static inline void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { inode_add_bytes(inode, number); - return 0; } static inline int dquot_reclaim_space_nodirty(struct inode *inode, @@ -358,14 +357,10 @@ static inline int dquot_reserve_block(struct inode *inode, qsize_t nr) DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE); } -static inline int dquot_claim_block(struct inode *inode, qsize_t nr) +static inline void dquot_claim_block(struct inode *inode, qsize_t nr) { - int ret; - - ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits); - if (!ret) - mark_inode_dirty_sync(inode); - return ret; + dquot_claim_space_nodirty(inode, nr << inode->i_blkbits); + mark_inode_dirty_sync(inode); } static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr) diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 006e18decfad..98030accf641 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -84,8 +84,6 @@ extern const struct raid6_calls raid6_intx1; extern const struct raid6_calls raid6_intx2; extern const struct raid6_calls raid6_intx4; extern const struct raid6_calls raid6_intx8; -extern const struct raid6_calls raid6_intx16; -extern const struct raid6_calls raid6_intx32; extern const struct raid6_calls raid6_mmxx1; extern const struct raid6_calls raid6_mmxx2; extern const struct raid6_calls raid6_sse1x1; diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 5d868505a94e..6d92b68efbf6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) diff --git a/include/linux/ras.h b/include/linux/ras.h index 1f4048bf2674..a64182bc72ad 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); + #else static inline void log_non_standard_event(const guid_t *sec_type, @@ -35,4 +36,21 @@ static inline void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif +struct atl_err { + u64 addr; + u64 ipid; + u32 cpu; +}; + +#if IS_ENABLED(CONFIG_AMD_ATL) +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); +void amd_atl_unregister_decoder(void); +void amd_retire_dram_row(struct atl_err *err); +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +#else +static inline void amd_retire_dram_row(struct atl_err *err) { } +static inline unsigned long +amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } +#endif /* CONFIG_AMD_ATL */ + #endif /* __RAS_H__ */ diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h new file mode 100644 index 000000000000..5640f024773b --- /dev/null +++ b/include/linux/rcu_notifier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update notifiers, initially RCU CPU stall notifier. + * Separate from rcupdate.h to avoid #include loops. + * + * Copyright (C) 2023 Paul E. McKenney. + */ + +#ifndef __LINUX_RCU_NOTIFIER_H +#define __LINUX_RCU_NOTIFIER_H + +// Actions for RCU CPU stall notifier calls. +#define RCU_STALL_NOTIFY_NORM 1 +#define RCU_STALL_NOTIFY_EXP 2 + +#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) + +#include <linux/notifier.h> +#include <linux/types.h> + +int rcu_stall_chain_notifier_register(struct notifier_block *n); +int rcu_stall_chain_notifier_unregister(struct notifier_block *n); + +#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) + +// No RCU CPU stall warnings in Tiny RCU. +static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; } +static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; } + +#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) + +#endif /* __LINUX_RCU_NOTIFIER_H */ diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 0027d4c8087c..3860dbb9107a 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) } extern void rcu_sync_init(struct rcu_sync *); -extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d29740be4833..3dc1e58865f7 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -355,7 +355,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, }) /** - * list_next_or_null_rcu - get the first element from a list + * list_next_or_null_rcu - get the next element from a list * @head: the head for the list. * @ptr: the list head to take the next element from. * @type: the type of the struct this is embedded in. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e5f920ade90..17d7ed5f3ae6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,9 +34,6 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) -#define ulong2long(a) (*(long *)(&(a))) -#define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) -#define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); @@ -122,8 +119,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(unsigned int cpu); -void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); @@ -189,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ - if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ @@ -252,6 +247,37 @@ do { \ cond_resched(); \ } while (0) +/** + * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states + * @old_ts: jiffies at start of processing. + * + * This helper is for long-running softirq handlers, such as NAPI threads in + * networking. The caller should initialize the variable passed in as @old_ts + * at the beginning of the softirq handler. When invoked frequently, this macro + * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will + * provide both RCU and RCU-Tasks quiescent states. Note that this macro + * modifies its old_ts argument. + * + * Because regions of code that have disabled softirq act as RCU read-side + * critical sections, this macro should be invoked with softirq (and + * preemption) enabled. + * + * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would + * have more chance to invoke schedule() calls and provide necessary quiescent + * states. As a contrast, calling cond_resched() only won't achieve the same + * effect because cond_resched() does not provide RCU-Tasks quiescent states. + */ +#define rcu_softirq_qs_periodic(old_ts) \ +do { \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ + time_after(jiffies, (old_ts) + HZ / 10)) { \ + preempt_disable(); \ + rcu_softirq_qs(); \ + preempt_enable(); \ + (old_ts) = jiffies; \ + } \ +} while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. @@ -303,6 +329,11 @@ static inline void rcu_lock_acquire(struct lockdep_map *map) lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } +static inline void rcu_try_lock_acquire(struct lockdep_map *map) +{ + lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_); +} + static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); @@ -317,6 +348,7 @@ int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) +# define rcu_try_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 5e0f74f2f8ca..d07f0848802e 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -8,6 +8,7 @@ #include <linux/rcupdate.h> #include <linux/completion.h> +#include <linux/sched.h> /* * Structure allowing asynchronous waiting on RCU. @@ -55,4 +56,13 @@ do { \ #define synchronize_rcu_mult(...) \ _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) +static inline void cond_resched_rcu(void) +{ +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) + rcu_read_unlock(); + cond_resched(); + rcu_read_lock(); +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7b949292908a..d9ac7b136aea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -171,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); } #define rcutree_offline_cpu NULL #define rcutree_dead_cpu NULL #define rcutree_dying_cpu NULL -static inline void rcu_cpu_starting(unsigned int cpu) { } +static inline void rcutree_report_cpu_starting(unsigned int cpu) { } #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 126f6b418f6a..254244202ea9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,6 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); @@ -111,9 +110,21 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -int rcutree_offline_cpu(unsigned int cpu); +void rcutree_report_cpu_starting(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); int rcutree_dying_cpu(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu); +int rcutree_offline_cpu(unsigned int cpu); +#else +#define rcutree_dead_cpu NULL +#define rcutree_dying_cpu NULL +#define rcutree_offline_cpu NULL +#endif + +void rcutree_migrate_callbacks(int cpu); + +/* Called from hotplug and also arm64 early secondary boot failure */ +void rcutree_report_cpu_dead(void); #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 2b6bb593be5b..abcdde4df697 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -129,11 +129,14 @@ enum sys_off_mode { * @cb_data: User's callback data. * @cmd: Command string. Currently used only by the sys-off restart mode, * NULL otherwise. + * @dev: Device of the sys-off handler. Only if known (devm_register_*), + * NULL otherwise. */ struct sys_off_data { int mode; void *cb_data; const char *cmd; + struct device *dev; }; struct sys_off_handler * @@ -174,7 +177,17 @@ void ctrl_alt_del(void); extern void orderly_poweroff(bool force); extern void orderly_reboot(void); -void hw_protection_shutdown(const char *reason, int ms_until_forced); +void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown); + +static inline void hw_protection_reboot(const char *reason, int ms_until_forced) +{ + __hw_protection_shutdown(reason, ms_until_forced, false); +} + +static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) +{ + __hw_protection_shutdown(reason, ms_until_forced, true); +} /* * Emergency restart, callable from an interrupt handler. diff --git a/include/linux/refcount.h b/include/linux/refcount.h index a62fcca97486..59b3b752394d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -96,22 +96,11 @@ #include <linux/bug.h> #include <linux/compiler.h> #include <linux/limits.h> +#include <linux/refcount_types.h> #include <linux/spinlock_types.h> struct mutex; -/** - * typedef refcount_t - variant of atomic_t specialized for reference counts - * @refs: atomic_t counter field - * - * The counter saturates at REFCOUNT_SATURATED and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free bugs. - */ -typedef struct refcount_struct { - atomic_t refs; -} refcount_t; - #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } #define REFCOUNT_MAX INT_MAX #define REFCOUNT_SATURATED (INT_MIN / 2) @@ -147,7 +136,8 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -188,7 +178,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } -static inline void __refcount_add(int i, refcount_t *r, int *oldp) +static inline __signed_wrap +void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -267,7 +258,8 @@ static inline void refcount_inc(refcount_t *r) __refcount_inc(r, NULL); } -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); diff --git a/include/linux/refcount_types.h b/include/linux/refcount_types.h new file mode 100644 index 000000000000..162004f06edf --- /dev/null +++ b/include/linux/refcount_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_REFCOUNT_TYPES_H +#define _LINUX_REFCOUNT_TYPES_H + +#include <linux/types.h> + +/** + * typedef refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * + * The counter saturates at REFCOUNT_SATURATED and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. + */ +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#endif /* _LINUX_REFCOUNT_TYPES_H */ diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c9182a47736e..b743241cfb7c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -332,6 +332,10 @@ typedef void (*regmap_unlock)(void *); * @io_port: Support IO port accessors. Makes sense only when MMIO vs. IO port * access can be distinguished. * @max_register: Optional, specifies the maximum valid register address. + * @max_register_is_0: Optional, specifies that zero value in @max_register + * should be taken into account. This is a workaround to + * apply handling of @max_register for regmap that contains + * only one register. * @wr_table: Optional, points to a struct regmap_access_table specifying * valid ranges for write access. * @rd_table: As above, for read access. @@ -422,6 +426,7 @@ struct regmap_config { bool io_port; unsigned int max_register; + bool max_register_is_0; const struct regmap_access_table *wr_table; const struct regmap_access_table *rd_table; const struct regmap_access_table *volatile_table; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 39b666b40ea6..4660582a3302 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -33,6 +33,7 @@ #include <linux/err.h> #include <linux/suspend.h> +#include <regulator/regulator.h> struct device; struct notifier_block; @@ -85,52 +86,6 @@ struct regulator_dev; #define REGULATOR_MODE_STANDBY 0x8 /* - * Regulator notifier events. - * - * UNDER_VOLTAGE Regulator output is under voltage. - * OVER_CURRENT Regulator output current is too high. - * REGULATION_OUT Regulator output is out of regulation. - * FAIL Regulator output has failed. - * OVER_TEMP Regulator over temp. - * FORCE_DISABLE Regulator forcibly shut down by software. - * VOLTAGE_CHANGE Regulator voltage changed. - * Data passed is old voltage cast to (void *). - * DISABLE Regulator was disabled. - * PRE_VOLTAGE_CHANGE Regulator is about to have voltage changed. - * Data passed is "struct pre_voltage_change_data" - * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason. - * Data passed is old voltage cast to (void *). - * PRE_DISABLE Regulator is about to be disabled - * ABORT_DISABLE Regulator disable failed for some reason - * - * NOTE: These events can be OR'ed together when passed into handler. - */ - -#define REGULATOR_EVENT_UNDER_VOLTAGE 0x01 -#define REGULATOR_EVENT_OVER_CURRENT 0x02 -#define REGULATOR_EVENT_REGULATION_OUT 0x04 -#define REGULATOR_EVENT_FAIL 0x08 -#define REGULATOR_EVENT_OVER_TEMP 0x10 -#define REGULATOR_EVENT_FORCE_DISABLE 0x20 -#define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 -#define REGULATOR_EVENT_DISABLE 0x80 -#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100 -#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 -#define REGULATOR_EVENT_PRE_DISABLE 0x400 -#define REGULATOR_EVENT_ABORT_DISABLE 0x800 -#define REGULATOR_EVENT_ENABLE 0x1000 -/* - * Following notifications should be emitted only if detected condition - * is such that the HW is likely to still be working but consumers should - * take a recovery action to prevent problems esacalating into errors. - */ -#define REGULATOR_EVENT_UNDER_VOLTAGE_WARN 0x2000 -#define REGULATOR_EVENT_OVER_CURRENT_WARN 0x4000 -#define REGULATOR_EVENT_OVER_VOLTAGE_WARN 0x8000 -#define REGULATOR_EVENT_OVER_TEMP_WARN 0x10000 -#define REGULATOR_EVENT_WARN_MASK 0x1E000 - -/* * Regulator errors that can be queried using regulator_get_error_flags * * UNDER_VOLTAGE Regulator output is under voltage. diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4b7eceb3828b..22a07c0900a4 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -51,12 +51,7 @@ enum regulator_detection_severity { /* Initialize struct linear_range for regulators */ #define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV) \ -{ \ - .min = _min_uV, \ - .min_sel = _min_sel, \ - .max_sel = _max_sel, \ - .step = _step_uV, \ -} + LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV) /** * struct regulator_ops - regulator operations. diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 621b7f4a3639..0cd76d264727 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -49,6 +49,13 @@ struct regulator; #define DISABLE_IN_SUSPEND 1 #define ENABLE_IN_SUSPEND 2 +/* + * Default time window (in milliseconds) following a critical under-voltage + * event during which less critical actions can be safely carried out by the + * system. + */ +#define REGULATOR_DEF_UV_LESS_CRITICAL_WINDOW_MS 10 + /* Regulator active discharge flags */ enum regulator_active_discharge { REGULATOR_ACTIVE_DISCHARGE_DEFAULT, @@ -127,6 +134,8 @@ struct notification_limit { * @ramp_disable: Disable ramp delay when initialising or when setting voltage. * @soft_start: Enable soft start so that voltage ramps slowly. * @pull_down: Enable pull down when regulator is disabled. + * @system_critical: Set if the regulator is critical to system stability or + * functionality. * @over_current_protection: Auto disable on over current event. * * @over_current_detection: Configure over current limits. @@ -153,6 +162,13 @@ struct notification_limit { * regulator_active_discharge values are used for * initialisation. * @enable_time: Turn-on time of the rails (unit: microseconds) + * @uv_less_critical_window_ms: Specifies the time window (in milliseconds) + * following a critical under-voltage (UV) event + * during which less critical actions can be + * safely carried out by the system (for example + * logging). After this time window more critical + * actions should be done (for example prevent + * HW damage). */ struct regulation_constraints { @@ -204,6 +220,7 @@ struct regulation_constraints { unsigned int settling_time_up; unsigned int settling_time_down; unsigned int enable_time; + unsigned int uv_less_critical_window_ms; unsigned int active_discharge; @@ -214,6 +231,7 @@ struct regulation_constraints { unsigned ramp_disable:1; /* disable ramp delay */ unsigned soft_start:1; /* ramp voltage slowly */ unsigned pull_down:1; /* pull down resistor when regulator off */ + unsigned system_critical:1; /* critical to system stability */ unsigned over_current_protection:1; /* auto disable on over current */ unsigned over_current_detection:1; /* notify on over current */ unsigned over_voltage_detection:1; /* notify on over voltage */ diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h index 8313e7ed6aec..a225e9eeb30d 100644 --- a/include/linux/regulator/max8973-regulator.h +++ b/include/linux/regulator/max8973-regulator.h @@ -48,10 +48,6 @@ * control signal from EN input pin. If it is false then * voltage output will be enabled/disabled through EN bit of * device register. - * @enable_gpio: Enable GPIO. If EN pin is controlled through GPIO from host - * then GPIO number can be provided. If no GPIO controlled then - * it should be -1. - * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic. * @dvs_def_state: Default state of dvs. 1 if it is high else 0. */ struct max8973_regulator_platform_data { @@ -59,8 +55,6 @@ struct max8973_regulator_platform_data { unsigned long control_flags; unsigned long junction_temp_warning; bool enable_ext_control; - int enable_gpio; - int dvs_gpio; unsigned dvs_def_state:1; }; diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h index c71a6a9fce7a..562386f9b80e 100644 --- a/include/linux/regulator/mt6358-regulator.h +++ b/include/linux/regulator/mt6358-regulator.h @@ -86,6 +86,9 @@ enum { MT6366_ID_VMC, MT6366_ID_VAUD28, MT6366_ID_VSIM2, + MT6366_ID_VM18, + MT6366_ID_VMDDR, + MT6366_ID_VSRAM_CORE, MT6366_ID_RG_MAX, }; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8334eeacfec5..a365f67131ec 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,12 @@ #include <linux/list.h> #include <linux/pid.h> +/* CLOSID, RMID value used by the default control group */ +#define RESCTRL_RESERVED_CLOSID 0 +#define RESCTRL_RESERVED_RMID 0 + +#define RESCTRL_PICK_ANY_CPU -1 + #ifdef CONFIG_PROC_CPU_RESCTRL int proc_resctrl_show(struct seq_file *m, @@ -94,7 +100,7 @@ struct rdt_domain { * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. + * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. */ @@ -102,7 +108,7 @@ struct resctrl_cache { unsigned int cbm_len; unsigned int min_cbm_bits; unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; + bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; }; @@ -153,7 +159,7 @@ struct resctrl_schema; * @cache_level: Which cache level defines scope of this resource * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: All domains for this resource + * @domains: RCU list of all domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. @@ -219,36 +225,70 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_online_cpu(unsigned int cpu); +void resctrl_offline_cpu(unsigned int cpu); /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. * @d: domain that the counter should be read from. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid + * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. * @val: result of the counter read in bytes. + * @arch_mon_ctx: An architecture specific value from + * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies + * the hardware monitor allocated for this read request. * - * Call from process context on a CPU that belongs to domain @d. + * Some architectures need to sleep when first programming some of the counters. + * (specifically: arm64's MPAM cache occupancy counters can return 'not ready' + * for a short period of time). Call from a non-migrateable process context on + * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or + * schedule_work_on(). This function can be called with interrupts masked, + * e.g. using smp_call_function_any(), but may consistently return an error. * * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid, u64 *val); + u32 closid, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *arch_mon_ctx); + +/** + * resctrl_arch_rmid_read_context_check() - warn about invalid contexts + * + * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when + * resctrl_arch_rmid_read() is called with preemption disabled. + * + * The contract with resctrl_arch_rmid_read() is that if interrupts + * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an + * IPI, (and fail if the call needed to sleep), while most of the time + * the work is scheduled, allowing the call to sleep. + */ +static inline void resctrl_arch_rmid_read_context_check(void) +{ + if (!irqs_disabled()) + might_sleep(); +} /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. * @r: The domain's resource. * @d: The rmid's domain. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid only. * @rmid: The rmid whose counter values should be reset. * @eventid: The eventid whose counter values should be reset. * * This can be called from any CPU. */ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid); + u32 closid, u32 rmid, + enum resctrl_event_id eventid); /** * resctrl_arch_reset_rmid_all() - Reset all private state associated with diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 0fa4f60e1186..357df16ede32 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -60,6 +60,9 @@ struct reset_control_lookup { * @reset_control_head: head of internal list of requested reset controls * @dev: corresponding driver model device struct * @of_node: corresponding device tree node as phandle target + * @of_args: for reset-gpios controllers: corresponding phandle args with + * of_node and GPIO number complementing of_node; either this or + * of_node should be present * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the * device tree to id as given to the reset control ops, defaults @@ -73,6 +76,7 @@ struct reset_controller_dev { struct list_head reset_control_head; struct device *dev; struct device_node *of_node; + const struct of_phandle_args *of_args; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, const struct of_phandle_args *reset_spec); diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 980a65594412..13f17676c5f4 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -7,8 +7,8 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <linux/time64.h> +struct __kernel_timespec; struct timespec; struct old_timespec32; struct pollfd; diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h index f8f3e958e9cf..e0135e0adae0 100644 --- a/include/linux/resume_user_mode.h +++ b/include/linux/resume_user_mode.h @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/task_work.h> #include <linux/memcontrol.h> +#include <linux/rseq.h> #include <linux/blk-cgroup.h> /** diff --git a/include/linux/rethook.h b/include/linux/rethook.h index 26b6f3c81a76..ba60962805f6 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -6,11 +6,10 @@ #define _LINUX_RETHOOK_H #include <linux/compiler.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/kallsyms.h> #include <linux/llist.h> #include <linux/rcupdate.h> -#include <linux/refcount.h> struct rethook_node; @@ -29,15 +28,18 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, */ struct rethook { void *data; - rethook_handler_t handler; - struct freelist_head pool; - refcount_t ref; + /* + * To avoid sparse warnings, this uses a raw function pointer with + * __rcu, instead of rethook_handler_t. But this must be same as + * rethook_handler_t. + */ + void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *); + struct objpool_head pool; struct rcu_head rcu; }; /** * struct rethook_node - The rethook shadow-stack entry node. - * @freelist: The freelist, linked to struct rethook::pool. * @rcu: The rcu_head for deferred freeing. * @llist: The llist, linked to a struct task_struct::rethooks. * @rethook: The pointer to the struct rethook. @@ -48,20 +50,16 @@ struct rethook { * on each entry of the shadow stack. */ struct rethook_node { - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct rethook *rethook; unsigned long ret_addr; unsigned long frame; }; -struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num); void rethook_stop(struct rethook *rh); void rethook_free(struct rethook *rh); -void rethook_add_node(struct rethook *rh, struct rethook_node *node); struct rethook_node *rethook_try_get(struct rethook *rh); void rethook_recycle(struct rethook_node *node); void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount); @@ -98,4 +96,3 @@ void rethook_flush_task(struct task_struct *tk); #endif #endif - diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 57467cbf4c5b..b6f3797277ff 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -12,7 +12,7 @@ #include <linux/atomic.h> #include <linux/compiler.h> #include <linux/mutex.h> -#include <linux/workqueue.h> +#include <linux/workqueue_types.h> struct rhash_head { struct rhash_head __rcu *next; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 782e14f62201..dc5ae4e96aee 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -98,7 +98,9 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); +typedef bool (*ring_buffer_cond_fn)(void *data); +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, + ring_buffer_cond_fn cond, void *data); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); @@ -141,6 +143,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter); bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter); unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer); void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_reset_online_cpus(struct trace_buffer *buffer); @@ -191,15 +194,24 @@ bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); -void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); -void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data); -int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, +struct buffer_data_read_page; +struct buffer_data_read_page * +ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, + struct buffer_data_read_page *page); +int ring_buffer_read_page(struct trace_buffer *buffer, + struct buffer_data_read_page *data_page, size_t len, int cpu, int full); +void *ring_buffer_read_page_data(struct buffer_data_read_page *page); struct trace_seq; int ring_buffer_print_entry_header(struct trace_seq *s); -int ring_buffer_print_page_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s); + +int ring_buffer_subbuf_order_get(struct trace_buffer *buffer); +int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order); +int ring_buffer_subbuf_size_get(struct trace_buffer *buffer); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 51cc21ebb568..b7944a833668 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -121,6 +121,11 @@ static inline void anon_vma_lock_write(struct anon_vma *anon_vma) down_write(&anon_vma->root->rwsem); } +static inline int anon_vma_trylock_write(struct anon_vma *anon_vma) +{ + return down_write_trylock(&anon_vma->root->rwsem); +} + static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) { up_write(&anon_vma->root->rwsem); @@ -172,133 +177,323 @@ struct anon_vma *folio_get_anon_vma(struct folio *folio); typedef int __bitwise rmap_t; /* - * No special request: if the page is a subpage of a compound page, it is - * mapped via a PTE. The mapped (sub)page is possibly shared between processes. + * No special request: A mapped anonymous (sub)page is possibly shared between + * processes. */ #define RMAP_NONE ((__force rmap_t)0) -/* The (sub)page is exclusive to a single process. */ +/* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) /* - * The compound page is not mapped via PTEs, but instead via a single PMD and - * should be accounted accordingly. + * Internally, we're using an enum to specify the granularity. We make the + * compiler emit specialized code for each granularity. */ -#define RMAP_COMPOUND ((__force rmap_t)BIT(1)) +enum rmap_level { + RMAP_LEVEL_PTE = 0, + RMAP_LEVEL_PMD, +}; + +static inline void __folio_rmap_sanity_checks(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level) +{ + /* hugetlb folios are handled separately. */ + VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + + /* + * TODO: we get driver-allocated folios that have nothing to do with + * the rmap using vm_insert_page(); therefore, we cannot assume that + * folio_test_large_rmappable() holds for large folios. We should + * handle any desired mapcount+stats accounting for these folios in + * VM_MIXEDMAP VMAs separately, and then sanity-check here that + * we really only get rmappable folios. + */ + + VM_WARN_ON_ONCE(nr_pages <= 0); + VM_WARN_ON_FOLIO(page_folio(page) != folio, folio); + VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); + + switch (level) { + case RMAP_LEVEL_PTE: + break; + case RMAP_LEVEL_PMD: + /* + * We don't support folios larger than a single PMD yet. So + * when RMAP_LEVEL_PMD is set, we assume that we are creating + * a single "entire" mapping of the folio. + */ + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); + break; + default: + VM_WARN_ON_ONCE(true); + } +} /* * rmap interfaces called when adding or removing pte of page */ -void page_move_anon_rmap(struct page *, struct vm_area_struct *); -void page_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long address, rmap_t flags); -void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long address); +void folio_move_anon_rmap(struct folio *, struct vm_area_struct *); +void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages, + struct vm_area_struct *, unsigned long address, rmap_t flags); +#define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \ + folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags) +void folio_add_anon_rmap_pmd(struct folio *, struct page *, + struct vm_area_struct *, unsigned long address, rmap_t flags); void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); -void page_add_file_rmap(struct page *, struct vm_area_struct *, - bool compound); -void folio_add_file_rmap_range(struct folio *, struct page *, unsigned int nr, - struct vm_area_struct *, bool compound); -void page_remove_rmap(struct page *, struct vm_area_struct *, - bool compound); +void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, + struct vm_area_struct *); +#define folio_add_file_rmap_pte(folio, page, vma) \ + folio_add_file_rmap_ptes(folio, page, 1, vma) +void folio_add_file_rmap_pmd(struct folio *, struct page *, + struct vm_area_struct *); +void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, + struct vm_area_struct *); +#define folio_remove_rmap_pte(folio, page, vma) \ + folio_remove_rmap_ptes(folio, page, 1, vma) +void folio_remove_rmap_pmd(struct folio *, struct page *, + struct vm_area_struct *); -void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, +void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); -void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, +void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); -static inline void __page_dup_rmap(struct page *page, bool compound) +/* See folio_try_dup_anon_rmap_*() */ +static inline int hugetlb_try_dup_anon_rmap(struct folio *folio, + struct vm_area_struct *vma) { - if (compound) { - struct folio *folio = (struct folio *)page; + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); - VM_BUG_ON_PAGE(compound && !PageHead(page), page); - atomic_inc(&folio->_entire_mapcount); - } else { - atomic_inc(&page->_mapcount); + if (PageAnonExclusive(&folio->page)) { + if (unlikely(folio_needs_cow_for_dma(vma, folio))) + return -EBUSY; + ClearPageAnonExclusive(&folio->page); } + atomic_inc(&folio->_entire_mapcount); + return 0; } -static inline void page_dup_file_rmap(struct page *page, bool compound) +/* See folio_try_share_anon_rmap_*() */ +static inline int hugetlb_try_share_anon_rmap(struct folio *folio) { - __page_dup_rmap(page, compound); + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio); + + /* Paired with the memory barrier in try_grab_folio(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb(); + + if (unlikely(folio_maybe_dma_pinned(folio))) + return -EBUSY; + ClearPageAnonExclusive(&folio->page); + + /* + * This is conceptually a smp_wmb() paired with the smp_rmb() in + * gup_must_unshare(). + */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb__after_atomic(); + return 0; +} + +static inline void hugetlb_add_file_rmap(struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); + + atomic_inc(&folio->_entire_mapcount); +} + +static inline void hugetlb_remove_rmap(struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + + atomic_dec(&folio->_entire_mapcount); +} + +static __always_inline void __folio_dup_file_rmap(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level) +{ + __folio_rmap_sanity_checks(folio, page, nr_pages, level); + + switch (level) { + case RMAP_LEVEL_PTE: + do { + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + break; + case RMAP_LEVEL_PMD: + atomic_inc(&folio->_entire_mapcount); + break; + } } /** - * page_try_dup_anon_rmap - try duplicating a mapping of an already mapped - * anonymous page - * @page: the page to duplicate the mapping for - * @compound: the page is mapped as compound or as a small page - * @vma: the source vma + * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio + * @folio: The folio to duplicate the mappings of + * @page: The first page to duplicate the mappings of + * @nr_pages: The number of pages of which the mapping will be duplicated * - * The caller needs to hold the PT lock and the vma->vma_mm->write_protect_seq. + * The page range of the folio is defined by [page, page + nr_pages) * - * Duplicating the mapping can only fail if the page may be pinned; device - * private pages cannot get pinned and consequently this function cannot fail. + * The caller needs to hold the page table lock. + */ +static inline void folio_dup_file_rmap_ptes(struct folio *folio, + struct page *page, int nr_pages) +{ + __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); +} +#define folio_dup_file_rmap_pte(folio, page) \ + folio_dup_file_rmap_ptes(folio, page, 1) + +/** + * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio + * @folio: The folio to duplicate the mapping of + * @page: The first page to duplicate the mapping of * - * If duplicating the mapping succeeds, the page has to be mapped R/O into - * the parent and the child. It must *not* get mapped writable after this call. + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * - * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. + * The caller needs to hold the page table lock. */ -static inline int page_try_dup_anon_rmap(struct page *page, bool compound, - struct vm_area_struct *vma) +static inline void folio_dup_file_rmap_pmd(struct folio *folio, + struct page *page) { - VM_BUG_ON_PAGE(!PageAnon(page), page); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); +#else + WARN_ON_ONCE(true); +#endif +} - /* - * No need to check+clear for already shared pages, including KSM - * pages. - */ - if (!PageAnonExclusive(page)) - goto dup; +static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *src_vma, + enum rmap_level level) +{ + bool maybe_pinned; + int i; + + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + __folio_rmap_sanity_checks(folio, page, nr_pages, level); /* - * If this page may have been pinned by the parent process, - * don't allow to duplicate the mapping but instead require to e.g., - * copy the page immediately for the child so that we'll always - * guarantee the pinned page won't be randomly replaced in the + * If this folio may have been pinned by the parent process, + * don't allow to duplicate the mappings but instead require to e.g., + * copy the subpage immediately for the child so that we'll always + * guarantee the pinned folio won't be randomly replaced in the * future on write faults. */ - if (likely(!is_device_private_page(page) && - unlikely(page_needs_cow_for_dma(vma, page)))) - return -EBUSY; + maybe_pinned = likely(!folio_is_device_private(folio)) && + unlikely(folio_needs_cow_for_dma(src_vma, folio)); - ClearPageAnonExclusive(page); /* - * It's okay to share the anon page between both processes, mapping - * the page R/O into both processes. + * No need to check+clear for already shared PTEs/PMDs of the + * folio. But if any page is PageAnonExclusive, we must fallback to + * copying if the folio maybe pinned. */ -dup: - __page_dup_rmap(page, compound); + switch (level) { + case RMAP_LEVEL_PTE: + if (unlikely(maybe_pinned)) { + for (i = 0; i < nr_pages; i++) + if (PageAnonExclusive(page + i)) + return -EBUSY; + } + do { + if (PageAnonExclusive(page)) + ClearPageAnonExclusive(page); + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + break; + case RMAP_LEVEL_PMD: + if (PageAnonExclusive(page)) { + if (unlikely(maybe_pinned)) + return -EBUSY; + ClearPageAnonExclusive(page); + } + atomic_inc(&folio->_entire_mapcount); + break; + } return 0; } /** - * page_try_share_anon_rmap - try marking an exclusive anonymous page possibly - * shared to prepare for KSM or temporary unmapping - * @page: the exclusive anonymous page to try marking possibly shared + * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range + * of a folio + * @folio: The folio to duplicate the mappings of + * @page: The first page to duplicate the mappings of + * @nr_pages: The number of pages of which the mapping will be duplicated + * @src_vma: The vm area from which the mappings are duplicated * - * The caller needs to hold the PT lock and has to have the page table entry - * cleared/invalidated. + * The page range of the folio is defined by [page, page + nr_pages) * - * This is similar to page_try_dup_anon_rmap(), however, not used during fork() - * to duplicate a mapping, but instead to prepare for KSM or temporarily - * unmapping a page (swap, migration) via page_remove_rmap(). + * The caller needs to hold the page table lock and the + * vma->vma_mm->write_protect_seq. * - * Marking the page shared can only fail if the page may be pinned; device - * private pages cannot get pinned and consequently this function cannot fail. + * Duplicating the mappings can only fail if the folio may be pinned; device + * private folios cannot get pinned and consequently this function cannot fail + * for them. * - * Returns 0 if marking the page possibly shared succeeded. Returns -EBUSY - * otherwise. + * If duplicating the mappings succeeded, the duplicated PTEs have to be R/O in + * the parent and the child. They must *not* be writable after this call + * succeeded. + * + * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. + */ +static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *src_vma) +{ + return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, + RMAP_LEVEL_PTE); +} +#define folio_try_dup_anon_rmap_pte(folio, page, vma) \ + folio_try_dup_anon_rmap_ptes(folio, page, 1, vma) + +/** + * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range + * of a folio + * @folio: The folio to duplicate the mapping of + * @page: The first page to duplicate the mapping of + * @src_vma: The vm area from which the mapping is duplicated + * + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock and the + * vma->vma_mm->write_protect_seq. + * + * Duplicating the mapping can only fail if the folio may be pinned; device + * private folios cannot get pinned and consequently this function cannot fail + * for them. + * + * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in + * the parent and the child. They must *not* be writable after this call + * succeeded. + * + * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. */ -static inline int page_try_share_anon_rmap(struct page *page) +static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, + struct page *page, struct vm_area_struct *src_vma) { - VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, + RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); + return -EBUSY; +#endif +} + +static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level) +{ + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); + __folio_rmap_sanity_checks(folio, page, nr_pages, level); - /* device private pages cannot get pinned via GUP. */ - if (unlikely(is_device_private_page(page))) { + /* device private folios cannot get pinned via GUP. */ + if (unlikely(folio_is_device_private(folio))) { ClearPageAnonExclusive(page); return 0; } @@ -349,7 +544,7 @@ static inline int page_try_share_anon_rmap(struct page *page) if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_mb(); - if (unlikely(page_maybe_dma_pinned(page))) + if (unlikely(folio_maybe_dma_pinned(folio))) return -EBUSY; ClearPageAnonExclusive(page); @@ -362,6 +557,68 @@ static inline int page_try_share_anon_rmap(struct page *page) return 0; } +/** + * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page + * mapped by a PTE possibly shared to prepare + * for KSM or temporary unmapping + * @folio: The folio to share a mapping of + * @page: The mapped exclusive page + * + * The caller needs to hold the page table lock and has to have the page table + * entries cleared/invalidated. + * + * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during + * fork() to duplicate mappings, but instead to prepare for KSM or temporarily + * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte(). + * + * Marking the mapped page shared can only fail if the folio maybe pinned; + * device private folios cannot get pinned and consequently this function cannot + * fail. + * + * Returns 0 if marking the mapped page possibly shared succeeded. Returns + * -EBUSY otherwise. + */ +static inline int folio_try_share_anon_rmap_pte(struct folio *folio, + struct page *page) +{ + return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); +} + +/** + * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page + * range mapped by a PMD possibly shared to + * prepare for temporary unmapping + * @folio: The folio to share the mapping of + * @page: The first page to share the mapping of + * + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock and has to have the page table + * entries cleared/invalidated. + * + * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during + * fork() to duplicate a mapping, but instead to prepare for temporarily + * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd(). + * + * Marking the mapped pages shared can only fail if the folio maybe pinned; + * device private folios cannot get pinned and consequently this function cannot + * fail. + * + * Returns 0 if marking the mapped pages possibly shared succeeded. Returns + * -EBUSY otherwise. + */ +static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, + struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, + RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); + return -EBUSY; +#endif +} + /* * Called from mm/vmscan.c to handle paging out */ diff --git a/include/linux/rseq.h b/include/linux/rseq.h new file mode 100644 index 000000000000..bc8af3eb5598 --- /dev/null +++ b/include/linux/rseq.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _LINUX_RSEQ_H +#define _LINUX_RSEQ_H + +#ifdef CONFIG_RSEQ + +#include <linux/preempt.h> +#include <linux/sched.h> + +/* + * Map the event mask on the user-space ABI enum rseq_cs_flags + * for direct mask checks. + */ +enum rseq_event_mask_bits { + RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, + RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, + RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, +}; + +enum rseq_event_mask { + RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), + RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), + RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), +}; + +static inline void rseq_set_notify_resume(struct task_struct *t) +{ + if (t->rseq) + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); +} + +void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); + +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) +{ + if (current->rseq) + __rseq_handle_notify_resume(ksig, regs); +} + +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) +{ + preempt_disable(); + __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); + preempt_enable(); + rseq_handle_notify_resume(ksig, regs); +} + +/* rseq_preempt() requires preemption to be disabled. */ +static inline void rseq_preempt(struct task_struct *t) +{ + __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); + rseq_set_notify_resume(t); +} + +/* rseq_migrate() requires preemption to be disabled. */ +static inline void rseq_migrate(struct task_struct *t) +{ + __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); + rseq_set_notify_resume(t); +} + +/* + * If parent process has a registered restartable sequences area, the + * child inherits. Unregister rseq for a clone with CLONE_VM set. + */ +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +{ + if (clone_flags & CLONE_VM) { + t->rseq = NULL; + t->rseq_len = 0; + t->rseq_sig = 0; + t->rseq_event_mask = 0; + } else { + t->rseq = current->rseq; + t->rseq_len = current->rseq_len; + t->rseq_sig = current->rseq_sig; + t->rseq_event_mask = current->rseq_event_mask; + } +} + +static inline void rseq_execve(struct task_struct *t) +{ + t->rseq = NULL; + t->rseq_len = 0; + t->rseq_sig = 0; + t->rseq_event_mask = 0; +} + +#else + +static inline void rseq_set_notify_resume(struct task_struct *t) +{ +} +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) +{ +} +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) +{ +} +static inline void rseq_preempt(struct task_struct *t) +{ +} +static inline void rseq_migrate(struct task_struct *t) +{ +} +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +{ +} +static inline void rseq_execve(struct task_struct *t) +{ +} + +#endif + +#ifdef CONFIG_DEBUG_RSEQ + +void rseq_syscall(struct pt_regs *regs); + +#else + +static inline void rseq_syscall(struct pt_regs *regs) +{ +} + +#endif + +#endif /* _LINUX_RSEQ_H */ diff --git a/include/linux/rslib.h b/include/linux/rslib.h index 238bb85243d3..a04dacbdc8ae 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -10,7 +10,6 @@ #ifndef _RSLIB_H_ #define _RSLIB_H_ -#include <linux/list.h> #include <linux/types.h> /* for gfp_t */ #include <linux/gfp.h> /* for GFP_KERNEL */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4c0bcbeb1f00..3f4d315aaec9 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -42,7 +42,7 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) #include <linux/timerqueue.h> #include <linux/workqueue.h> -extern struct class *rtc_class; +extern const struct class rtc_class; /* * For these RTC methods the device parameter is the physical device @@ -225,6 +225,23 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/** + * rtc_bound_alarmtime() - Return alarm time bound by rtc limit + * @rtc: Pointer to rtc device structure + * @requested: Requested alarm timeout + * + * Return: Alarm timeout bound by maximum alarm time supported by rtc. + */ +static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, + ktime_t requested) +{ + if (rtc->alarm_offset_max && + rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) + return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); + + return requested; +} + #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3d6cf306cd55..cdfc897f1e3c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -10,6 +10,13 @@ #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); + +static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net, + u32 pid, u32 group, int echo) +{ + return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo); +} + extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags); @@ -40,6 +47,7 @@ extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; +extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; @@ -72,6 +80,18 @@ static inline bool lockdep_rtnl_is_held(void) #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) +/** + * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning + * its old value + * @rp: RCU pointer, whose value is returned + * @p: regular pointer + * + * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated + * pointer. The old value of @rp is returned, and @rp is set to @p + */ +#define rcu_replace_pointer_rtnl(rp, p) \ + rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) + static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); @@ -130,4 +150,28 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, extern void rtnl_offload_xstats_notify(struct net_device *dev); +static inline int rtnl_has_listeners(const struct net *net, u32 group) +{ + struct sock *rtnl = net->rtnl; + + return netlink_has_listeners(rtnl, group); +} + +/** + * rtnl_notify_needed - check if notification is needed + * @net: Pointer to the net namespace + * @nlflags: netlink ingress message flags + * @group: rtnl group + * + * Based on the ingress message flags and rtnl group, returns true + * if a notification is needed, false otherwise. + */ +static inline bool +rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) +{ + return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); +} + +void netdev_set_operstate(struct net_device *dev, int newstate); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 534038d962e4..4612ef09a0c7 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -60,6 +60,7 @@ #define SD_EXIST (1 << 16) #define DELINK_INT GPIO0_INT #define MS_OC_INT (1 << 23) +#define SD_OVP_INT (1 << 23) #define SD_OC_INT (1 << 22) #define CARD_INT (XD_INT | MS_INT | SD_INT) @@ -80,6 +81,7 @@ #define OC_INT_EN (1 << 23) #define DELINK_INT_EN GPIO0_INT_EN #define MS_OC_INT_EN (1 << 23) +#define SD_OVP_INT_EN (1 << 23) #define SD_OC_INT_EN (1 << 22) #define RTSX_DUM_REG 0x1C @@ -583,6 +585,7 @@ #define OBFF_DISABLE 0x00 #define CDRESUMECTL 0xFE52 +#define CDGW 0xFE53 #define WAKE_SEL_CTL 0xFE54 #define PCLK_CTL 0xFE55 #define PCLK_MODE_SEL 0x20 @@ -764,6 +767,9 @@ #define SD_VIO_LDO_1V8 0x40 #define SD_VIO_LDO_3V3 0x70 +#define RTS5264_AUTOLOAD_CFG2 0xFF7D +#define RTS5264_CHIP_RST_N_SEL (1 << 6) + #define RTS5260_AUTOLOAD_CFG4 0xFF7F #define RTS5260_MIMO_DISABLE 0x8A /*RTS5261*/ @@ -1261,6 +1267,7 @@ struct rtsx_pcr { u8 dma_error_count; u8 ocp_stat; u8 ocp_stat2; + u8 ovp_stat; u8 rtd3_en; }; @@ -1271,6 +1278,7 @@ struct rtsx_pcr { #define PID_5260 0x5260 #define PID_5261 0x5261 #define PID_5228 0x5228 +#define PID_5264 0x5264 #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) #define PCI_VID(pcr) ((pcr)->pci->vendor) diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include <linux/build_bug.h> +#include <linux/compiler_attributes.h> +#include <uapi/linux/fcntl.h> + +/* Block storage write lifetime hint values. */ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */ diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h index 1d264dd08625..f2394a409c9d 100644 --- a/include/linux/rwbase_rt.h +++ b/include/linux/rwbase_rt.h @@ -26,12 +26,17 @@ struct rwbase_rt { } while (0) -static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb) { return atomic_read(&rwb->readers) != READER_BIAS; } -static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb) +{ + return atomic_read(&rwb->readers) == WRITER_BIAS; +} + +static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb) { return atomic_read(&rwb->readers) > 0; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 1dd530ce8b45..c8b543d428b0 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -66,14 +66,24 @@ struct rw_semaphore { #endif }; -/* In all implementations count != 0 means locked */ +#define RWSEM_UNLOCKED_VALUE 0UL +#define RWSEM_WRITER_LOCKED (1UL << 0) +#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return atomic_long_read(&sem->count) != 0; + return atomic_long_read(&sem->count) != RWSEM_UNLOCKED_VALUE; } -#define RWSEM_UNLOCKED_VALUE 0L -#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) +static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE); +} + +static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED)); +} /* Common initializer macros and functions */ @@ -152,11 +162,21 @@ do { \ __init_rwsem((sem), #sem, &__key); \ } while (0) -static __always_inline int rwsem_is_locked(struct rw_semaphore *sem) +static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) { return rw_base_is_locked(&sem->rwbase); } +static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!rwsem_is_locked(sem)); +} + +static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); +} + static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) { return rw_base_is_contended(&sem->rwbase); @@ -169,6 +189,22 @@ static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) * the RT specific variant. */ +static inline void rwsem_assert_held(const struct rw_semaphore *sem) +{ + if (IS_ENABLED(CONFIG_LOCKDEP)) + lockdep_assert_held(sem); + else + rwsem_assert_held_nolockdep(sem); +} + +static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) +{ + if (IS_ENABLED(CONFIG_LOCKDEP)) + lockdep_assert_held_write(sem); + else + rwsem_assert_held_write_nolockdep(sem); +} + /* * lock for reading */ @@ -203,11 +239,11 @@ extern void up_read(struct rw_semaphore *sem); extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) -DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) - -DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) -DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) +DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) +DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..3c2abbc587b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -10,33 +10,41 @@ #include <uapi/linux/sched.h> #include <asm/current.h> +#include <asm/processor.h> +#include <linux/thread_info.h> +#include <linux/preempt.h> +#include <linux/cpumask.h> -#include <linux/pid.h> -#include <linux/sem.h> +#include <linux/cache.h> +#include <linux/irqflags_types.h> +#include <linux/smp_types.h> +#include <linux/pid_types.h> +#include <linux/sem_types.h> #include <linux/shm.h> #include <linux/kmsan_types.h> -#include <linux/mutex.h> -#include <linux/plist.h> -#include <linux/hrtimer.h> -#include <linux/irqflags.h> -#include <linux/seccomp.h> -#include <linux/nodemask.h> -#include <linux/rcupdate.h> -#include <linux/refcount.h> +#include <linux/mutex_types.h> +#include <linux/plist_types.h> +#include <linux/hrtimer_types.h> +#include <linux/timer_types.h> +#include <linux/seccomp_types.h> +#include <linux/nodemask_types.h> +#include <linux/refcount_types.h> #include <linux/resource.h> #include <linux/latencytop.h> #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> -#include <linux/syscall_user_dispatch.h> +#include <linux/syscall_user_dispatch_types.h> #include <linux/mm_types_task.h> #include <linux/task_io_accounting.h> -#include <linux/posix-timers.h> -#include <linux/rseq.h> -#include <linux/seqlock.h> +#include <linux/posix-timers_types.h> +#include <linux/restart_block.h> +#include <uapi/linux/rseq.h> +#include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> #include <linux/livepatch_sched.h> +#include <linux/uidgid_types.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -63,12 +71,13 @@ struct robust_list_head; struct root_domain; struct rq; struct sched_attr; -struct sched_param; +struct sched_dl_entity; struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; +struct task_struct; struct user_event_mm; /* @@ -370,6 +379,10 @@ extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif +struct sched_param { + int sched_priority; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -410,42 +423,6 @@ struct load_weight { u32 inv_weight; }; -/** - * struct util_est - Estimation utilization of FAIR tasks - * @enqueued: instantaneous estimated utilization of a task/cpu - * @ewma: the Exponential Weighted Moving Average (EWMA) - * utilization of a task - * - * Support data structure to track an Exponential Weighted Moving Average - * (EWMA) of a FAIR task's utilization. New samples are added to the moving - * average each time a task completes an activation. Sample's weight is chosen - * so that the EWMA will be relatively insensitive to transient changes to the - * task's workload. - * - * The enqueued attribute has a slightly different meaning for tasks and cpus: - * - task: the task's util_avg at last task dequeue time - * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU - * Thus, the util_est.enqueued of a task represents the contribution on the - * estimated utilization of the CPU where that task is currently enqueued. - * - * Only for tasks we track a moving average of the past instantaneous - * estimated utilization. This allows to absorb sporadic drops in utilization - * of an otherwise almost periodic task. - * - * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg - * updates. When a task is dequeued, its util_est should not be updated if its - * util_avg has not been updated in the meantime. - * This information is mapped into the MSB bit of util_est.enqueued at dequeue - * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg - * for a task) it is safe to use MSB. - */ -struct util_est { - unsigned int enqueued; - unsigned int ewma; -#define UTIL_EST_WEIGHT_SHIFT 2 -#define UTIL_AVG_UNCHANGED 0x80000000 -} __attribute__((__aligned__(sizeof(u64)))); - /* * The load/runnable/util_avg accumulates an infinite geometric series * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). @@ -500,9 +477,20 @@ struct sched_avg { unsigned long load_avg; unsigned long runnable_avg; unsigned long util_avg; - struct util_est util_est; + unsigned int util_est; } ____cacheline_aligned; +/* + * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg + * updates. When a task is dequeued, its util_est should not be updated if its + * util_avg has not been updated in the meantime. + * This information is mapped into the MSB bit of util_est at dequeue time. + * Since max value of util_est for a task is 1024 (PELT util_avg for a task) + * it is safe to use MSB. + */ +#define UTIL_EST_WEIGHT_SHIFT 2 +#define UTIL_AVG_UNCHANGED 0x80000000 + struct sched_statistics { #ifdef CONFIG_SCHEDSTATS u64 wait_start; @@ -520,7 +508,7 @@ struct sched_statistics { u64 block_max; s64 sum_block_runtime; - u64 exec_max; + s64 exec_max; u64 slice_max; u64 nr_migrations_cold; @@ -550,7 +538,7 @@ struct sched_entity { struct load_weight load; struct rb_node run_node; u64 deadline; - u64 min_deadline; + u64 min_vruntime; struct list_head group_node; unsigned int on_rq; @@ -604,6 +592,9 @@ struct sched_rt_entity { #endif } __randomize_layout; +typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); +typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); + struct sched_dl_entity { struct rb_node rb_node; @@ -651,6 +642,7 @@ struct sched_dl_entity { unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; + unsigned int dl_server : 1; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -665,7 +657,20 @@ struct sched_dl_entity { * timer is needed to decrease the active utilization at the correct * time. */ - struct hrtimer inactive_timer; + struct hrtimer inactive_timer; + + /* + * Bits for DL-server functionality. Also see the comment near + * dl_server_update(). + * + * @rq the runqueue this server is for + * + * @server_has_tasks() returns true if @server_pick return a + * runnable task. + */ + struct rq *rq; + dl_server_has_tasks_f server_has_tasks; + dl_server_pick_f server_pick; #ifdef CONFIG_RT_MUTEXES /* @@ -750,10 +755,8 @@ struct task_struct { #endif unsigned int __state; -#ifdef CONFIG_PREEMPT_RT /* saved state for "spinlock sleepers" */ unsigned int saved_state; -#endif /* * This begins the randomizable portion of task_struct. Only @@ -794,6 +797,7 @@ struct task_struct { struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; + struct sched_dl_entity *dl_server; const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE @@ -854,6 +858,8 @@ struct task_struct { u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_exit_cpu; + struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU @@ -875,6 +881,7 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; + struct address_space *faults_disabled_mapping; int exit_state; int exit_code; @@ -911,8 +918,11 @@ struct task_struct { * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; +#ifdef CONFIG_RT_MUTEXES + unsigned sched_rt_mutex:1; +#endif - /* Bit to tell LSMs we're in execve(): */ + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK @@ -949,7 +959,7 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_CPU_SUP_INTEL @@ -1002,7 +1012,6 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; - struct list_head thread_group; struct list_head thread_node; struct completion *vfork_done; @@ -1252,6 +1261,7 @@ struct task_struct { /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; + u8 il_weight; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING @@ -1443,6 +1453,10 @@ struct task_struct { struct mem_cgroup *active_memcg; #endif +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *objcg; +#endif + #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif @@ -1553,114 +1567,6 @@ struct task_struct { */ }; -static inline struct pid *task_pid(struct task_struct *task) -{ - return task->thread_pid; -} - -/* - * the helpers to get the task's different pids as they are seen - * from various namespaces - * - * task_xid_nr() : global id, i.e. the id seen from the init namespace; - * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of - * current. - * task_xid_nr_ns() : id seen from the ns specified; - * - * see also pid_nr() etc in include/linux/pid.h - */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); - -static inline pid_t task_pid_nr(struct task_struct *tsk) -{ - return tsk->pid; -} - -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); -} - -static inline pid_t task_pid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); -} - - -static inline pid_t task_tgid_nr(struct task_struct *tsk) -{ - return tsk->tgid; -} - -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->thread_pid != NULL; -} - -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); -} - -static inline pid_t task_pgrp_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); -} - - -static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); -} - -static inline pid_t task_session_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); -} - -static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); -} - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); -} - -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - -/* Obsolete, do not use: */ -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return task_pgrp_nr_ns(tsk, &init_pid_ns); -} - #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -1704,20 +1610,6 @@ static inline char task_state_to_char(struct task_struct *tsk) return task_index_to_char(task_state_index(tsk)); } -/** - * is_global_init - check if a task structure is init. Since init - * is free to have sub-threads we need to check tgid. - * @tsk: Task structure to be checked. - * - * Check if a task structure is the first user space task the kernel created. - * - * Return: 1 if the task structure is init. 0 otherwise. - */ -static inline int is_global_init(struct task_struct *tsk) -{ - return task_tgid_nr(tsk) == 1; -} - extern struct pid *cad_pid; /* @@ -1734,26 +1626,27 @@ extern struct pid *cad_pid; #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* Dumped core */ #define PF_SIGNALED 0x00000400 /* Killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ -#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ -#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ +#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to, * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF__HOLE__00800000 0x00800000 -#define PF__HOLE__01000000 0x01000000 +#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ +#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF__HOLE__20000000 0x20000000 +#define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning. + * See memalloc_pin_save() */ +#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ @@ -1947,9 +1840,7 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { -#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK struct task_struct task; -#endif #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif @@ -2169,15 +2060,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock); __cond_resched_rwlock_write(lock); \ }) -static inline void cond_resched_rcu(void) -{ -#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) - rcu_read_unlock(); - cond_resched(); - rcu_read_lock(); -#endif -} - #ifdef CONFIG_PREEMPT_DYNAMIC extern bool preempt_model_none(void); @@ -2219,37 +2101,6 @@ static inline bool preempt_model_preemptible(void) return preempt_model_full() || preempt_model_rt(); } -/* - * Does a critical section need to be broken due to another - * task waiting?: (technically does not depend on CONFIG_PREEMPTION, - * but a general need for low latency) - */ -static inline int spin_needbreak(spinlock_t *lock) -{ -#ifdef CONFIG_PREEMPTION - return spin_is_contended(lock); -#else - return 0; -#endif -} - -/* - * Check if a rwlock is contended. - * Returns non-zero if there is another task waiting on the rwlock. - * Returns zero if the lock is not contended or the system / underlying - * rwlock implementation does not support contention detection. - * Technically does not depend on CONFIG_PREEMPTION, but a general need - * for low latency. - */ -static inline int rwlock_needbreak(rwlock_t *lock) -{ -#ifdef CONFIG_PREEMPTION - return rwlock_is_contended(lock); -#else - return 0; -#endif -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); @@ -2284,6 +2135,8 @@ extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); +#include <linux/spinlock.h> + /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. @@ -2320,129 +2173,6 @@ static inline bool owner_on_cpu(struct task_struct *owner) unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ -#ifdef CONFIG_RSEQ - -/* - * Map the event mask on the user-space ABI enum rseq_cs_flags - * for direct mask checks. - */ -enum rseq_event_mask_bits { - RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, - RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, - RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, -}; - -enum rseq_event_mask { - RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), - RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), - RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), -}; - -static inline void rseq_set_notify_resume(struct task_struct *t) -{ - if (t->rseq) - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); -} - -void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); - -static inline void rseq_handle_notify_resume(struct ksignal *ksig, - struct pt_regs *regs) -{ - if (current->rseq) - __rseq_handle_notify_resume(ksig, regs); -} - -static inline void rseq_signal_deliver(struct ksignal *ksig, - struct pt_regs *regs) -{ - preempt_disable(); - __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); - preempt_enable(); - rseq_handle_notify_resume(ksig, regs); -} - -/* rseq_preempt() requires preemption to be disabled. */ -static inline void rseq_preempt(struct task_struct *t) -{ - __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); - rseq_set_notify_resume(t); -} - -/* rseq_migrate() requires preemption to be disabled. */ -static inline void rseq_migrate(struct task_struct *t) -{ - __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); - rseq_set_notify_resume(t); -} - -/* - * If parent process has a registered restartable sequences area, the - * child inherits. Unregister rseq for a clone with CLONE_VM set. - */ -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) -{ - if (clone_flags & CLONE_VM) { - t->rseq = NULL; - t->rseq_len = 0; - t->rseq_sig = 0; - t->rseq_event_mask = 0; - } else { - t->rseq = current->rseq; - t->rseq_len = current->rseq_len; - t->rseq_sig = current->rseq_sig; - t->rseq_event_mask = current->rseq_event_mask; - } -} - -static inline void rseq_execve(struct task_struct *t) -{ - t->rseq = NULL; - t->rseq_len = 0; - t->rseq_sig = 0; - t->rseq_event_mask = 0; -} - -#else - -static inline void rseq_set_notify_resume(struct task_struct *t) -{ -} -static inline void rseq_handle_notify_resume(struct ksignal *ksig, - struct pt_regs *regs) -{ -} -static inline void rseq_signal_deliver(struct ksignal *ksig, - struct pt_regs *regs) -{ -} -static inline void rseq_preempt(struct task_struct *t) -{ -} -static inline void rseq_migrate(struct task_struct *t) -{ -} -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) -{ -} -static inline void rseq_execve(struct task_struct *t) -{ -} - -#endif - -#ifdef CONFIG_DEBUG_RSEQ - -void rseq_syscall(struct pt_regs *regs); - -#else - -static inline void rseq_syscall(struct pt_regs *regs) -{ -} - -#endif - #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0ee96ea7a0e9..02f5090ffea2 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* @@ -85,10 +86,22 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_MDWE 28 #define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) + +#define MMF_HAS_MDWE_NO_INHERIT 29 + +#define MMF_VM_MERGE_ANY 30 +#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ + MMF_VM_MERGE_ANY_MASK) + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} -#define MMF_VM_MERGE_ANY 29 #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 7c83d4d5a971..df3aca89d4f5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -34,3 +36,5 @@ extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); #endif /* CONFIG_SMP */ + +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index fe1a46f30d24..2b461129d1fa 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -2,6 +2,7 @@ #define _LINUX_SCHED_ISOLATION_H #include <linux/cpumask.h> +#include <linux/cpuset.h> #include <linux/init.h> #include <linux/tick.h> @@ -67,7 +68,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type) static inline bool cpu_is_isolated(int cpu) { return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) || - !housekeeping_test_cpu(cpu, HK_TYPE_TICK); + !housekeeping_test_cpu(cpu, HK_TYPE_TICK) || + cpuset_cpu_is_isolated(cpu); } #endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 8d89c8c4fac1..b6543f9d78d6 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -236,16 +236,25 @@ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | + PF_MEMALLOC_NOFS | + PF_MEMALLOC_NORECLAIM | + PF_MEMALLOC_NOWARN | + PF_MEMALLOC_PIN))) { /* - * NOIO implies both NOIO and NOFS and it is a weaker context - * so always make sure it makes precedence + * Stronger flags before weaker flags: + * NORECLAIM implies NOIO, which in turn implies NOFS */ - if (pflags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NORECLAIM) + flags &= ~__GFP_DIRECT_RECLAIM; + else if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; + if (pflags & PF_MEMALLOC_NOWARN) + flags |= __GFP_NOWARN; + if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } @@ -307,6 +316,24 @@ static inline void might_alloc(gfp_t gfp_mask) } /** + * memalloc_flags_save - Add a PF_* flag to current->flags, save old value + * + * This allows PF_* flags to be conveniently added, irrespective of current + * value, and then the old version restored with memalloc_flags_restore(). + */ +static inline unsigned memalloc_flags_save(unsigned flags) +{ + unsigned oldflags = ~current->flags & flags; + current->flags |= flags; + return oldflags; +} + +static inline void memalloc_flags_restore(unsigned flags) +{ + current->flags &= ~flags; +} + +/** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. @@ -315,13 +342,12 @@ static inline void might_alloc(gfp_t gfp_mask) * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * - * This function is safe to be used from any context. + * Context: This function is safe to be used from any context. + * Return: The saved flags to be passed to memalloc_noio_restore. */ static inline unsigned int memalloc_noio_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** @@ -334,7 +360,7 @@ static inline unsigned int memalloc_noio_save(void) */ static inline void memalloc_noio_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; + memalloc_flags_restore(flags); } /** @@ -346,13 +372,12 @@ static inline void memalloc_noio_restore(unsigned int flags) * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * - * This function is safe to be used from any context. + * Context: This function is safe to be used from any context. + * Return: The saved flags to be passed to memalloc_nofs_restore. */ static inline unsigned int memalloc_nofs_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOFS; - current->flags |= PF_MEMALLOC_NOFS; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** @@ -365,32 +390,76 @@ static inline unsigned int memalloc_nofs_save(void) */ static inline void memalloc_nofs_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; + memalloc_flags_restore(flags); } +/** + * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope. + * + * This function marks the beginning of the __GFP_MEMALLOC allocation scope. + * All further allocations will implicitly add the __GFP_MEMALLOC flag, which + * prevents entering reclaim and allows access to all memory reserves. This + * should only be used when the caller guarantees the allocation will allow more + * memory to be freed very shortly, i.e. it needs to allocate some memory in + * the process of freeing memory, and cannot reclaim due to potential recursion. + * + * Users of this scope have to be extremely careful to not deplete the reserves + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. Usage of a + * pre-allocated pool (e.g. mempool) should be always considered before using + * this scope. + * + * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC + * + * Context: This function should not be used in an interrupt context as that one + * does not give PF_MEMALLOC access to reserves. + * See __gfp_pfmemalloc_flags(). + * Return: The saved flags to be passed to memalloc_noreclaim_restore. + */ static inline unsigned int memalloc_noreclaim_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - return flags; + return memalloc_flags_save(PF_MEMALLOC); } +/** + * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope. + * @flags: Flags to restore. + * + * Ends the implicit __GFP_MEMALLOC scope started by memalloc_noreclaim_save + * function. Always make sure that the given flags is the return value from the + * pairing memalloc_noreclaim_save call. + */ static inline void memalloc_noreclaim_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC) | flags; + memalloc_flags_restore(flags); } +/** + * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope. + * + * This function marks the beginning of the ~__GFP_MOVABLE allocation scope. + * All further allocations will implicitly remove the __GFP_MOVABLE flag, which + * will constraint the allocations to zones that allow long term pinning, i.e. + * not ZONE_MOVABLE zones. + * + * Return: The saved flags to be passed to memalloc_pin_restore. + */ static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_PIN; - - current->flags |= PF_MEMALLOC_PIN; - return flags; + return memalloc_flags_save(PF_MEMALLOC_PIN); } +/** + * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope. + * @flags: Flags to restore. + * + * Ends the implicit ~__GFP_MOVABLE scope started by memalloc_pin_save function. + * Always make sure that the given flags is the return value from the pairing + * memalloc_pin_save call. + */ static inline void memalloc_pin_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; + memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG @@ -403,6 +472,10 @@ DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * + * Please, make sure that caller has a reference to the passed memcg structure, + * so its lifetime is guaranteed to exceed the scope between two + * set_active_memcg() calls. + * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15..52b22c5c396d 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -15,13 +15,23 @@ #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 +enum numa_vmaskip_reason { + NUMAB_SKIP_UNSUITABLE, + NUMAB_SKIP_SHARED_RO, + NUMAB_SKIP_INACCESSIBLE, + NUMAB_SKIP_SCAN_DELAY, + NUMAB_SKIP_PID_INACTIVE, + NUMAB_SKIP_IGNORE_PID, + NUMAB_SKIP_SEQ_COMPLETED, +}; + #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p, bool final); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -38,7 +48,7 @@ static inline void task_numa_free(struct task_struct *p, bool final) { } static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) + struct folio *folio, int src_nid, int dst_cpu) { return true; } diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 994c25640e15..b2b9e6eb9683 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } #ifdef CONFIG_RT_MUTEXES +extern void rt_mutex_pre_schedule(void); +extern void rt_mutex_schedule(void); +extern void rt_mutex_post_schedule(void); + /* * Must hold either p->pi_lock or task_rq(p)->lock. */ diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index fad77b5172e2..b04a5d04dee9 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -110,13 +110,20 @@ SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* - * Domain members share CPU package resources (i.e. caches) + * Domain members share CPU cluster (LLC tags or L2 cache) + * + * NEEDS_GROUPS: Clusters are shared between groups. + */ +SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS) + +/* + * Domain members share CPU Last Level Caches * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share * the same cache(s). * NEEDS_GROUPS: Caches are shared between groups. */ -SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) +SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Only a single load balancing instance diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..0a0e23c45406 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -9,6 +9,7 @@ #include <linux/sched/task.h> #include <linux/cred.h> #include <linux/refcount.h> +#include <linux/pid.h> #include <linux/posix-timers.h> #include <linux/mm_types.h> #include <asm/ptrace.h> @@ -303,20 +304,11 @@ static inline void kernel_signal_stop(void) schedule(); } -#ifdef __ia64__ -# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 -#else -# define ___ARCH_SI_IA64(_a1, _a2, _a3) -#endif -int force_sig_fault_to_task(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); -int force_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); -int send_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); +int force_sig_fault_to_task(int sig, int code, void __user *addr, + struct task_struct *t); +int force_sig_fault(int sig, int code, void __user *addr); +int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); @@ -441,7 +433,6 @@ static inline bool fault_signal_pending(vm_fault_t fault_flags, * This is required every time the blocked sigset_t changes. * callers must hold sighand->siglock. */ -extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); extern void calculate_sigpending(void); @@ -655,8 +646,12 @@ extern bool current_is_single_threaded(void); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) +#define for_other_threads(p, t) \ + for (t = p; (t = next_thread(t)) != p; ) + #define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ + lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) @@ -715,22 +710,31 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2) return p1->signal == p2->signal; } -static inline struct task_struct *next_thread(const struct task_struct *p) +/* + * returns NULL if p is the last thread in the thread group + */ +static inline struct task_struct *__next_thread(struct task_struct *p) { - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); + return list_next_or_null_rcu(&p->signal->thread_head, + &p->thread_node, + struct task_struct, + thread_node); +} + +static inline struct task_struct *next_thread(struct task_struct *p) +{ + return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { - return list_empty(&p->thread_group); + return thread_group_leader(p) && + list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern bool thread_group_exited(struct pid *pid); - extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 59d3736c454c..fb1e295e7e63 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -17,4 +17,4 @@ static inline bool sched_smt_active(void) { return false; } void arch_smt_update(void); -#endif +#endif /* _LINUX_SCHED_SMT_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a23af225c898..d362aacf9f89 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -7,6 +7,8 @@ * functionality: */ +#include <linux/rcupdate.h> +#include <linux/refcount.h> #include <linux/sched.h> #include <linux/uaccess.h> @@ -226,4 +228,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } +DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) + #endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index f158b025c175..ccd72b978e1f 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/magic.h> +#include <linux/refcount.h> #ifdef CONFIG_THREAD_INFO_IN_TASK diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 67b573d5bf28..18572c9ea724 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[]; #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_CLUSTER static inline int cpu_cluster_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_CLUSTER | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_MC static inline int cpu_core_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_SHARE_LLC; } #endif @@ -109,8 +109,6 @@ struct sched_domain { u64 max_newidle_lb_cost; unsigned long last_decay_max_lb_cost; - u64 avg_scan_cost; /* select_idle_sibling */ - #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -178,7 +176,9 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); +bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); +bool cpus_share_resources(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); @@ -227,11 +227,21 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { } +static inline bool cpus_equal_capacity(int this_cpu, int that_cpu) +{ + return true; +} + static inline bool cpus_share_cache(int this_cpu, int that_cpu) { return true; } +static inline bool cpus_share_resources(int this_cpu, int that_cpu) +{ + return true; +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) @@ -275,6 +285,14 @@ void arch_update_thermal_pressure(const struct cpumask *cpus, { } #endif +#ifndef arch_scale_freq_ref +static __always_inline +unsigned int arch_scale_freq_ref(int cpu) +{ + return 0; +} +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h index 3c3e049224ae..969aaf5ef9d6 100644 --- a/include/linux/sched/types.h +++ b/include/linux/sched/types.h @@ -20,4 +20,4 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; -#endif +#endif /* _LINUX_SCHED_TYPES_H */ diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 837a23624a66..bc60243d43b3 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_VHOST_TASK_H -#define _LINUX_VHOST_TASK_H - +#ifndef _LINUX_SCHED_VHOST_TASK_H +#define _LINUX_SCHED_VHOST_TASK_H struct vhost_task; @@ -11,4 +10,4 @@ void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); void vhost_task_wake(struct vhost_task *vtsk); -#endif +#endif /* _LINUX_SCHED_VHOST_TASK_H */ diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e6fe4f73ffe6..b807141acc14 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -47,6 +47,10 @@ struct scmi_clock_info { bool rate_discrete; bool rate_changed_notifications; bool rate_change_requested_notifications; + bool state_ctrl_forbidden; + bool rate_ctrl_forbidden; + bool parent_ctrl_forbidden; + bool extended_config; union { struct { int num_rates; @@ -58,6 +62,8 @@ struct scmi_clock_info { u64 step_size; } range; }; + int num_parents; + u32 *parents; }; enum scmi_power_scale { @@ -70,6 +76,13 @@ struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; +enum scmi_clock_oem_config { + SCMI_CLOCK_CFG_DUTY_CYCLE = 0x1, + SCMI_CLOCK_CFG_PHASE, + SCMI_CLOCK_CFG_OEM_START = 0x80, + SCMI_CLOCK_CFG_OEM_END = 0xFF, +}; + /** * struct scmi_clk_proto_ops - represents the various operations provided * by SCMI Clock Protocol @@ -80,6 +93,11 @@ struct scmi_protocol_handle; * @rate_set: set the clock rate of a clock * @enable: enables the specified clock * @disable: disables the specified clock + * @state_get: get the status of the specified clock + * @config_oem_get: get the value of an OEM specific clock config + * @config_oem_set: set the value of an OEM specific clock config + * @parent_get: get the parent id of a clk + * @parent_set: set the parent of a clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -90,23 +108,40 @@ struct scmi_clk_proto_ops { u64 *rate); int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 rate); - int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable_atomic)(const struct scmi_protocol_handle *ph, - u32 clk_id); + int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool *enabled, bool atomic); + int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + enum scmi_clock_oem_config oem_type, + u32 *oem_val, u32 *attributes, bool atomic); + int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, + enum scmi_clock_oem_config oem_type, + u32 oem_val, bool atomic); + int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); + int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); +}; + +struct scmi_perf_domain_info { + char name[SCMI_MAX_STR_SIZE]; + bool set_perf; }; /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * + * @num_domains_get: gets the number of supported performance domains + * @info_get: get the information of a performance domain * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain - * @device_domain_id: gets the scmi domain id for a given device * @transition_latency_get: gets the DVFS transition latency for a given device + * @rate_limit_get: gets the minimum time (us) required between successive + * requests * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using sustained frequency * to sustained performance level mapping @@ -116,10 +151,15 @@ struct scmi_clk_proto_ops { * at a given frequency * @fast_switch_possible: indicates if fast DVFS switching is possible or not * for a given device + * @fast_switch_rate_limit: gets the minimum time (us) required between + * successive fast_switching requests * @power_scale_mw_get: indicates if the power values provided are in milliWatts * or in some other (abstract) scale */ struct scmi_perf_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_perf_domain_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -128,11 +168,12 @@ struct scmi_perf_proto_ops { u32 level, bool poll); int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain, u32 *level, bool poll); - int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); + int (*rate_limit_get)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); int (*device_opps_add)(const struct scmi_protocol_handle *ph, - struct device *dev); + struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long rate, bool poll); int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -140,7 +181,9 @@ struct scmi_perf_proto_ops { int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); + int (*fast_switch_rate_limit)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; @@ -930,6 +973,8 @@ struct scmi_perf_limits_report { unsigned int domain_id; unsigned int range_max; unsigned int range_min; + unsigned long range_max_freq; + unsigned long range_min_freq; }; struct scmi_perf_level_report { @@ -937,6 +982,7 @@ struct scmi_perf_level_report { unsigned int agent_id; unsigned int domain_id; unsigned int performance_level; + unsigned long performance_level_freq; }; struct scmi_sensor_trip_point_report { diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index eab7081392d5..75303c126285 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -4,6 +4,132 @@ #include <uapi/linux/screen_info.h> +#include <linux/bits.h> + +/** + * SCREEN_INFO_MAX_RESOURCES - maximum number of resources per screen_info + */ +#define SCREEN_INFO_MAX_RESOURCES 3 + +struct pci_dev; +struct resource; + +static inline bool __screen_info_has_lfb(unsigned int type) +{ + return (type == VIDEO_TYPE_VLFB) || (type == VIDEO_TYPE_EFI); +} + +static inline u64 __screen_info_lfb_base(const struct screen_info *si) +{ + u64 lfb_base = si->lfb_base; + + if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) + lfb_base |= (u64)si->ext_lfb_base << 32; + + return lfb_base; +} + +static inline void __screen_info_set_lfb_base(struct screen_info *si, u64 lfb_base) +{ + si->lfb_base = lfb_base & GENMASK_ULL(31, 0); + si->ext_lfb_base = (lfb_base & GENMASK_ULL(63, 32)) >> 32; + + if (si->ext_lfb_base) + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + else + si->capabilities &= ~VIDEO_CAPABILITY_64BIT_BASE; +} + +static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned int type) +{ + u64 lfb_size = si->lfb_size; + + if (type == VIDEO_TYPE_VLFB) + lfb_size <<= 16; + return lfb_size; +} + +static inline unsigned int __screen_info_video_type(unsigned int type) +{ + switch (type) { + case VIDEO_TYPE_MDA: + case VIDEO_TYPE_CGA: + case VIDEO_TYPE_EGAM: + case VIDEO_TYPE_EGAC: + case VIDEO_TYPE_VGAC: + case VIDEO_TYPE_VLFB: + case VIDEO_TYPE_PICA_S3: + case VIDEO_TYPE_MIPS_G364: + case VIDEO_TYPE_SGI: + case VIDEO_TYPE_TGAC: + case VIDEO_TYPE_SUN: + case VIDEO_TYPE_SUNPCI: + case VIDEO_TYPE_PMAC: + case VIDEO_TYPE_EFI: + return type; + default: + return 0; + } +} + +/** + * screen_info_video_type() - Decodes the video type from struct screen_info + * @si: an instance of struct screen_info + * + * Returns: + * A VIDEO_TYPE_ constant representing si's type of video display, or 0 otherwise. + */ +static inline unsigned int screen_info_video_type(const struct screen_info *si) +{ + unsigned int type; + + // check if display output is on + if (!si->orig_video_isVGA) + return 0; + + // check for a known VIDEO_TYPE_ constant + type = __screen_info_video_type(si->orig_video_isVGA); + if (type) + return si->orig_video_isVGA; + + // check if text mode has been initialized + if (!si->orig_video_lines || !si->orig_video_cols) + return 0; + + // 80x25 text, mono + if (si->orig_video_mode == 0x07) { + if ((si->orig_video_ega_bx & 0xff) != 0x10) + return VIDEO_TYPE_EGAM; + else + return VIDEO_TYPE_MDA; + } + + // EGA/VGA, 16 colors + if ((si->orig_video_ega_bx & 0xff) != 0x10) { + if (si->orig_video_isVGA) + return VIDEO_TYPE_VGAC; + else + return VIDEO_TYPE_EGAC; + } + + // the rest... + return VIDEO_TYPE_CGA; +} + +ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); + +#if defined(CONFIG_PCI) +void screen_info_apply_fixups(void); +struct pci_dev *screen_info_pci_dev(const struct screen_info *si); +#else +static inline void screen_info_apply_fixups(void) +{ } +static inline struct pci_dev *screen_info_pci_dev(const struct screen_info *si) +{ + return NULL; +} +#endif + extern struct screen_info screen_info; #endif /* _SCREEN_INFO_H */ diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 175079552f68..709ad84809e1 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,7 @@ #define _LINUX_SECCOMP_H #include <uapi/linux/seccomp.h> +#include <linux/seccomp_types.h> #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ @@ -21,25 +22,6 @@ #include <linux/atomic.h> #include <asm/seccomp.h> -struct seccomp_filter; -/** - * struct seccomp - the state of a seccomp'ed process - * - * @mode: indicates one of the valid values above for controlled - * system calls available to a process. - * @filter_count: number of seccomp filters - * @filter: must always point to a valid seccomp-filter or NULL as it is - * accessed without locking during system call entry. - * - * @filter must only be accessed from the context of current as there - * is no read locking. - */ -struct seccomp { - int mode; - atomic_t filter_count; - struct seccomp_filter *filter; -}; - #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) @@ -64,8 +46,6 @@ static inline int seccomp_mode(struct seccomp *s) #include <linux/errno.h> -struct seccomp { }; -struct seccomp_filter { }; struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER @@ -126,6 +106,8 @@ static inline long seccomp_get_metadata(struct task_struct *task, #ifdef CONFIG_SECCOMP_CACHE_DEBUG struct seq_file; +struct pid_namespace; +struct pid; int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); diff --git a/include/linux/seccomp_types.h b/include/linux/seccomp_types.h new file mode 100644 index 000000000000..cf0a0355024f --- /dev/null +++ b/include/linux/seccomp_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SECCOMP_TYPES_H +#define _LINUX_SECCOMP_TYPES_H + +#include <linux/types.h> + +#ifdef CONFIG_SECCOMP + +struct seccomp_filter; +/** + * struct seccomp - the state of a seccomp'ed process + * + * @mode: indicates one of the valid values above for controlled + * system calls available to a process. + * @filter_count: number of seccomp filters + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. + * + * @filter must only be accessed from the context of current as there + * is no read locking. + */ +struct seccomp { + int mode; + atomic_t filter_count; + struct seccomp_filter *filter; +}; + +#else + +struct seccomp { }; +struct seccomp_filter { }; + +#endif + +#endif /* _LINUX_SECCOMP_TYPES_H */ diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 35f3a4a8ceb1..acf7e1a3f3de 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) /* * Using folio_mapping() is quite slow because of the actual call * instruction. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (folio_test_large(folio) || !folio_test_lru(folio)) + if (folio_test_large(folio)) return false; mapping = (struct address_space *) diff --git a/include/linux/security.h b/include/linux/security.h index 5f16eecde00b..41a8f667bdfa 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,8 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/bpf.h> +#include <uapi/linux/lsm.h> struct linux_binprm; struct cred; @@ -60,6 +62,7 @@ struct fs_parameter; enum fs_value_type; struct watch; struct watch_notification; +struct lsm_ctx; /* Default (no) options for the capable function */ #define CAP_OPT_NONE 0x0 @@ -138,6 +141,8 @@ enum lockdown_reason { }; extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; +extern u32 lsm_active_cnt; +extern const struct lsm_id *lsm_idlist[]; /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, @@ -151,7 +156,7 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int cap_inode_removexattr(struct mnt_idmap *idmap, @@ -261,6 +266,7 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb); /* prototypes */ extern int security_init(void); extern int early_security_init(void); +extern u64 lsm_name_to_attr(const char *name); /* Security operations */ int security_binder_set_context_mgr(const struct cred *mgr); @@ -284,16 +290,16 @@ int security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts); -int security_quotactl(int cmds, int type, int id, struct super_block *sb); +int security_quotactl(int cmds, int type, int id, const struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); -int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); -void security_bprm_committing_creds(struct linux_binprm *bprm); -void security_bprm_committed_creds(struct linux_binprm *bprm); +void security_bprm_committing_creds(const struct linux_binprm *bprm); +void security_bprm_committed_creds(const struct linux_binprm *bprm); int security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); @@ -304,7 +310,7 @@ void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts); -int security_sb_kern_mount(struct super_block *sb); +int security_sb_kern_mount(const struct super_block *sb); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, @@ -339,6 +345,8 @@ int security_inode_init_security_anon(struct inode *inode, const struct qstr *name, const struct inode *context_inode); int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode); +void security_inode_post_create_tmpfile(struct mnt_idmap *idmap, + struct inode *inode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); int security_inode_unlink(struct inode *dir, struct dentry *dentry); @@ -356,6 +364,8 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode, int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, @@ -363,16 +373,22 @@ int security_inode_setxattr(struct mnt_idmap *idmap, int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); +void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); +void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); +void security_inode_post_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, @@ -387,8 +403,11 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); +void security_file_release(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); int security_mmap_addr(unsigned long addr); @@ -401,6 +420,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_file_post_open(struct file *file, int mask); int security_file_truncate(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); @@ -470,10 +490,13 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); -int security_getprocattr(struct task_struct *p, const char *lsm, const char *name, +int security_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 __user *size, u32 flags); +int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 size, u32 flags); +int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); -int security_setprocattr(const char *lsm, const char *name, void *value, - size_t size); +int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); @@ -484,6 +507,8 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); +int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, + void *val, size_t val_len, u64 id, u64 flags); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -501,6 +526,11 @@ static inline int unregister_blocking_lsm_notifier(struct notifier_block *nb) return 0; } +static inline u64 lsm_name_to_attr(const char *name) +{ + return LSM_ATTR_UNDEF; +} + static inline void security_free_mnt_opts(void **mnt_opts) { } @@ -581,7 +611,7 @@ static inline int security_capable(const struct cred *cred, } static inline int security_quotactl(int cmds, int type, int id, - struct super_block *sb) + const struct super_block *sb) { return 0; } @@ -613,7 +643,7 @@ static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) } static inline int security_bprm_creds_from_file(struct linux_binprm *bprm, - struct file *file) + const struct file *file) { return cap_bprm_creds_from_file(bprm, file); } @@ -623,11 +653,11 @@ static inline int security_bprm_check(struct linux_binprm *bprm) return 0; } -static inline void security_bprm_committing_creds(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(const struct linux_binprm *bprm) { } -static inline void security_bprm_committed_creds(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(const struct linux_binprm *bprm) { } @@ -789,6 +819,10 @@ static inline int security_inode_create(struct inode *dir, return 0; } +static inline void +security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) +{ } + static inline int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) @@ -862,6 +896,11 @@ static inline int security_inode_setattr(struct mnt_idmap *idmap, return 0; } +static inline void +security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid) +{ } + static inline int security_inode_getattr(const struct path *path) { return 0; @@ -882,6 +921,11 @@ static inline int security_inode_set_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ } + static inline int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) @@ -896,6 +940,11 @@ static inline int security_inode_remove_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name) +{ } + static inline void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { } @@ -918,6 +967,10 @@ static inline int security_inode_removexattr(struct mnt_idmap *idmap, return cap_inode_removexattr(idmap, dentry, name); } +static inline void security_inode_post_removexattr(struct dentry *dentry, + const char *name) +{ } + static inline int security_inode_need_killpriv(struct dentry *dentry) { return cap_inode_need_killpriv(dentry); @@ -978,6 +1031,9 @@ static inline int security_file_alloc(struct file *file) return 0; } +static inline void security_file_release(struct file *file) +{ } + static inline void security_file_free(struct file *file) { } @@ -987,6 +1043,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } +static inline int security_file_ioctl_compat(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { @@ -1038,6 +1101,11 @@ static inline int security_file_open(struct file *file) return 0; } +static inline int security_file_post_open(struct file *file, int mask) +{ + return 0; +} + static inline int security_file_truncate(struct file *file) { return 0; @@ -1337,14 +1405,28 @@ static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode) { } -static inline int security_getprocattr(struct task_struct *p, const char *lsm, +static inline int security_getselfattr(unsigned int attr, + struct lsm_ctx __user *ctx, + size_t __user *size, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int security_setselfattr(unsigned int attr, + struct lsm_ctx __user *ctx, + size_t size, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { return -EINVAL; } -static inline int security_setprocattr(const char *lsm, char *name, - void *value, size_t size) +static inline int security_setprocattr(int lsmid, char *name, void *value, + size_t size) { return -EINVAL; } @@ -1395,6 +1477,12 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } +static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, + u32 *uctx_len, void *val, size_t val_len, + u64 id, u64 flags) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@ -1827,6 +1915,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t m int security_path_rmdir(const struct path *dir, struct dentry *dentry); int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); +void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry); int security_path_truncate(const struct path *path); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); @@ -1861,6 +1950,10 @@ static inline int security_path_mknod(const struct path *dir, struct dentry *den return 0; } +static inline void security_path_post_mknod(struct mnt_idmap *idmap, + struct dentry *dentry) +{ } + static inline int security_path_truncate(const struct path *path) { return 0; @@ -1912,6 +2005,9 @@ void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm); int security_key_getsecurity(struct key *key, char **_buffer); +void security_key_post_create_or_update(struct key *keyring, struct key *key, + const void *payload, size_t payload_len, + unsigned long flags, bool create); #else @@ -1939,6 +2035,14 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer) return 0; } +static inline void security_key_post_create_or_update(struct key *keyring, + struct key *key, + const void *payload, + size_t payload_len, + unsigned long flags, + bool create) +{ } + #endif #endif /* CONFIG_KEYS */ @@ -2020,15 +2124,22 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_prog_aux; +struct bpf_token; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_alloc(struct bpf_map *map); +extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); -extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token); +extern void security_bpf_prog_free(struct bpf_prog *prog); +extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path); +extern void security_bpf_token_free(struct bpf_token *token); +extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2046,7 +2157,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_alloc(struct bpf_map *map) +static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) { return 0; } @@ -2054,13 +2166,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map) static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog *prog) +{ } + +static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +static inline void security_bpf_token_free(struct bpf_token *token) { } + +static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + return 0; +} + +static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/sed-opal-key.h b/include/linux/sed-opal-key.h new file mode 100644 index 000000000000..0ca03054e8f6 --- /dev/null +++ b/include/linux/sed-opal-key.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * SED key operations. + * + * Copyright (C) 2023 IBM Corporation + * + * These are the accessor functions (read/write) for SED Opal + * keys. Specific keystores can provide overrides. + * + */ + +#include <linux/kernel.h> + +#ifdef CONFIG_PSERIES_PLPKS_SED +int sed_read_key(char *keyname, char *key, u_int *keylen); +int sed_write_key(char *keyname, char *key, u_int keylen); +#else +static inline +int sed_read_key(char *keyname, char *key, u_int *keylen) { + return -EOPNOTSUPP; +} +static inline +int sed_write_key(char *keyname, char *key, u_int keylen) { + return -EOPNOTSUPP; +} +#endif diff --git a/include/linux/selection.h b/include/linux/selection.h index 170ef28ff26b..bab7d30d3446 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -14,17 +14,16 @@ struct tty_struct; struct vc_data; -extern void clear_selection(void); -extern int set_selection_user(const struct tiocl_selection __user *sel, - struct tty_struct *tty); -extern int set_selection_kernel(struct tiocl_selection *v, - struct tty_struct *tty); -extern int paste_selection(struct tty_struct *tty); -extern int sel_loadlut(char __user *p); -extern int mouse_reporting(void); -extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); +void clear_selection(void); +int set_selection_user(const struct tiocl_selection __user *sel, + struct tty_struct *tty); +int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty); +int paste_selection(struct tty_struct *tty); +int sel_loadlut(u32 __user *lut); +int mouse_reporting(void); +void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry); -bool vc_is_sel(struct vc_data *vc); +bool vc_is_sel(const struct vc_data *vc); extern int console_blanked; @@ -33,24 +32,21 @@ extern unsigned char default_red[]; extern unsigned char default_grn[]; extern unsigned char default_blu[]; -extern unsigned short *screen_pos(const struct vc_data *vc, int w_offset, - bool viewed); -extern u16 screen_glyph(const struct vc_data *vc, int offset); -extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset); -extern void complement_pos(struct vc_data *vc, int offset); -extern void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); +unsigned short *screen_pos(const struct vc_data *vc, int w_offset, bool viewed); +u16 screen_glyph(const struct vc_data *vc, int offset); +u32 screen_glyph_unicode(const struct vc_data *vc, int offset); +void complement_pos(struct vc_data *vc, int offset); +void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); -extern void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); -extern void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); +void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); +void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); -extern u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); -extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); -extern void vcs_scr_updated(struct vc_data *vc); +u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); +void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); +void vcs_scr_updated(struct vc_data *vc); -extern int vc_uniscr_check(struct vc_data *vc); -extern void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, - bool viewed, - unsigned int row, unsigned int col, - unsigned int nr); +int vc_uniscr_check(struct vc_data *vc); +void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, bool viewed, + unsigned int row, unsigned int col, unsigned int nr); #endif diff --git a/include/linux/sem.h b/include/linux/sem.h index 5608a500c43e..c4deefe42aeb 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -3,25 +3,17 @@ #define _LINUX_SEM_H #include <uapi/linux/sem.h> +#include <linux/sem_types.h> struct task_struct; -struct sem_undo_list; #ifdef CONFIG_SYSVIPC -struct sysv_sem { - struct sem_undo_list *undo_list; -}; - extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); extern void exit_sem(struct task_struct *tsk); #else -struct sysv_sem { - /* empty */ -}; - static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) { return 0; diff --git a/include/linux/sem_types.h b/include/linux/sem_types.h new file mode 100644 index 000000000000..73df1971a7ae --- /dev/null +++ b/include/linux/sem_types.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SEM_TYPES_H +#define _LINUX_SEM_TYPES_H + +struct sem_undo_list; + +struct sysv_sem { +#ifdef CONFIG_SYSVIPC + struct sem_undo_list *undo_list; +#endif +}; + +#endif /* _LINUX_SEM_TYPES_H */ diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 515d7fcb9634..fe41da005970 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include <linux/fs.h> +#include <linux/bug.h> +#include <linux/minmax.h> +#include <linux/seq_file.h> +#include <linux/types.h> /* * Trace sequences are used to allow a function to call several other functions @@ -10,23 +13,28 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer - * @readpos: The next position to read in the buffer. */ struct seq_buf { char *buffer; size_t size; size_t len; - loff_t readpos; }; +#define DECLARE_SEQ_BUF(NAME, SIZE) \ + struct seq_buf NAME = { \ + .buffer = (char[SIZE]) { 0 }, \ + .size = SIZE, \ + } + static inline void seq_buf_clear(struct seq_buf *s) { s->len = 0; - s->readpos = 0; + if (s->size) + s->buffer[0] = '\0'; } static inline void @@ -39,7 +47,7 @@ seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) /* * seq_buf have a buffer that might overflow. When this happens - * the len and size are set to be equal. + * len is set to be greater than size. */ static inline bool seq_buf_has_overflowed(struct seq_buf *s) @@ -72,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_terminate - Make sure buffer is nul terminated - * @s: the seq_buf descriptor to terminate. + * seq_buf_str - get NUL-terminated C string from seq_buf + * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -84,16 +92,20 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * * After this function is called, s->buffer is safe to use * in string operations. + * + * Returns: @s->buf after making sure it is terminated. */ -static inline void seq_buf_terminate(struct seq_buf *s) +static inline const char *seq_buf_str(struct seq_buf *s) { if (WARN_ON(s->size == 0)) - return; + return ""; if (seq_buf_buffer_left(s)) s->buffer[s->len] = 0; else s->buffer[s->size - 1] = 0; + + return s->buffer; } /** @@ -101,7 +113,7 @@ static inline void seq_buf_terminate(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -123,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) @@ -143,7 +155,7 @@ extern __printf(2, 0) int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args); extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s); extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, - int cnt); + size_t start, int cnt); extern int seq_buf_puts(struct seq_buf *s, const char *str); extern int seq_buf_putc(struct seq_buf *s, unsigned char c); extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 386ab580b839..234bcdb1fba4 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .write = __name ## _write, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e9bd2f65d7f4..d90d8ee29d81 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -18,6 +18,7 @@ #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/preempt.h> +#include <linux/seqlock_types.h> #include <linux/spinlock.h> #include <asm/processor.h> @@ -37,37 +38,6 @@ */ #define KCSAN_SEQLOCK_REGION_MAX 1000 -/* - * Sequence counters (seqcount_t) - * - * This is the raw counting mechanism, without any writer protection. - * - * Write side critical sections must be serialized and non-preemptible. - * - * If readers can be invoked from hardirq or softirq contexts, - * interrupts or bottom halves must also be respectively disabled before - * entering the write section. - * - * This mechanism can't be used if the protected data contains pointers, - * as the writer can invalidate a pointer that a reader is following. - * - * If the write serialization mechanism is one of the common kernel - * locking primitives, use a sequence counter with associated lock - * (seqcount_LOCKNAME_t) instead. - * - * If it's desired to automatically handle the sequence counter writer - * serialization and non-preemptibility requirements, use a sequential - * lock (seqlock_t) instead. - * - * See Documentation/locking/seqlock.rst - */ -typedef struct seqcount { - unsigned sequence; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} seqcount_t; - static inline void __seqcount_init(seqcount_t *s, const char *name, struct lock_class_key *key) { @@ -132,28 +102,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ /* - * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot - * disable preemption. It can lead to higher latencies, and the write side - * sections will not be able to acquire locks which become sleeping locks - * (e.g. spinlock_t). - * - * To remain preemptible while avoiding a possible livelock caused by the - * reader preempting the writer, use a different technique: let the reader - * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the - * case, acquire then release the associated LOCKNAME writer serialization - * lock. This will allow any possibly-preempted writer to make progress - * until the end of its writer serialization lock critical section. - * - * This lock-unlock technique must be implemented for all of PREEMPT_RT - * sleeping locks. See Documentation/locking/locktypes.rst - */ -#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) -#define __SEQ_LOCK(expr) expr -#else -#define __SEQ_LOCK(expr) -#endif - -/* * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter * @lock: Pointer to the associated lock @@ -191,22 +139,21 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() - * @lockbase: associated lock release function (prefix only) - * @lock_acquire: associated lock acquisition function (full call) + * @lockbase: prefix for associated lock/unlock */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ -typedef struct seqcount_##lockname { \ - seqcount_t seqcount; \ - __SEQ_LOCK(locktype *lock); \ -} seqcount_##lockname##_t; \ - \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ static __always_inline seqcount_t * \ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ +static __always_inline const seqcount_t * \ +__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ +{ \ + return &s->seqcount; \ +} \ + \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ @@ -216,7 +163,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ - __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ @@ -242,7 +189,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* @@ -254,6 +201,11 @@ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) return s; } +static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) +{ + return s; +} + static inline unsigned __seqprop_sequence(const seqcount_t *s) { return READ_ONCE(s->sequence); @@ -271,10 +223,11 @@ static inline void __seqprop_assert(const seqcount_t *s) #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) +#undef SEQCOUNT_LOCKNAME /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t @@ -294,19 +247,20 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqprop_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) -#define seqprop_ptr(s) __seqprop(s, ptr) -#define seqprop_sequence(s) __seqprop(s, sequence) -#define seqprop_preemptible(s) __seqprop(s, preemptible) -#define seqprop_assert(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) +#define seqprop_sequence(s) __seqprop(s, sequence)(s) +#define seqprop_preemptible(s) __seqprop(s, preemptible)(s) +#define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -355,7 +309,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(seqprop_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -421,7 +375,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - do___read_seqcount_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -441,7 +395,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - do_read_seqcount_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -574,7 +528,7 @@ static inline void do_write_seqcount_end(seqcount_t *s) * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because - * neither writes before and after the barrier are enclosed in a seq-writer + * neither writes before nor after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; @@ -784,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s) smp_wmb(); /* increment "sequence" before following stores */ } -/* - * Sequential locks (seqlock_t) - * - * Sequence counters with an embedded spinlock for writer serialization - * and non-preemptibility. - * - * For more info, see: - * - Comments on top of seqcount_t - * - Documentation/locking/seqlock.rst - */ -typedef struct { - /* - * Make sure that readers don't starve writers on PREEMPT_RT: use - * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). - */ - seqcount_spinlock_t seqcount; - spinlock_t lock; -} seqlock_t; - #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ @@ -864,7 +799,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use the the internal + * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h new file mode 100644 index 000000000000..dfdf43e3fa3d --- /dev/null +++ b/include/linux/seqlock_types.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_SEQLOCK_TYPES_H +#define __LINUX_SEQLOCK_TYPES_H + +#include <linux/lockdep_types.h> +#include <linux/mutex_types.h> +#include <linux/spinlock_types.h> + +/* + * Sequence counters (seqcount_t) + * + * This is the raw counting mechanism, without any writer protection. + * + * Write side critical sections must be serialized and non-preemptible. + * + * If readers can be invoked from hardirq or softirq contexts, + * interrupts or bottom halves must also be respectively disabled before + * entering the write section. + * + * This mechanism can't be used if the protected data contains pointers, + * as the writer can invalidate a pointer that a reader is following. + * + * If the write serialization mechanism is one of the common kernel + * locking primitives, use a sequence counter with associated lock + * (seqcount_LOCKNAME_t) instead. + * + * If it's desired to automatically handle the sequence counter writer + * serialization and non-preemptibility requirements, use a sequential + * lock (seqlock_t) instead. + * + * See Documentation/locking/seqlock.rst + */ +typedef struct seqcount { + unsigned sequence; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} seqcount_t; + +/* + * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot + * disable preemption. It can lead to higher latencies, and the write side + * sections will not be able to acquire locks which become sleeping locks + * (e.g. spinlock_t). + * + * To remain preemptible while avoiding a possible livelock caused by the + * reader preempting the writer, use a different technique: let the reader + * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the + * case, acquire then release the associated LOCKNAME writer serialization + * lock. This will allow any possibly-preempted writer to make progress + * until the end of its writer serialization lock critical section. + * + * This lock-unlock technique must be implemented for all of PREEMPT_RT + * sleeping locks. See Documentation/locking/locktypes.rst + */ +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) +#define __SEQ_LOCK(expr) expr +#else +#define __SEQ_LOCK(expr) +#endif + +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ +typedef struct seqcount_##lockname { \ + seqcount_t seqcount; \ + __SEQ_LOCK(locktype *lock); \ +} seqcount_##lockname##_t; + +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) +#undef SEQCOUNT_LOCKNAME + +/* + * Sequential locks (seqlock_t) + * + * Sequence counters with an embedded spinlock for writer serialization + * and non-preemptibility. + * + * For more info, see: + * - Comments on top of seqcount_t + * - Documentation/locking/seqlock.rst + */ +typedef struct { + /* + * Make sure that readers don't starve writers on PREEMPT_RT: use + * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). + */ + seqcount_spinlock_t seqcount; + spinlock_t lock; +} seqlock_t; + +#endif /* __LINUX_SEQLOCK_TYPES_H */ diff --git a/include/linux/serdev.h b/include/linux/serdev.h index f5f97fa25e8a..ff78efc1f60d 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -27,7 +27,7 @@ struct serdev_device; * not sleep. */ struct serdev_device_ops { - int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t); + size_t (*receive_buf)(struct serdev_device *, const u8 *, size_t); void (*write_wakeup)(struct serdev_device *); }; @@ -82,7 +82,7 @@ enum serdev_parity { * serdev controller structures */ struct serdev_controller_ops { - int (*write_buf)(struct serdev_controller *, const unsigned char *, size_t); + ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t); void (*write_flush)(struct serdev_controller *); int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); @@ -99,12 +99,14 @@ struct serdev_controller_ops { /** * struct serdev_controller - interface to the serdev controller * @dev: Driver model representation of the device. + * @host: Serial port hardware controller device * @nr: number identifier for this controller/bus. * @serdev: Pointer to slave device for this controller. * @ops: Controller operations. */ struct serdev_controller { struct device dev; + struct device *host; unsigned int nr; struct serdev_device *serdev; const struct serdev_controller_ops *ops; @@ -167,7 +169,9 @@ struct serdev_device *serdev_device_alloc(struct serdev_controller *); int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); -struct serdev_controller *serdev_controller_alloc(struct device *, size_t); +struct serdev_controller *serdev_controller_alloc(struct device *host, + struct device *parent, + size_t size); int serdev_controller_add(struct serdev_controller *); void serdev_controller_remove(struct serdev_controller *); @@ -181,9 +185,9 @@ static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl serdev->ops->write_wakeup(serdev); } -static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, - const unsigned char *data, - size_t count) +static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl, + const u8 *data, + size_t count) { struct serdev_device *serdev = ctrl->serdev; @@ -200,13 +204,13 @@ void serdev_device_close(struct serdev_device *); int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); -int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); +int serdev_device_write_buf(struct serdev_device *, const u8 *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); -int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, long); +ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); @@ -244,7 +248,7 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} static inline int serdev_device_write_buf(struct serdev_device *serdev, - const unsigned char *buf, + const u8 *buf, size_t count) { return -ENODEV; @@ -262,8 +266,9 @@ static inline int serdev_device_break_ctl(struct serdev_device *serdev, int brea { return -EOPNOTSUPP; } -static inline int serdev_device_write(struct serdev_device *sdev, const unsigned char *buf, - size_t count, unsigned long timeout) +static inline ssize_t serdev_device_write(struct serdev_device *sdev, + const u8 *buf, size_t count, + unsigned long timeout) { return -ENODEV; } @@ -311,11 +316,13 @@ struct tty_driver; #ifdef CONFIG_SERIAL_DEV_CTRL_TTYPORT struct device *serdev_tty_port_register(struct tty_port *port, + struct device *host, struct device *parent, struct tty_driver *drv, int idx); int serdev_tty_port_unregister(struct tty_port *port); #else static inline struct device *serdev_tty_port_register(struct tty_port *port, + struct device *host, struct device *parent, struct tty_driver *drv, int idx) { diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index be65de65fe61..fd59ed2cca53 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -210,6 +210,12 @@ int serial8250_console_exit(struct uart_port *port); void serial8250_set_isa_configurator(void (*v)(int port, struct uart_port *up, u32 *capabilities)); +#ifdef CONFIG_SERIAL_8250_CONSOLE +extern int hp300_setup_serial_console(void) __init; +#else +static inline int hp300_setup_serial_console(void) { return 0; } +#endif + #ifdef CONFIG_SERIAL_8250_RT288X int rt288x_setup(struct uart_port *p); int au_platform_setup(struct plat_serial8250_port *p); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index bb6f073bc159..0a0f6e21d40e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -467,9 +467,10 @@ struct uart_port { unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ + unsigned char iotype; /* io access style */ - unsigned char quirks; /* internal quirks */ +#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ @@ -479,7 +480,9 @@ struct uart_port { #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ - /* quirks must be updated while holding port mutex */ + unsigned char quirks; /* internal quirks */ + + /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ @@ -588,6 +591,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); @@ -669,8 +751,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -698,7 +789,8 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ + __port->ops->tx_empty(__port)) \ __port->ops->stop_tx(__port); \ } \ \ @@ -733,7 +825,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -747,8 +839,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ @@ -773,9 +878,9 @@ static inline unsigned long uart_fifo_timeout(struct uart_port *port) } /* Base timer interval for polling */ -static inline int uart_poll_timeout(struct uart_port *port) +static inline unsigned long uart_poll_timeout(struct uart_port *port) { - int timeout = uart_fifo_timeout(port); + unsigned long timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } @@ -858,6 +963,8 @@ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); +int uart_read_port_properties(struct uart_port *port); +int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); @@ -956,14 +1063,14 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock(&port->lock); + uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock(&port->lock); + uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -975,14 +1082,14 @@ static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -998,12 +1105,12 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - spin_unlock(&port->lock); + uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ diff --git a/include/linux/serio.h b/include/linux/serio.h index 6c27d413da92..7ca41af93b37 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -15,7 +15,7 @@ #include <linux/mod_devicetable.h> #include <uapi/linux/serio.h> -extern struct bus_type serio_bus; +extern const struct bus_type serio_bus; struct serio { void *port_data; diff --git a/include/linux/shm.h b/include/linux/shm.h index d8e69aed3d32..c55bef0538e5 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -2,12 +2,12 @@ #ifndef _LINUX_SHM_H_ #define _LINUX_SHM_H_ -#include <linux/list.h> +#include <linux/types.h> #include <asm/page.h> -#include <uapi/linux/shm.h> #include <asm/shmparam.h> struct file; +struct task_struct; #ifdef CONFIG_SYSVIPC struct sysv_shm { diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 6b0c626620f5..3fb18f7eb73e 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -23,18 +23,22 @@ struct shmem_inode_info { unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ - pgoff_t fallocend; /* highest fallocate endindex */ - struct list_head shrinklist; /* shrinkable hpage inodes */ - struct list_head swaplist; /* chain of maybes on swap */ + union { + struct offset_ctx dir_offsets; /* stable directory offsets */ + struct { + struct list_head shrinklist; /* shrinkable hpage inodes */ + struct list_head swaplist; /* chain of maybes on swap */ + }; + }; + struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ + pgoff_t fallocend; /* highest fallocate endindex */ + unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ - struct timespec64 i_crtime; /* file creation time */ - unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */ #ifdef CONFIG_TMPFS_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; + struct dquot __rcu *i_dquot[MAXQUOTAS]; #endif - struct offset_ctx dir_offsets; /* stable entry offsets */ struct inode vfs_inode; }; @@ -93,11 +97,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM -extern const struct address_space_operations shmem_aops; -static inline bool shmem_mapping(struct address_space *mapping) -{ - return mapping->a_ops == &shmem_aops; -} +bool shmem_mapping(struct address_space *mapping); #else static inline bool shmem_mapping(struct address_space *mapping) { @@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags); +#else +static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags) +{ + return false; +} +#endif + #ifdef CONFIG_SHMEM extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); #else diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 224293b2dd06..1a00be90d93a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,6 +4,25 @@ #include <linux/atomic.h> #include <linux/types.h> +#include <linux/refcount.h> +#include <linux/completion.h> + +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. + */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; /* * This struct is used to pass information from page reclaim to the shrinkers. @@ -70,6 +89,19 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. + */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -86,48 +118,39 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) +#define SHRINKER_NONSLAB BIT(4) -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); +__printf(2, 3) +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); -#ifdef CONFIG_SHRINKER_DEBUG -extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id); -extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id); -extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, - const char *fmt, ...); -#else /* CONFIG_SHRINKER_DEBUG */ -static inline int shrinker_debugfs_add(struct shrinker *shrinker) -{ - return 0; -} -static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id) +static inline bool shrinker_try_get(struct shrinker *shrinker) { - *debugfs_id = -1; - return NULL; + return refcount_inc_not_zero(&shrinker->refcount); } -static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id) + +static inline void shrinker_put(struct shrinker *shrinker) { + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); } + +#ifdef CONFIG_SHRINKER_DEBUG +extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, + const char *fmt, ...); +#else /* CONFIG_SHRINKER_DEBUG */ static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { diff --git a/include/linux/signal.h b/include/linux/signal.h index 3b98e7a28538..f19816832f05 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -3,6 +3,7 @@ #define _LINUX_SIGNAL_H #include <linux/bug.h> +#include <linux/list.h> #include <linux/signal_types.h> #include <linux/string.h> diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index a70b2bdbf4d9..caf4f7a59ab9 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -6,7 +6,7 @@ * Basic signal handling related data type definitions: */ -#include <linux/list.h> +#include <linux/types.h> #include <uapi/linux/signal.h> typedef struct kernel_siginfo { diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 84aa448d8bb3..c3a00b967d18 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -47,8 +47,17 @@ #define SZ_8G _AC(0x200000000, ULL) #define SZ_16G _AC(0x400000000, ULL) #define SZ_32G _AC(0x800000000, ULL) +#define SZ_64G _AC(0x1000000000, ULL) +#define SZ_128G _AC(0x2000000000, ULL) +#define SZ_256G _AC(0x4000000000, ULL) +#define SZ_512G _AC(0x8000000000, ULL) #define SZ_1T _AC(0x10000000000, ULL) +#define SZ_2T _AC(0x20000000000, ULL) +#define SZ_4T _AC(0x40000000000, ULL) +#define SZ_8T _AC(0x80000000000, ULL) +#define SZ_16T _AC(0x100000000000, ULL) +#define SZ_32T _AC(0x200000000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97bfef071255..9d24aec064e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #endif #include <net/net_debug.h> #include <net/dropreason-core.h> +#include <net/netmem.h> /** * DOC: skb checksums @@ -295,7 +296,7 @@ struct nf_bridge_info { u8 bridged_dnat:1; u8 sabotage_in_done:1; __u16 frag_max_size; - struct net_device *physindev; + int physinif; /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; @@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags; */ #define GSO_BY_FRAGS 0xFFFF -typedef struct bio_vec skb_frag_t; +typedef struct skb_frag { + netmem_ref netmem; + unsigned int len; + unsigned int offset; +} skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment @@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t; */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->bv_len; + return frag->len; } /** @@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->bv_len = size; + frag->len = size; } /** @@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->bv_len += delta; + frag->len += delta; } /** @@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->bv_len -= delta; + frag->len -= delta; } /** @@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p) * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->bv_page + * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -566,6 +571,15 @@ struct ubuf_info_msgzc { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); +/* Preserve some data across TX submission and completion. + * + * Note, this state is stored in the driver. Extending the layout + * might need some special care. + */ +struct xsk_tx_metadata_compl { + __u64 *tx_timestamp; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -578,7 +592,10 @@ struct skb_shared_info { /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; - struct skb_shared_hwtstamps hwtstamps; + union { + struct skb_shared_hwtstamps hwtstamps; + struct xsk_tx_metadata_compl xsk_meta; + }; unsigned int gso_type; u32 tskey; @@ -736,13 +753,10 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) - * @sp: the security path, used for xfrm * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header @@ -776,7 +790,6 @@ typedef unsigned char *sk_buff_data_t; * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) - * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash @@ -860,10 +873,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; @@ -1057,7 +1067,7 @@ struct sk_buff { refcount_t users; #ifdef CONFIG_SKB_EXTENSIONS - /* only useable after checking ->active_extensions != 0 */ + /* only usable after checking ->active_extensions != 0 */ struct skb_ext *extensions; #endif }; @@ -1222,6 +1232,24 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +static inline bool skb_data_unref(const struct sk_buff *skb, + struct skb_shared_info *shinfo) +{ + int bias; + + if (!skb->cloned) + return true; + + bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + if (atomic_read(&shinfo->dataref) == bias) + smp_rmb(); + else if (atomic_sub_return(bias, &shinfo->dataref)) + return false; + + return true; +} + void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); @@ -1256,7 +1284,6 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, @@ -2419,22 +2446,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, + netmem_ref netmem, int off, + int size) +{ + frag->netmem = netmem; + frag->offset = off; + skb_frag_size_set(frag, size); +} + static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); +} + +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, + int i, netmem_ref netmem, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &shinfo->frags[i]; - - skb_frag_fill_page_desc(frag, page, off, size); + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, + size); } /** @@ -2450,10 +2492,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) } /** - * __skb_fill_page_desc - initialise a paged fragment in an skb + * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised - * @i: paged fragment index to initialise - * @page: the page to use for this fragment + * @i: fragment index to initialise + * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * @@ -2462,10 +2504,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) { - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + struct page *page = netmem_to_page(netmem); + + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely @@ -2473,7 +2517,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, */ page = compound_head(page); if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; + skb->pfmemalloc = true; +} + +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); +} + +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, netmem, off, size); + skb_shinfo(skb)->nr_frags = i + 1; } /** @@ -2493,8 +2550,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - __skb_fill_page_desc(skb, i, page, off, size); - skb_shinfo(skb)->nr_frags = i + 1; + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** @@ -2518,8 +2574,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize); + +static inline void skb_add_rx_frag(struct sk_buff *skb, int i, + struct page *page, int off, int size, + unsigned int truesize) +{ + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, + truesize); +} void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); @@ -2632,6 +2696,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val) void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->data -= len; skb->len += len; return skb->data; @@ -2640,6 +2706,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) @@ -2664,6 +2732,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -2836,6 +2906,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; @@ -2973,6 +3048,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } @@ -3299,7 +3375,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. - * 1. This is for device Rx, therefor a cold page is preferred. + * 1. This is for device Rx, therefore a cold page is preferred. * 2. The expectation is the user wants a compound page. * 3. If requesting a order 0 page it will not be compound * due to the check to see if order has a value in prep_new_page @@ -3368,7 +3444,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->bv_offset; + return frag->offset; } /** @@ -3378,7 +3454,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { - frag->bv_offset += delta; + frag->offset += delta; } /** @@ -3388,7 +3464,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { - frag->bv_offset = offset; + frag->offset = offset; } /** @@ -3399,7 +3475,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_offset = fragfrom->bv_offset; + fragto->offset = fragfrom->offset; } /** @@ -3410,7 +3486,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->bv_page; + return netmem_to_page(frag->netmem); } /** @@ -3436,9 +3512,23 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom); +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog); bool napi_pp_put_page(struct page *page, bool napi_safe); static inline void +skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe) +{ +#ifdef CONFIG_PAGE_POOL + if (skb->pp_recycle && napi_pp_put_page(page, napi_safe)) + return; +#endif + put_page(page); +} + +static inline void napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) { struct page *page = skb_frag_page(frag); @@ -3514,7 +3604,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_page = fragfrom->bv_page; + fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); @@ -3679,6 +3769,9 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) + __must_check; + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { @@ -3994,6 +4087,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); +int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); @@ -4232,10 +4326,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); - /* Using more efficient varaiant than plain call to memcmp(). */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 diffs = 0; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + BITS_PER_LONG != 64) + goto slow; + + /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) @@ -4255,11 +4352,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; + default: +slow: + return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; -#else - return memcmp(a - meta_len, b - meta_len, meta_len); -#endif } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c1637515a8a4..e65ec3fd2799 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -100,12 +100,18 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); + /* psock_update_sk_prot may be called with restore=false many times + * so the handler must be safe for this case. It will be called + * exactly once with restore=true when the psock is being destroyed + * and psock refcnt is zero, but before an RCU grace period. + */ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, bool restore); struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; struct delayed_work work; + struct sock *sk_pair; struct rcu_work rwork; }; @@ -499,12 +505,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/linux/slab.h b/include/linux/slab.h index 8228d1276a2f..e53cbfa18325 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -21,29 +21,69 @@ #include <linux/cleanup.h> #include <linux/hash.h> +enum _slab_flag_bits { + _SLAB_CONSISTENCY_CHECKS, + _SLAB_RED_ZONE, + _SLAB_POISON, + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_CACHE_DMA, + _SLAB_CACHE_DMA32, + _SLAB_STORE_USER, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_TRACE, +#ifdef CONFIG_DEBUG_OBJECTS + _SLAB_DEBUG_OBJECTS, +#endif + _SLAB_NOLEAKTRACE, + _SLAB_NO_MERGE, +#ifdef CONFIG_FAILSLAB + _SLAB_FAILSLAB, +#endif +#ifdef CONFIG_MEMCG_KMEM + _SLAB_ACCOUNT, +#endif +#ifdef CONFIG_KASAN_GENERIC + _SLAB_KASAN, +#endif + _SLAB_NO_USER_FLAGS, +#ifdef CONFIG_KFENCE + _SLAB_SKIP_KFENCE, +#endif +#ifndef CONFIG_SLUB_TINY + _SLAB_RECLAIM_ACCOUNT, +#endif + _SLAB_OBJECT_POISON, + _SLAB_CMPXCHG_DOUBLE, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) /* * Flags to pass to kmem_cache_create(). - * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. + * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op */ /* DEBUG: Perform (expensive) checks on alloc/free */ -#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) +#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) /* DEBUG: Red zone objs in a cache */ -#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) +#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) /* DEBUG: Poison objects */ -#define SLAB_POISON ((slab_flags_t __force)0x00000800U) +#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ -#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U) +#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) /* Align objs on cache lines */ -#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ -#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) /* Use GFP_DMA32 memory */ -#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) +#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) +#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ -#define SLAB_PANIC ((slab_flags_t __force)0x00040000U) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * @@ -95,21 +135,19 @@ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ -#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U) -/* Spread some memory over cpuset */ -#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ -#define SLAB_TRACE ((slab_flags_t __force)0x00200000U) +#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS -# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U) +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) #else -# define SLAB_DEBUG_OBJECTS 0 +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED #endif /* Avoid kmemleak tracing */ -#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) +#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) /* * Prevent merging with compatible kmem caches. This flag should be used @@ -121,25 +159,25 @@ * - performance critical caches, should be very rare and consulted with slab * maintainers, and not used together with CONFIG_SLUB_TINY */ -#define SLAB_NO_MERGE ((slab_flags_t __force)0x01000000U) +#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) /* Fault injection mark */ #ifdef CONFIG_FAILSLAB -# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) +# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) #else -# define SLAB_FAILSLAB 0 +# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /* Account to memcg */ #ifdef CONFIG_MEMCG_KMEM -# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U) +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else -# define SLAB_ACCOUNT 0 +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED #endif #ifdef CONFIG_KASAN_GENERIC -#define SLAB_KASAN ((slab_flags_t __force)0x08000000U) +#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) #else -#define SLAB_KASAN 0 +#define SLAB_KASAN __SLAB_FLAG_UNUSED #endif /* @@ -147,20 +185,20 @@ * Intended for caches created for self-tests so they have only flags * specified in the code and other flags are ignored. */ -#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) +#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) #ifdef CONFIG_KFENCE -#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U) +#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) #else -#define SLAB_SKIP_KFENCE 0 +#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED #endif /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #ifndef CONFIG_SLUB_TINY -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ @@ -245,8 +283,9 @@ DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) size_t ksize(const void *objp); #ifdef CONFIG_PRINTK -bool kmem_valid_obj(void *object); -void kmem_dump_obj(void *object); +bool kmem_dump_obj(void *object); +#else +static inline bool kmem_dump_obj(void *object) { return false; } #endif /* @@ -301,25 +340,15 @@ static inline unsigned int arch_slab_minalign(void) * Kmalloc array related definitions */ -#ifdef CONFIG_SLAB /* - * SLAB and SLUB directly allocates requests fitting in to an order-1 page + * SLUB directly allocates requests fitting in to an order-1 page * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) -#ifndef KMALLOC_SHIFT_LOW -#define KMALLOC_SHIFT_LOW 5 -#endif -#endif - -#ifdef CONFIG_SLUB -#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif -#endif /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) @@ -763,6 +792,8 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); extern void kvfree(const void *addr); +DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); @@ -785,12 +816,4 @@ size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); -#if defined(CONFIG_SMP) && defined(CONFIG_SLAB) -int slab_prepare_cpu(unsigned int cpu); -int slab_dead_cpu(unsigned int cpu); -#else -#define slab_prepare_cpu NULL -#define slab_dead_cpu NULL -#endif - #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h deleted file mode 100644 index a61e7d55d0d3..000000000000 --- a/include/linux/slab_def.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SLAB_DEF_H -#define _LINUX_SLAB_DEF_H - -#include <linux/kfence.h> -#include <linux/reciprocal_div.h> - -/* - * Definitions unique to the original Linux SLAB allocator. - */ - -struct kmem_cache { - struct array_cache __percpu *cpu_cache; - -/* 1) Cache tunables. Protected by slab_mutex */ - unsigned int batchcount; - unsigned int limit; - unsigned int shared; - - unsigned int size; - struct reciprocal_value reciprocal_buffer_size; -/* 2) touched by every alloc & free from the backend */ - - slab_flags_t flags; /* constant flags */ - unsigned int num; /* # of objs per slab */ - -/* 3) cache_grow/shrink */ - /* order of pgs per slab (2^n) */ - unsigned int gfporder; - - /* force GFP flags, e.g. GFP_DMA */ - gfp_t allocflags; - - size_t colour; /* cache colouring range */ - unsigned int colour_off; /* colour offset */ - unsigned int freelist_size; - - /* constructor func */ - void (*ctor)(void *obj); - -/* 4) cache creation/removal */ - const char *name; - struct list_head list; - int refcount; - int object_size; - int align; - -/* 5) statistics */ -#ifdef CONFIG_DEBUG_SLAB - unsigned long num_active; - unsigned long num_allocations; - unsigned long high_mark; - unsigned long grown; - unsigned long reaped; - unsigned long errors; - unsigned long max_freeable; - unsigned long node_allocs; - unsigned long node_frees; - unsigned long node_overflow; - atomic_t allochit; - atomic_t allocmiss; - atomic_t freehit; - atomic_t freemiss; - - /* - * If debugging is enabled, then the allocator can add additional - * fields and/or padding to every object. 'size' contains the total - * object size including these internal fields, while 'obj_offset' - * and 'object_size' contain the offset to the user object and its - * size. - */ - int obj_offset; -#endif /* CONFIG_DEBUG_SLAB */ - -#ifdef CONFIG_KASAN_GENERIC - struct kasan_cache kasan_info; -#endif - -#ifdef CONFIG_SLAB_FREELIST_RANDOM - unsigned int *random_seq; -#endif - -#ifdef CONFIG_HARDENED_USERCOPY - unsigned int useroffset; /* Usercopy region offset */ - unsigned int usersize; /* Usercopy region size */ -#endif - - struct kmem_cache_node *node[MAX_NUMNODES]; -}; - -static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, - void *x) -{ - void *object = x - (x - slab->s_mem) % cache->size; - void *last_object = slab->s_mem + (cache->num - 1) * cache->size; - - if (unlikely(object > last_object)) - return last_object; - else - return object; -} - -/* - * We want to avoid an expensive divide : (offset / cache->size) - * Using the fact that size is a constant for a particular cache, - * we can replace (offset / cache->size) by - * reciprocal_divide(offset, cache->reciprocal_buffer_size) - */ -static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) -{ - u32 offset = (obj - slab->s_mem); - return reciprocal_divide(offset, cache->reciprocal_buffer_size); -} - -static inline int objs_per_slab(const struct kmem_cache *cache, - const struct slab *slab) -{ - if (is_kfence_address(slab_address(slab))) - return 1; - return cache->num; -} - -#endif /* _LINUX_SLAB_DEF_H */ diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h index 12c9719b2a55..3042385b7b40 100644 --- a/include/linux/slimbus.h +++ b/include/linux/slimbus.h @@ -10,7 +10,7 @@ #include <linux/completion.h> #include <linux/mod_devicetable.h> -extern struct bus_type slimbus_bus; +extern const struct bus_type slimbus_bus; /** * struct slim_eaddr - Enumeration address for a SLIMbus device diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h deleted file mode 100644 index deb90cf4bffb..000000000000 --- a/include/linux/slub_def.h +++ /dev/null @@ -1,204 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SLUB_DEF_H -#define _LINUX_SLUB_DEF_H - -/* - * SLUB : A Slab allocator without object queues. - * - * (C) 2007 SGI, Christoph Lameter - */ -#include <linux/kfence.h> -#include <linux/kobject.h> -#include <linux/reciprocal_div.h> -#include <linux/local_lock.h> - -enum stat_item { - ALLOC_FASTPATH, /* Allocation from cpu slab */ - ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ - FREE_FASTPATH, /* Free to cpu slab */ - FREE_SLOWPATH, /* Freeing not to cpu slab */ - FREE_FROZEN, /* Freeing to frozen slab */ - FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ - FREE_REMOVE_PARTIAL, /* Freeing removes last object */ - ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */ - ALLOC_SLAB, /* Cpu slab acquired from page allocator */ - ALLOC_REFILL, /* Refill cpu slab from slab freelist */ - ALLOC_NODE_MISMATCH, /* Switching cpu slab */ - FREE_SLAB, /* Slab freed to the page allocator */ - CPUSLAB_FLUSH, /* Abandoning of the cpu slab */ - DEACTIVATE_FULL, /* Cpu slab was full when deactivated */ - DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */ - DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */ - DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */ - DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */ - DEACTIVATE_BYPASS, /* Implicit deactivation */ - ORDER_FALLBACK, /* Number of times fallback was necessary */ - CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */ - CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */ - CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */ - CPU_PARTIAL_FREE, /* Refill cpu partial on free */ - CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */ - CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ - NR_SLUB_STAT_ITEMS -}; - -#ifndef CONFIG_SLUB_TINY -/* - * When changing the layout, make sure freelist and tid are still compatible - * with this_cpu_cmpxchg_double() alignment requirements. - */ -struct kmem_cache_cpu { - union { - struct { - void **freelist; /* Pointer to next available object */ - unsigned long tid; /* Globally unique transaction id */ - }; - freelist_aba_t freelist_tid; - }; - struct slab *slab; /* The slab from which we are allocating */ -#ifdef CONFIG_SLUB_CPU_PARTIAL - struct slab *partial; /* Partially allocated frozen slabs */ -#endif - local_lock_t lock; /* Protects the fields above */ -#ifdef CONFIG_SLUB_STATS - unsigned stat[NR_SLUB_STAT_ITEMS]; -#endif -}; -#endif /* CONFIG_SLUB_TINY */ - -#ifdef CONFIG_SLUB_CPU_PARTIAL -#define slub_percpu_partial(c) ((c)->partial) - -#define slub_set_percpu_partial(c, p) \ -({ \ - slub_percpu_partial(c) = (p)->next; \ -}) - -#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c)) -#else -#define slub_percpu_partial(c) NULL - -#define slub_set_percpu_partial(c, p) - -#define slub_percpu_partial_read_once(c) NULL -#endif // CONFIG_SLUB_CPU_PARTIAL - -/* - * Word size structure that can be atomically updated or read and that - * contains both the order and the number of objects that a slab of the - * given order would contain. - */ -struct kmem_cache_order_objects { - unsigned int x; -}; - -/* - * Slab cache management. - */ -struct kmem_cache { -#ifndef CONFIG_SLUB_TINY - struct kmem_cache_cpu __percpu *cpu_slab; -#endif - /* Used for retrieving partial slabs, etc. */ - slab_flags_t flags; - unsigned long min_partial; - unsigned int size; /* The size of an object including metadata */ - unsigned int object_size;/* The size of an object without metadata */ - struct reciprocal_value reciprocal_size; - unsigned int offset; /* Free pointer offset */ -#ifdef CONFIG_SLUB_CPU_PARTIAL - /* Number of per cpu partial objects to keep around */ - unsigned int cpu_partial; - /* Number of per cpu partial slabs to keep around */ - unsigned int cpu_partial_slabs; -#endif - struct kmem_cache_order_objects oo; - - /* Allocation and freeing of slabs */ - struct kmem_cache_order_objects min; - gfp_t allocflags; /* gfp flags to use on each alloc */ - int refcount; /* Refcount for slab cache destroy */ - void (*ctor)(void *); - unsigned int inuse; /* Offset to metadata */ - unsigned int align; /* Alignment */ - unsigned int red_left_pad; /* Left redzone padding size */ - const char *name; /* Name (only for display!) */ - struct list_head list; /* List of slab caches */ -#ifdef CONFIG_SYSFS - struct kobject kobj; /* For sysfs */ -#endif -#ifdef CONFIG_SLAB_FREELIST_HARDENED - unsigned long random; -#endif - -#ifdef CONFIG_NUMA - /* - * Defragmentation by allocating from a remote node. - */ - unsigned int remote_node_defrag_ratio; -#endif - -#ifdef CONFIG_SLAB_FREELIST_RANDOM - unsigned int *random_seq; -#endif - -#ifdef CONFIG_KASAN_GENERIC - struct kasan_cache kasan_info; -#endif - -#ifdef CONFIG_HARDENED_USERCOPY - unsigned int useroffset; /* Usercopy region offset */ - unsigned int usersize; /* Usercopy region size */ -#endif - - struct kmem_cache_node *node[MAX_NUMNODES]; -}; - -#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) -#define SLAB_SUPPORTS_SYSFS -void sysfs_slab_unlink(struct kmem_cache *); -void sysfs_slab_release(struct kmem_cache *); -#else -static inline void sysfs_slab_unlink(struct kmem_cache *s) -{ -} -static inline void sysfs_slab_release(struct kmem_cache *s) -{ -} -#endif - -void *fixup_red_left(struct kmem_cache *s, void *p); - -static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, - void *x) { - void *object = x - (x - slab_address(slab)) % cache->size; - void *last_object = slab_address(slab) + - (slab->objects - 1) * cache->size; - void *result = (unlikely(object > last_object)) ? last_object : object; - - result = fixup_red_left(cache, result); - return result; -} - -/* Determine object index from a given position */ -static inline unsigned int __obj_to_index(const struct kmem_cache *cache, - void *addr, void *obj) -{ - return reciprocal_divide(kasan_reset_tag(obj) - addr, - cache->reciprocal_size); -} - -static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) -{ - if (is_kfence_address(obj)) - return 0; - return __obj_to_index(cache, slab_address(slab), obj); -} - -static inline int objs_per_slab(const struct kmem_cache *cache, - const struct slab *slab) -{ - return slab->objects; -} -#endif /* _LINUX_SLUB_DEF_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca..fcd61dfe2af3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); -int smp_call_function_single_async(int cpu, struct __call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* * Cpus stopping functions in panic. All have default weak definitions. @@ -105,6 +105,12 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask); } +/* + * Architecture specific boot CPU setup. Defined as empty weak function in + * init/main.c. Architectures can override it. + */ +void smp_prepare_boot_cpu(void); + #ifdef CONFIG_SMP #include <linux/preempt.h> @@ -171,12 +177,6 @@ void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt -/* - * Mark the boot cpu "online" so that it can call console drivers in - * printk() and can access its per-cpu storage. - */ -void smp_prepare_boot_cpu(void); - extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); @@ -203,7 +203,6 @@ static inline void up_smp_call_function(smp_call_func_t func, void *info) (up_smp_call_function(func, info)) static inline void smp_send_reschedule(int cpu) { } -#define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void call_function_init(void) { } @@ -218,6 +217,8 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } static inline void wake_up_all_idle_cpus(void) { } +#define setup_max_cpus 0 + #ifdef CONFIG_UP_LATE_INIT extern void __init up_late_init(void); static inline void smp_init(void) { up_late_init(); } @@ -261,7 +262,7 @@ static inline int get_boot_cpu_id(void) * regular asm read for the stable. */ #ifndef __smp_processor_id -#define __smp_processor_id(x) raw_smp_processor_id(x) +#define __smp_processor_id() raw_smp_processor_id() #endif #ifdef CONFIG_DEBUG_PREEMPT diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h new file mode 100644 index 000000000000..edc3182d6e66 --- /dev/null +++ b/include/linux/soc/andes/irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Andes Technology Corporation + */ +#ifndef __ANDES_IRQ_H +#define __ANDES_IRQ_H + +/* Andes PMU irq number */ +#define ANDES_RV_IRQ_PMOVI 18 +#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI +#define ANDES_SLI_CAUSE_BASE 256 + +/* Andes PMU related registers */ +#define ANDES_CSR_SLIE 0x9c4 +#define ANDES_CSR_SLIP 0x9c5 +#define ANDES_CSR_SCOUNTEROF 0x9d4 + +#endif /* __ANDES_IRQ_H */ diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index fc456f75c131..8c9ca857ccf6 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -161,24 +161,6 @@ int apple_rtkit_send_message(struct apple_rtkit *rtk, u8 ep, u64 message, struct completion *completion, bool atomic); /* - * Send a message to the given endpoint and wait until it has been submitted - * to the hardware FIFO. - * Will return zero on success and a negative error code on failure - * (e.g. -ETIME when the message couldn't be written within the given - * timeout) - * - * @rtk: RTKit reference - * @ep: target endpoint - * @message: message to be sent - * @timeout: timeout in milliseconds to allow the message transmission - * to be completed - * @atomic: if set to true this function can be called from atomic - * context. - */ -int apple_rtkit_send_message_wait(struct apple_rtkit *rtk, u8 ep, u64 message, - unsigned long timeout, bool atomic); - -/* * Process incoming messages in atomic context. * This only guarantees that messages arrive as far as the recv_message_early * callback; drivers expecting to handle incoming messages synchronously diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 07f67b3d8e97..6c6cccc848f4 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,47 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8365_INFRA_TOPAXI_PROTECTEN_STA1 0x228 +#define MT8365_INFRA_TOPAXI_PROTECTEN_SET 0x2a0 +#define MT8365_INFRA_TOPAXI_PROTECTEN_CLR 0x2a4 +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM_M0 BIT(1) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MDMCU_M1 BIT(2) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MMAPB_S BIT(6) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_0 BIT(10) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_1 BIT(11) +#define MT8365_INFRA_TOPAXI_PROTECTEN_AP2CONN_AHB BIT(13) +#define MT8365_INFRA_TOPAXI_PROTECTEN_CONN2INFRA_AHB BIT(14) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MFG_M0 BIT(21) +#define MT8365_INFRA_TOPAXI_PROTECTEN_INFRA2MFG BIT(22) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_STA1 0x258 +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_SET 0x2a8 +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CLR 0x2ac +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU2AP BIT(2) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_0 BIT(16) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_1 BIT(17) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CONN2INFRA_AXI_GALS_MST BIT(18) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CAM2MM_AXI_GALS_MST BIT(19) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU_CBIP_GALS_MST BIT(20) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_INFRA2CONN_AHB_GALS_SLV BIT(21) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_INFRA_GALS_ADB BIT(24) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_MP1_L2C_AFIFO BIT(27) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_AUDIO_M BIT(28) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_M BIT(30) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_S BIT(31) + +#define MT8365_INFRA_NAO_TOPAXI_SI0_STA 0x0 +#define MT8365_INFRA_NAO_TOPAXI_SI0_CTRL_UPDATED BIT(24) +#define MT8365_INFRA_NAO_TOPAXI_SI2_STA 0x28 +#define MT8365_INFRA_NAO_TOPAXI_SI2_CTRL_UPDATED BIT(14) +#define MT8365_INFRA_TOPAXI_SI0_CTL 0x200 +#define MT8365_INFRA_TOPAXI_SI0_WAY_EN_MMAPB_S BIT(6) +#define MT8365_INFRA_TOPAXI_SI2_CTL 0x234 +#define MT8365_INFRA_TOPAXI_SI2_WAY_EN_PERI_M1 BIT(5) + +#define MT8365_SMI_COMMON_CLAMP_EN 0x3c0 +#define MT8365_SMI_COMMON_CLAMP_EN_SET 0x3c4 +#define MT8365_SMI_COMMON_CLAMP_EN_CLR 0x3c8 + #define MT8195_TOP_AXI_PROT_EN_STA1 0x228 #define MT8195_TOP_AXI_PROT_EN_1_STA1 0x258 #define MT8195_TOP_AXI_PROT_EN_SET 0x2a0 diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 2475ef914746..4885b065b849 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -62,6 +62,14 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_OVL_2L1, DDP_COMPONENT_OVL_2L2, DDP_COMPONENT_OVL1, + DDP_COMPONENT_PADDING0, + DDP_COMPONENT_PADDING1, + DDP_COMPONENT_PADDING2, + DDP_COMPONENT_PADDING3, + DDP_COMPONENT_PADDING4, + DDP_COMPONENT_PADDING5, + DDP_COMPONENT_PADDING6, + DDP_COMPONENT_PADDING7, DDP_COMPONENT_POSTMASK0, DDP_COMPONENT_PWM0, DDP_COMPONENT_PWM1, diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index b2b28180dff7..a476648858a6 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -10,6 +10,7 @@ #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define MTK_WED_RX_PAGE_QUEUES 3 #define WED_WO_STA_REC 0x6 @@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; -struct mtk_rxbm_desc { +struct mtk_wed_bm_desc { __le32 buf0; __le32 token; } __packed __aligned(4); @@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats { __le32 rx_drop_cnt; }; +struct mtk_wed_buf { + void *p; + dma_addr_t phy_addr; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -94,17 +100,20 @@ struct mtk_wed_device { struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES]; + struct mtk_wed_ring ind_cmd_ring; struct { int size; - void **pages; + struct mtk_wed_buf *pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; } tx_buf_ring; struct { int size; - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; @@ -114,6 +123,13 @@ struct mtk_wed_device { dma_addr_t fdbk_phys; } rro; + struct { + int size; + struct mtk_wed_buf *pages; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + } hw_rro; + /* filled by driver: */ struct { union { @@ -123,6 +139,7 @@ struct mtk_wed_device { enum mtk_wed_bus_tye bus_type; void __iomem *base; u32 phy_base; + u32 id; u32 wpdma_phys; u32 wpdma_int; @@ -131,18 +148,35 @@ struct mtk_wed_device { u32 wpdma_txfree; u32 wpdma_rx_glo; u32 wpdma_rx; + u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; + u32 wpdma_rx_pg; bool wcid_512; + bool hw_rro; + bool msi; u16 token_start; unsigned int nbuf; unsigned int rx_nbuf; unsigned int rx_npkt; unsigned int rx_size; + unsigned int amsdu_max_len; u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; + u8 rro_rx_tbit[MTK_WED_RX_QUEUES]; + u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES]; u8 txfree_tbit; + u8 amsdu_max_subframes; + + struct { + u8 se_group_nums; + u16 win_size; + u16 particular_sid; + u32 ack_sn_addr; + dma_addr_t particular_se_phys; + dma_addr_t addr_elem_phys[1024]; + } ind_cmd; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); @@ -182,6 +216,14 @@ struct mtk_wed_ops { void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, enum tc_setup_type type, void *type_data); + void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask, + bool reset); + void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*ind_rx_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } -static inline bool -mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) { #ifdef CONFIG_NET_MEDIATEK_SOC_WED + if (dev->version == 3) + return dev->wlan.hw_rro; + return dev->version != 1; #else return false; #endif } +static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version == 3; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) @@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \ + (_dev)->ops->start_hw_rro(_dev, _mask, _reset) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \ + (_dev)->ops->ind_rx_ring_setup(_dev, _regs) + #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV #endif #endif diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index be98aebcb3e1..7161a3183eda 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -9,7 +9,7 @@ #include <dt-bindings/soc/qcom,apr.h> #include <dt-bindings/soc/qcom,gpr.h> -extern struct bus_type aprbus; +extern const struct bus_type aprbus; #define APR_HDR_LEN(hdr_len) ((hdr_len)/4) diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 29e06905bc1f..0f038a1a0330 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -178,6 +178,7 @@ struct geni_se { #define M_GP_IRQ_3_EN BIT(12) #define M_GP_IRQ_4_EN BIT(13) #define M_GP_IRQ_5_EN BIT(14) +#define M_TX_FIFO_NOT_EMPTY_EN BIT(21) #define M_IO_DATA_DEASSERT_EN BIT(22) #define M_IO_DATA_ASSERT_EN BIT(23) #define M_RX_FIFO_RD_ERR_EN BIT(24) diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 93417ba1ead4..1a886666bbb6 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -30,7 +30,7 @@ #define LLCC_NPU 23 #define LLCC_WLHW 24 #define LLCC_PIMEM 25 -#define LLCC_DRE 26 +#define LLCC_ECC 26 #define LLCC_CVP 28 #define LLCC_MODPE 29 #define LLCC_APTCM 30 diff --git a/include/linux/soc/qcom/qcom-pbs.h b/include/linux/soc/qcom/qcom-pbs.h new file mode 100644 index 000000000000..8a46209ccf13 --- /dev/null +++ b/include/linux/soc/qcom/qcom-pbs.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _QCOM_PBS_H +#define _QCOM_PBS_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct device_node; +struct pbs_dev; + +#if IS_ENABLED(CONFIG_QCOM_PBS) +int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap); +struct pbs_dev *get_pbs_client_device(struct device *client_dev); +#else +static inline int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap) +{ + return -ENODEV; +} + +static inline struct pbs_dev *get_pbs_client_device(struct device *client_dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + +#endif diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h index a4f5516cc956..2bd9d12d9a52 100644 --- a/include/linux/soc/samsung/exynos-pmu.h +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -10,6 +10,7 @@ #define __LINUX_SOC_EXYNOS_PMU_H struct regmap; +struct device_node; enum sys_powerdown { SYS_AFTR, @@ -20,12 +21,20 @@ enum sys_powerdown { extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); #ifdef CONFIG_EXYNOS_PMU -extern struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname); #else static inline struct regmap *exynos_get_pmu_regmap(void) { return ERR_PTR(-ENODEV); } + +static inline struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname) +{ + return ERR_PTR(-ENODEV); +} #endif #endif /* __LINUX_SOC_EXYNOS_PMU_H */ diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 0b9ecd8cf979..110978dc9af1 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -13,6 +13,7 @@ struct nlmsghdr; struct sock; struct sock_diag_handler { + struct module *owner; __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); @@ -22,8 +23,13 @@ struct sock_diag_handler { int sock_diag_register(const struct sock_diag_handler *h); void sock_diag_unregister(const struct sock_diag_handler *h); -void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +struct sock_diag_inet_compat { + struct module *owner; + int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh); +}; + +void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr); +void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr); u64 __sock_gen_cookie(struct sock *sk); diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..139c330ccf2c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 @@ -421,13 +422,6 @@ extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags); -extern int sendmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct iovec **iov); -extern int recvmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct sockaddr __user **uaddr, - struct iovec **iov); extern int __copy_msghdr(struct msghdr *kmsg, struct user_msghdr *umsg, struct sockaddr __user **save_addr); diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index bae5e2369b4f..317200cd3a60 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -50,11 +50,59 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, return 0; } +/* Deprecated. + * This is unsafe, unless caller checked user provided optlen. + * Prefer copy_safe_from_sockptr() instead. + */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { return copy_from_sockptr_offset(dst, src, 0, size); } +/** + * copy_safe_from_sockptr: copy a struct from sockptr + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @optval: Source address. (in user or kernel space) + * @optlen: Size of @optval data. + * + * Returns: + * * -EINVAL: @optlen < @ksize + * * -EFAULT: access to userspace failed. + * * 0 : @ksize bytes were copied + */ +static inline int copy_safe_from_sockptr(void *dst, size_t ksize, + sockptr_t optval, unsigned int optlen) +{ + if (optlen < ksize) + return -EINVAL; + return copy_from_sockptr(dst, optval, ksize); +} + +static inline int copy_struct_from_sockptr(void *dst, size_t ksize, + sockptr_t src, size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + if (!sockptr_is_kernel(src)) + return copy_struct_from_user(dst, ksize, src.user, size); + + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + char *p = src.kernel; + + while (rest--) { + if (*p++) + return -E2BIG; + } + } + memcpy(dst, src.kernel, size); + return 0; +} + static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, const void *src, size_t size) { diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 4f3d14bb1538..66f814b63a43 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -886,7 +886,8 @@ struct sdw_master_ops { * struct sdw_bus - SoundWire bus * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @link_id: Link id number, can be 0 to N, unique for each Master + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller * @id: bus system-wide unique id * @slaves: list of Slaves on this bus * @assigned: Bitmap for Slave device numbers. @@ -918,6 +919,7 @@ struct sdw_master_ops { struct sdw_bus { struct device *dev; struct sdw_master_device *md; + int controller_id; unsigned int link_id; int id; struct list_head slaves; @@ -1040,7 +1042,7 @@ int sdw_compute_params(struct sdw_bus *bus); int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, @@ -1062,7 +1064,7 @@ void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); int sdw_stream_remove_slave(struct sdw_slave *slave, @@ -1084,7 +1086,7 @@ int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); static inline int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream) { diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index ceecad74aef9..28a4eb77717f 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -1,11 +1,12 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* - * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2023-24 Advanced Micro Devices, Inc. All rights reserved. */ #ifndef __SDW_AMD_H #define __SDW_AMD_H +#include <linux/acpi.h> #include <linux/soundwire/sdw.h> /* AMD pm_runtime quirk definitions */ @@ -25,6 +26,7 @@ #define AMD_SDW_POWER_OFF_MODE 2 #define ACP_SDW0 0 #define ACP_SDW1 1 +#define AMD_SDW_MAX_MANAGER_COUNT 2 struct acp_sdw_pdata { u16 instance; @@ -32,12 +34,6 @@ struct acp_sdw_pdata { struct mutex *acp_sdw_lock; }; -struct sdw_manager_reg_mask { - u32 sw_pad_enable_mask; - u32 sw_pad_pulldown_mask; - u32 acp_sdw_intr_mask; -}; - /** * struct sdw_amd_dai_runtime: AMD sdw dai runtime data * @@ -59,10 +55,8 @@ struct sdw_amd_dai_runtime { * @dev: linux device * @mmio: SoundWire registers mmio base * @acp_mmio: acp registers mmio base - * @reg_mask: register mask structure per manager instance * @amd_sdw_irq_thread: SoundWire manager irq workqueue * @amd_sdw_work: peripheral status work queue - * @probe_work: SoundWire manager probe workqueue * @acp_sdw_lock: mutex to protect acp share register access * @status: peripheral devices status array * @num_din_ports: number of input ports @@ -83,10 +77,8 @@ struct amd_sdw_manager { void __iomem *mmio; void __iomem *acp_mmio; - struct sdw_manager_reg_mask *reg_mask; struct work_struct amd_sdw_irq_thread; struct work_struct amd_sdw_work; - struct work_struct probe_work; /* mutex to protect acp common register access */ struct mutex *acp_sdw_lock; @@ -106,4 +98,71 @@ struct amd_sdw_manager { struct sdw_amd_dai_runtime **dai_runtime_array; }; + +/** + * struct sdw_amd_acpi_info - Soundwire AMD information found in ACPI tables + * @handle: ACPI controller handle + * @count: maximum no of soundwire manager links supported on AMD platform. + * @link_mask: bit-wise mask listing links enabled by BIOS menu + */ +struct sdw_amd_acpi_info { + acpi_handle handle; + int count; + u32 link_mask; +}; + +/** + * struct sdw_amd_ctx - context allocated by the controller driver probe + * + * @count: link count + * @num_slaves: total number of devices exposed across all enabled links + * @link_mask: bit-wise mask listing SoundWire links reported by the + * Controller + * @ids: array of slave_id, representing Slaves exposed across all enabled + * links + * @pdev: platform device structure + */ +struct sdw_amd_ctx { + int count; + int num_slaves; + u32 link_mask; + struct sdw_extended_slave_id *ids; + struct platform_device *pdev[AMD_SDW_MAX_MANAGER_COUNT]; +}; + +/** + * struct sdw_amd_res - Soundwire AMD global resource structure, + * typically populated by the DSP driver/Legacy driver + * + * @addr: acp pci device resource start address + * @reg_range: ACP register range + * @link_mask: bit-wise mask listing links selected by the DSP driver/ + * legacy driver + * @count: link count + * @mmio_base: mmio base of SoundWire registers + * @handle: ACPI parent handle + * @parent: parent device + * @dev: device implementing hwparams and free callbacks + * @acp_lock: mutex protecting acp common registers access + */ +struct sdw_amd_res { + u32 addr; + u32 reg_range; + u32 link_mask; + int count; + void __iomem *mmio_base; + acpi_handle handle; + struct device *parent; + struct device *dev; + /* use to protect acp common registers access */ + struct mutex *acp_lock; +}; + +int sdw_amd_probe(struct sdw_amd_res *res, struct sdw_amd_ctx **ctx); + +void sdw_amd_exit(struct sdw_amd_ctx *ctx); + +int sdw_amd_get_slave_info(struct sdw_amd_ctx *ctx); + +int amd_sdw_scan_controller(struct sdw_amd_acpi_info *info); #endif diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h index d8c27f1e5559..693320b4f5c2 100644 --- a/include/linux/soundwire/sdw_type.h +++ b/include/linux/soundwire/sdw_type.h @@ -4,9 +4,9 @@ #ifndef __SOUNDWIRE_TYPES_H #define __SOUNDWIRE_TYPES_H -extern struct bus_type sdw_bus_type; -extern struct device_type sdw_slave_type; -extern struct device_type sdw_master_type; +extern const struct bus_type sdw_bus_type; +extern const struct device_type sdw_slave_type; +extern const struct device_type sdw_master_type; static inline int is_sdw_slave(const struct device *dev) { diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 0916cb9bcb0a..ca2cd4e30ead 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -5,6 +5,7 @@ #ifndef __LINUX_SPI_PXA2XX_SPI_H #define __LINUX_SPI_PXA2XX_SPI_H +#include <linux/dmaengine.h> #include <linux/types.h> #include <linux/pxa2xx_ssp.h> @@ -22,7 +23,7 @@ struct pxa2xx_spi_controller { bool is_target; /* DMA engine specific config */ - bool (*dma_filter)(struct dma_chan *chan, void *param); + dma_filter_fn dma_filter; void *tx_param; void *rx_param; diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 6b0a7dc48a4b..f866d5c8ed32 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -233,6 +233,8 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem) * limitations) * @supports_op: check if an operation is supported by the controller * @exec_op: execute a SPI memory operation + * not all driver provides supports_op(), so it can return -EOPNOTSUPP + * if the op is not supported by the driver/controller * @get_name: get a custom name for the SPI mem device from the controller. * This might be needed if the controller driver has been ported * to use the SPI mem layer and a custom name is used to keep diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7f8b478fdeb3..c459809efee4 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -20,6 +20,9 @@ #include <uapi/linux/spi/spi.h> +/* Max no. of CS supported per spi device */ +#define SPI_CS_CNT_MAX 16 + struct dma_chan; struct software_node; struct ptp_system_timestamp; @@ -33,7 +36,7 @@ struct spi_message; * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, * and SPI infrastructure. */ -extern struct bus_type spi_bus_type; +extern const struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers @@ -128,11 +131,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. - * @master: Copy of controller, for backwards compatibility. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. - * @chip_select: Chipselect, distinguishing chips handled by @controller. + * @chip_select: Array of physical chipselect, spi->chipselect[i] gives + * the corresponding physical CS for logical CS i. * @mode: The spi mode defines how data is clocked out and in. * This may be changed by the device's driver. * The "active low" default for chipselect mode can be overridden @@ -157,8 +160,8 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. - * @cs_gpiod: GPIO descriptor of the chipselect line (optional, NULL when - * not using a GPIO line) + * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines + * (optional, NULL when not using a GPIO line) * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted @@ -167,6 +170,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. * @pcpu_statistics: statistics for the spi_device + * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. @@ -180,9 +184,8 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_device { struct device dev; struct spi_controller *controller; - struct spi_controller *master; /* Compatibility layer */ u32 max_speed_hz; - u8 chip_select; + u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ @@ -213,7 +216,7 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod; /* Chip select GPIO descriptor */ + struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; @@ -223,6 +226,13 @@ struct spi_device { /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; + /* Bit mask of the chipselect(s) that the driver need to use from + * the chipselect array.When the controller is capable to handle + * multiple chip selects & memories are connected in parallel + * then more than one bit need to be set in cs_index_mask. + */ + u32 cs_index_mask : SPI_CS_CNT_MAX; + /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: @@ -279,22 +289,33 @@ static inline void *spi_get_drvdata(const struct spi_device *spi) static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx) { - return spi->chip_select; + return spi->chip_select[idx]; } static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect) { - spi->chip_select = chipselect; + spi->chip_select[idx] = chipselect; } static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx) { - return spi->cs_gpiod; + return spi->cs_gpiod[idx]; } static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod) { - spi->cs_gpiod = csgpiod; + spi->cs_gpiod[idx] = csgpiod; +} + +static inline bool spi_is_csgpiod(struct spi_device *spi) +{ + u8 idx; + + for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { + if (spi_get_csgpiod(spi, idx)) + return true; + } + return false; } /** @@ -427,9 +448,11 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA + * @fallback: fallback to PIO if DMA transfer return failure with + * SPI_TRANS_FAIL_NO_START. + * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected - * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running @@ -452,6 +475,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. + * @optimize_message: optimize the message for reuse + * @unoptimize_message: release resources allocated by optimize_message * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. @@ -461,10 +486,13 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem - * can issue the next transfer. Note: transfer_one and - * transfer_one_message are mutually exclusive; when both - * are set, the generic subsystem does not call your - * transfer_one callback. + * can issue the next transfer. If the transfer fails, the + * driver must set the flag SPI_TRANS_FAIL_IO to + * spi_transfer->error first, before calling + * spi_finalize_current_transfer(). + * Note: transfer_one and transfer_one_message are mutually + * exclusive; when both are set, the generic subsystem does + * not call your transfer_one callback. * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @mem_ops: optimized/dedicated operations for interactions with SPI memory. @@ -501,8 +529,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping - * @fallback: fallback to PIO if DMA transfer return failure with - * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core @@ -566,6 +592,12 @@ struct spi_controller { #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ + /* + * The spi-controller has multi chip select capability and can + * assert/de-assert more than one chip select at once. + */ +#define SPI_CONTROLLER_MULTI_CS BIT(7) /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; @@ -676,12 +708,15 @@ struct spi_controller { bool rt; bool auto_runtime_pm; bool cur_msg_mapped; - char last_cs; - bool last_cs_mode_high; bool fallback; + bool last_cs_mode_high; + s8 last_cs[SPI_CS_CNT_MAX]; + u32 last_cs_index_mask : SPI_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; + int (*optimize_message)(struct spi_message *msg); + int (*unoptimize_message)(struct spi_message *msg); int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); @@ -867,6 +902,7 @@ extern int devm_spi_register_controller(struct device *dev, extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) +extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); @@ -1038,6 +1074,7 @@ struct spi_transfer { unsigned len; #define SPI_TRANS_FAIL_NO_START BIT(0) +#define SPI_TRANS_FAIL_IO BIT(1) u16 error; dma_addr_t tx_dma; @@ -1076,18 +1113,19 @@ struct spi_transfer { * @spi: SPI device to which the transaction is queued * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual * addresses for each transfer buffer + * @pre_optimized: peripheral driver pre-optimized the message + * @optimized: the message is in the optimized state + * @prepared: spi_prepare_message was called for the this message + * @status: zero for success, else negative errno * @complete: called to report transaction completions * @context: the argument to complete() when it's called * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments - * @status: zero for success, else negative errno * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message + * @opt_state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed - * @prepared: spi_prepare_message was called for the this message - * @t: for use with spi_message_alloc() when message and transfers have - * been allocated together * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1110,6 +1148,11 @@ struct spi_message { unsigned is_dma_mapped:1; + /* spi_optimize_message() was called for this message */ + bool pre_optimized; + /* __spi_optimize_message() was called for this message */ + bool optimized; + /* spi_prepare_message() was called for this message */ bool prepared; @@ -1139,12 +1182,14 @@ struct spi_message { */ struct list_head queue; void *state; + /* + * Optional state for use by controller driver between calls to + * __spi_optimize_message() and __spi_unoptimize_message(). + */ + void *opt_state; /* List of spi_res resources when the SPI message is processed */ struct list_head resources; - - /* For embedding transfers into the memory of the message */ - struct spi_transfer t[]; }; static inline void spi_message_init_no_memset(struct spi_message *m) @@ -1203,17 +1248,21 @@ struct spi_transfer *xfers, unsigned int num_xfers) */ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { - struct spi_message *m; + struct spi_message_with_transfers { + struct spi_message m; + struct spi_transfer t[]; + } *mwt; + unsigned i; - m = kzalloc(struct_size(m, t, ntrans), flags); - if (m) { - unsigned i; + mwt = kzalloc(struct_size(mwt, t, ntrans), flags); + if (!mwt) + return NULL; - spi_message_init_no_memset(m); - for (i = 0; i < ntrans; i++) - spi_message_add_tail(&m->t[i], m); - } - return m; + spi_message_init_no_memset(&mwt->m); + for (i = 0; i < ntrans; i++) + spi_message_add_tail(&mwt->t[i], &mwt->m); + + return &mwt->m; } static inline void spi_message_free(struct spi_message *m) @@ -1221,6 +1270,9 @@ static inline void spi_message_free(struct spi_message *m) kfree(m); } +extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); +extern void spi_unoptimize_message(struct spi_message *msg); + extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); @@ -1262,7 +1314,7 @@ spi_max_transfer_size(struct spi_device *spi) */ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) { - u32 bpw_mask = spi->master->bits_per_word_mask; + u32 bpw_mask = spi->controller->bits_per_word_mask; if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) return true; @@ -1329,12 +1381,10 @@ struct spi_replaced_transfers { extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, - size_t maxsize, - gfp_t gfp); + size_t maxsize); extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, - size_t maxwords, - gfp_t gfp); + size_t maxwords); /*---------------------------------------------------------------------------*/ @@ -1634,22 +1684,4 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } -/* Compatibility layer */ -#define spi_master spi_controller - -#define SPI_MASTER_HALF_DUPLEX SPI_CONTROLLER_HALF_DUPLEX - -#define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr) -#define spi_master_set_devdata(_ctlr, _data) \ - spi_controller_set_devdata(_ctlr, _data) -#define spi_master_get(_ctlr) spi_controller_get(_ctlr) -#define spi_master_put(_ctlr) spi_controller_put(_ctlr) -#define spi_master_suspend(_ctlr) spi_controller_suspend(_ctlr) -#define spi_master_resume(_ctlr) spi_controller_resume(_ctlr) - -#define spi_register_master(_ctlr) spi_register_controller(_ctlr) -#define devm_spi_register_master(_dev, _ctlr) \ - devm_spi_register_controller(_dev, _ctlr) -#define spi_unregister_master(_ctlr) spi_unregister_controller(_ctlr) - #endif /* __LINUX_SPI_H */ diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 4444c2a992cb..b930eca2ef7b 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -10,7 +10,7 @@ struct spi_bitbang { u8 use_dma; u16 flags; /* extra spi->mode support */ - struct spi_master *master; + struct spi_controller *ctlr; /* setup_transfer() changes clock and/or wordsize to match settings * for this transfer; zeroes restore defaults from spi_device. diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h index 9e7e83d8645b..5f0e1407917a 100644 --- a/include/linux/spi/spi_gpio.h +++ b/include/linux/spi/spi_gpio.h @@ -15,8 +15,8 @@ */ /** - * struct spi_gpio_platform_data - parameter for bitbanged SPI master - * @num_chipselect: how many slaves to allow + * struct spi_gpio_platform_data - parameter for bitbanged SPI host controller + * @num_chipselect: how many target devices to allow */ struct spi_gpio_platform_data { u16 num_chipselect; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 31d3d747a9db..3fcd20de6ca8 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -456,6 +456,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock) #endif /* CONFIG_PREEMPT_RT */ /* + * Does a critical section need to be broken due to another + * task waiting?: (technically does not depend on CONFIG_PREEMPTION, + * but a general need for low latency) + */ +static inline int spin_needbreak(spinlock_t *lock) +{ +#ifdef CONFIG_PREEMPTION + return spin_is_contended(lock); +#else + return 0; +#endif +} + +/* + * Check if a rwlock is contended. + * Returns non-zero if there is another task waiting on the rwlock. + * Returns zero if the lock is not contended or the system / underlying + * rwlock implementation does not support contention detection. + * Technically does not depend on CONFIG_PREEMPTION, but a general need + * for low latency. + */ +static inline int rwlock_needbreak(rwlock_t *lock) +{ +#ifdef CONFIG_PREEMPTION + return rwlock_is_contended(lock); +#else + return 0; +#endif +} + +/* * Pull the atomic_t declaration: * (asm-mips/atomic.h needs above definitions) */ @@ -507,6 +538,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) @@ -515,23 +548,62 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, + raw_spin_trylock_irqsave(_T->lock, _T->flags)) + DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, + spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, + spin_trylock_irqsave(_T->lock, _T->flags)) + +DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, + read_lock(_T->lock), + read_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, + read_lock_irq(_T->lock), + read_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, + read_lock_irqsave(_T->lock, _T->flags), + read_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, + write_lock(_T->lock), + write_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, + write_lock_irq(_T->lock), + write_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, + write_lock_irqsave(_T->lock, _T->flags), + write_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/splice.h b/include/linux/splice.h index 6c461573434d..9dec4861d09f 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -68,28 +68,37 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, typedef int (splice_direct_actor)(struct pipe_inode_info *, struct splice_desc *); -extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int, - splice_actor *); -extern ssize_t __splice_from_pipe(struct pipe_inode_info *, - struct splice_desc *, splice_actor *); -extern ssize_t splice_to_pipe(struct pipe_inode_info *, - struct splice_pipe_desc *); -extern ssize_t add_to_pipe(struct pipe_inode_info *, - struct pipe_buffer *); -long vfs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags); -extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, - splice_direct_actor *); -extern long do_splice(struct file *in, loff_t *off_in, - struct file *out, loff_t *off_out, - size_t len, unsigned int flags); +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor); +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct splice_desc *sd, splice_actor *actor); +ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf); +ssize_t vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); +ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd, + splice_direct_actor *actor); +ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags); +ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); +ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len); -extern long do_tee(struct file *in, struct file *out, size_t len, - unsigned int flags); -extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags); +static inline long splice_copy_file_range(struct file *in, loff_t pos_in, + struct file *out, loff_t pos_out, + size_t len) +{ + return splice_file_range(in, &pos_in, out, &pos_out, len); +} + +ssize_t do_tee(struct file *in, struct file *out, size_t len, + unsigned int flags); +ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags); /* * for dynamic pipe sizing diff --git a/include/linux/spmi.h b/include/linux/spmi.h index eac1956a8727..28e8c8bd3944 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -120,6 +120,9 @@ static inline void spmi_controller_put(struct spmi_controller *ctrl) int spmi_controller_add(struct spmi_controller *ctrl); void spmi_controller_remove(struct spmi_controller *ctrl); +struct spmi_controller *devm_spmi_controller_alloc(struct device *parent, size_t size); +int devm_spmi_controller_add(struct device *parent, struct spmi_controller *ctrl); + /** * struct spmi_driver - SPMI slave device driver * @driver: SPMI device drivers should initialize name and owner field of @@ -166,7 +169,7 @@ static inline void spmi_driver_unregister(struct spmi_driver *sdrv) struct device_node; -struct spmi_device *spmi_device_from_of(struct device_node *np); +struct spmi_device *spmi_find_device_by_of_node(struct device_node *np); int spmi_register_read(struct spmi_device *sdev, u8 addr, u8 *buf); int spmi_ext_register_read(struct spmi_device *sdev, u8 addr, u8 *buf, size_t len); diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 127ef3b2e607..236610e4a8fa 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp srcu_check_nmi_safety(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); - rcu_lock_acquire(&ssp->dep_map); + rcu_try_lock_acquire(&ssp->dep_map); return retval; } diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index e58306783d8e..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -11,8 +11,6 @@ * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free * stack traces often repeat, using stack depot allows to save about 100x space. * - * Stack traces are never removed from the stack depot. - * * Author: Alexander Potapenko <glider@google.com> * Copyright (C) 2016 Google, Inc. * @@ -32,6 +30,64 @@ typedef u32 depot_stack_handle_t; */ #define STACK_DEPOT_EXTRA_BITS 5 +#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) + +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ +#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) +#define DEPOT_STACK_ALIGN 4 +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ + STACK_DEPOT_EXTRA_BITS) + +#ifdef CONFIG_STACKDEPOT +/* Compact structure that stores a reference to a stack. */ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; + }; +}; + +struct stack_record { + struct list_head hash_list; /* Links in the hash table */ + u32 hash; /* Hash in hash table */ + u32 size; /* Number of stored frames */ + union handle_parts handle; /* Constant after initialization */ + refcount_t count; + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; +}; +#endif + +typedef u32 depot_flags_t; + +/* + * Flags that can be passed to stack_depot_save_flags(); see the comment next + * to its declaration for more details. + */ +#define STACK_DEPOT_FLAG_CAN_ALLOC ((depot_flags_t)0x0001) +#define STACK_DEPOT_FLAG_GET ((depot_flags_t)0x0002) + +#define STACK_DEPOT_FLAGS_NUM 2 +#define STACK_DEPOT_FLAGS_MASK ((depot_flags_t)((1 << STACK_DEPOT_FLAGS_NUM) - 1)) + /* * Using stack depot requires its initialization, which can be done in 3 ways: * @@ -69,31 +125,39 @@ static inline int stack_depot_early_init(void) { return 0; } #endif /** - * __stack_depot_save - Save a stack trace to stack depot + * stack_depot_save_flags - Save a stack trace to stack depot * * @entries: Pointer to the stack trace * @nr_entries: Number of frames in the stack * @alloc_flags: Allocation GFP flags - * @can_alloc: Allocate stack pools (increased chance of failure if false) + * @depot_flags: Stack depot flags + * + * Saves a stack trace from @entries array of size @nr_entries. + * + * If STACK_DEPOT_FLAG_CAN_ALLOC is set in @depot_flags, stack depot can + * replenish the stack pools in case no space is left (allocates using GFP + * flags of @alloc_flags). Otherwise, stack depot avoids any allocations and + * fails if no space is left to store the stack trace. * - * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, stack depot can replenish the stack pools in case no space is left - * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and fails if no space is left to store the stack trace. + * If STACK_DEPOT_FLAG_GET is set in @depot_flags, stack depot will increment + * the refcount on the saved stack trace if it already exists in stack depot. + * Users of this flag must also call stack_depot_put() when keeping the stack + * trace is no longer required to avoid overflowing the refcount. * * If the provided stack trace comes from the interrupt context, only the part * up to the interrupt entry is saved. * - * Context: Any context, but setting @can_alloc to %false is required if + * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if * alloc_pages() cannot be used from the current context. Currently * this is the case for contexts where neither %GFP_ATOMIC nor * %GFP_NOWAIT can be used (NMI, raw_spin_lock). * * Return: Handle of the stack struct stored in depot, 0 on failure */ -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t gfp_flags, bool can_alloc); +depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, + unsigned int nr_entries, + gfp_t gfp_flags, + depot_flags_t depot_flags); /** * stack_depot_save - Save a stack trace to stack depot @@ -102,8 +166,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * @nr_entries: Number of frames in the stack * @alloc_flags: Allocation GFP flags * - * Context: Contexts where allocations via alloc_pages() are allowed. - * See __stack_depot_save() for more details. + * Does not increment the refcount on the saved stack trace; see + * stack_depot_save_flags() for more details. + * + * Context: Contexts where allocations via alloc_pages() are allowed; + * see stack_depot_save_flags() for more details. * * Return: Handle of the stack trace stored in depot, 0 on failure */ @@ -111,6 +178,17 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); /** + * __stack_depot_get_stack_record - Get a pointer to a stack_record struct + * + * @handle: Stack depot handle + * + * This function is only for internal purposes. + * + * Return: Returns a pointer to a stack_record struct + */ +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle); + +/** * stack_depot_fetch - Fetch a stack trace from stack depot * * @handle: Stack depot handle returned from stack_depot_save() @@ -142,6 +220,18 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); /** + * stack_depot_put - Drop a reference to a stack trace from stack depot + * + * @handle: Stack depot handle returned from stack_depot_save() + * + * The stack trace is evicted from stack depot once all references to it have + * been dropped (once the number of stack_depot_evict() calls matches the + * number of stack_depot_save_flags() calls with STACK_DEPOT_FLAG_GET set for + * this stack trace). + */ +void stack_depot_put(depot_stack_handle_t handle); + +/** * stack_depot_set_extra_bits - Set extra bits in a stack depot handle * * @handle: Stack depot handle returned from stack_depot_save() diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index c36e7a3b45e7..3be2cb564710 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -14,6 +14,7 @@ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK #include <asm/stacktrace.h> +#include <linux/linkage.h> /* * The lowest address on tsk's stack which we can plausibly erase. @@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h index a9806a44a605..09f994ac87df 100644 --- a/include/linux/start_kernel.h +++ b/include/linux/start_kernel.h @@ -9,7 +9,5 @@ up something else. */ extern asmlinkage void __init __noreturn start_kernel(void); -extern void __init __noreturn arch_call_rest_init(void); -extern void __ref __noreturn rest_init(void); #endif /* _LINUX_START_KERNEL_H */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ce89cc3e4913..dfa1828cd756 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -127,6 +127,7 @@ struct stmmac_est { u32 gcl_unaligned[EST_GCL]; u32 gcl[EST_GCL]; u32 gcl_size; + u32 max_sdu[MTL_MAX_TX_QUEUES]; }; struct stmmac_rxq_cfg { @@ -139,6 +140,7 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; @@ -174,6 +176,7 @@ struct stmmac_fpe_cfg { bool hs_enable; /* FPE handshake enable */ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_safety_feature_cfg { @@ -302,7 +305,6 @@ struct plat_stmmacenet_data { unsigned int eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; - int ext_snapshot_num; int msi_mac_vec; int msi_wol_vec; int msi_lpi_vec; diff --git a/include/linux/string.h b/include/linux/string.h index debf4ef1098f..9ba8b4597009 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,6 +2,8 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include <linux/args.h> +#include <linux/array_size.h> #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ @@ -65,15 +67,79 @@ extern char * strcpy(char *,const char *); #ifndef __HAVE_ARCH_STRNCPY extern char * strncpy(char *,const char *, __kernel_size_t); #endif -#ifndef __HAVE_ARCH_STRLCPY -size_t strlcpy(char *, const char *, size_t); -#endif -#ifndef __HAVE_ARCH_STRSCPY -ssize_t strscpy(char *, const char *, size_t); -#endif +ssize_t sized_strscpy(char *, const char *, size_t); + +/* + * The 2 argument style can only be used when dst is an array with a + * known size. + */ +#define __strscpy0(dst, src, ...) \ + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + +#define __strscpy_pad0(dst, src, ...) \ + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + +/** + * strscpy - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer (optional) + * + * Copy the source string @src, or as much of it as fits, into the + * destination @dst buffer. The behavior is undefined if the string + * buffers overlap. The destination @dst buffer is always NUL terminated, + * unless it's zero-sized. + * + * The size argument @... is only required when @dst is not an array, or + * when the copy needs to be smaller than sizeof(@dst). + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zero padded. If padding is desired please use strscpy_pad(). + * + * Returns the number of characters copied in @dst (not including the + * trailing %NUL) or -E2BIG if @size is 0 or the copy from @src was + * truncated. + */ +#define strscpy(dst, src, ...) \ + CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) + +#define sized_strscpy_pad(dest, src, count) ({ \ + char *__dst = (dest); \ + const char *__src = (src); \ + const size_t __count = (count); \ + ssize_t __wrote; \ + \ + __wrote = sized_strscpy(__dst, __src, __count); \ + if (__wrote >= 0 && __wrote < __count) \ + memset(__dst + __wrote + 1, 0, __count - __wrote - 1); \ + __wrote; \ +}) -/* Wraps calls to strscpy()/memset(), no arch specific code required */ -ssize_t strscpy_pad(char *dest, const char *src, size_t count); +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, the + * remaining bytes in the buffer will be filled with %NUL bytes. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Returns: + * * The number of characters copied (not including the trailing %NULs) + * * -E2BIG if count is 0 or @src was truncated. + */ +#define strscpy_pad(dst, src, ...) \ + CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); @@ -219,10 +285,19 @@ extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); +extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp); +/* lib/argv_split.c */ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); +/* lib/cmdline.c */ +extern int get_option(char **str, int *pint); +extern char *get_options(const char *str, int nints, int *ints); +extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); +extern char *next_arg(char *args, char **param, char **val); + extern bool sysfs_streq(const char *s1, const char *s2); int match_string(const char * const *array, size_t n, const char *string); int __sysfs_match_string(const char * const *array, size_t n, const char *s); @@ -317,10 +392,12 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, */ #define strtomem_pad(dest, src, pad) do { \ const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ - memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \ + memcpy_and_pad(dest, _dest_len, src, \ + strnlen(src, min(_src_len, _dest_len)), pad); \ } while (0) /** @@ -338,10 +415,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, */ #define strtomem(dest, src) do { \ const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ - memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \ + memcpy(dest, src, strnlen(src, min(_src_len, _dest_len))); \ } while (0) /** diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 3c1091941eb8..d9ebe20229f8 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,4 +42,15 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +/** + * str_plural - Return the simple pluralization based on English counts + * @num: Number used for deciding pluralization + * + * If @num is 1, returns empty string, otherwise returns "s". + */ +static inline const char *str_plural(size_t num) +{ + return num == 1 ? "" : "s"; +} + #endif diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 9d1f5bb74dd5..e93fbb5b0c01 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -17,15 +17,19 @@ static inline bool string_is_terminated(const char *s, int len) return memchr(s, '\0', len) ? true : false; } -/* Descriptions of the types of units to - * print in */ +/* Descriptions of the types of units to print in */ enum string_size_units { STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ STRING_UNITS_2, /* use binary powers of 2^10 */ + STRING_UNITS_MASK = BIT(0), + + /* Modifiers */ + STRING_UNITS_NO_SPACE = BIT(30), + STRING_UNITS_NO_BYTES = BIT(31), }; -void string_get_size(u64 size, u64 blk_size, enum string_size_units units, - char *buf, int len); +int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, + char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); diff --git a/include/linux/stringify.h b/include/linux/stringify.h index 841cec8ed525..0e84cbe65270 100644 --- a/include/linux/stringify.h +++ b/include/linux/stringify.h @@ -9,4 +9,6 @@ #define __stringify_1(x...) #x #define __stringify(x...) __stringify_1(x) +#define FILE_LINE __FILE__ ":" __stringify(__LINE__) + #endif /* !__LINUX_STRINGIFY_H */ diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index db30a159f9d5..f22bf915dcf6 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -20,7 +20,8 @@ #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); -void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, + const struct rpc_timeout *to); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index af7358277f1c..5321585c778f 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -92,6 +92,7 @@ struct rpc_clnt { }; const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; }; /* @@ -138,6 +139,7 @@ struct rpc_create_args { const char *servername; const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; @@ -251,7 +253,6 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, const char *rpc_proc_name(const struct rpc_task *task); -void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8ada7dc802d3..0c77ba488bba 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -38,6 +38,17 @@ struct rpc_wait { }; /* + * This describes a timeout strategy + */ +struct rpc_timeout { + unsigned long to_initval, /* initial timeout */ + to_maxval, /* max timeout */ + to_increment; /* if !exponential */ + unsigned int to_retries; /* max # of retries */ + unsigned char to_exponential; +}; + +/* * This is the RPC task struct */ struct rpc_task { @@ -186,7 +197,7 @@ struct rpc_wait_queue { unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; @@ -205,7 +216,8 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + struct rpc_timeout *timeout); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dbf5b21feafe..23617da0e565 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -17,6 +17,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/svcauth.h> +#include <linux/lwq.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/pagevec.h> @@ -33,10 +34,10 @@ */ struct svc_pool { unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ - struct list_head sp_sockets; /* pending sockets */ - unsigned int sp_nrthreads; /* # of threads in pool */ + struct lwq sp_xprts; /* pending transports */ + atomic_t sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ + struct llist_head sp_idle_threads; /* idle server threads */ /* statistics on pool operation */ struct percpu_counter sp_messages_arrived; @@ -49,7 +50,8 @@ struct svc_pool { /* bits for sp_flags */ enum { SP_TASK_PENDING, /* still work to do even if no xprt is queued */ - SP_CONGESTED, /* all threads are busy, none idle */ + SP_NEED_VICTIM, /* One thread needs to agree to exit */ + SP_VICTIM_REMAINS, /* One thread needs to actually exit */ }; @@ -67,7 +69,6 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; - struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -88,41 +89,20 @@ struct svc_serv { int (*sv_threadfn)(void *data); #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head sv_cb_list; /* queue for callback requests + struct lwq sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ - spinlock_t sv_cb_lock; /* protects the svc_cb_list */ - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no - * entries in the svc_cb_list */ bool sv_bc_enabled; /* service uses backchannel */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/** - * svc_get() - increment reference count on a SUNRPC serv - * @serv: the svc_serv to have count incremented - * - * Returns: the svc_serv that was passed in. - */ -static inline struct svc_serv *svc_get(struct svc_serv *serv) -{ - kref_get(&serv->sv_refcnt); - return serv; -} +/* This is used by pool_stats to find and lock an svc */ +struct svc_info { + struct svc_serv *serv; + struct mutex *mutex; +}; -void svc_destroy(struct kref *); - -/** - * svc_put - decrement reference count on a SUNRPC serv - * @serv: the svc_serv to have count decremented - * - * When the reference count reaches zero, svc_destroy() - * is called to clean up and free the serv. - */ -static inline void svc_put(struct svc_serv *serv) -{ - kref_put(&serv->sv_refcnt, svc_destroy); -} +void svc_destroy(struct svc_serv **svcp); /* * Maximum payload size supported by a kernel RPC server. @@ -186,6 +166,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); */ struct svc_rqst { struct list_head rq_all; /* all threads list */ + struct llist_node rq_idle; /* On the idle list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -250,7 +231,10 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + unsigned long bc_to_initval; + unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned int rq_status_counter; /* RPC processing counter */ }; /* bits for rq_flags */ @@ -259,10 +243,7 @@ enum { RQ_LOCAL, /* local request */ RQ_USEDEFERRAL, /* use deferral */ RQ_DROPME, /* drop current reply */ - RQ_SPLICE_OK, /* turned off in gss privacy to prevent - * encrypting page cache pages */ - RQ_VICTIM, /* about to be shut down */ - RQ_BUSY, /* request is busy */ + RQ_VICTIM, /* Have agreed to shut down */ RQ_DATA, /* request has data */ }; @@ -301,6 +282,28 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } +/** + * svc_thread_should_stop - check if this thread should stop + * @rqstp: the thread that might need to stop + * + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming + * the victim using this function. It should then promptly call + * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS + * so the task waiting for a thread to exit can wake and continue. + * + * Return values: + * %true: caller should invoke svc_exit_thread() + * %false: caller should do nothing + */ +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) +{ + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags)) + set_bit(RQ_VICTIM, &rqstp->rq_flags); + + return test_bit(RQ_VICTIM, &rqstp->rq_flags); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; @@ -336,7 +339,6 @@ struct svc_program { const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ - struct svc_stat * pg_stats; /* rpc statistics */ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, @@ -408,13 +410,14 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, +struct svc_serv * svc_create_pooled(struct svc_program *prog, + struct svc_stat *stats, + unsigned int bufsize, int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_pool_stats_open(struct svc_serv *serv, struct file *file); +int svc_pool_stats_open(struct svc_info *si, struct file *file); void svc_process(struct svc_rqst *rqstp); -int bc_svc_process(struct svc_serv *, struct rpc_rqst *, - struct svc_rqst *); +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp); int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a5ee0af2a310..d33bab33099a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -65,6 +65,7 @@ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; +extern struct workqueue_struct *svcrdma_wq; extern struct percpu_counter svcrdma_stat_read; extern struct percpu_counter svcrdma_stat_recv; @@ -97,6 +98,7 @@ struct svcxprt_rdma { u32 sc_pending_recvs; u32 sc_recv_batch; struct list_head sc_rq_dto_q; + struct list_head sc_read_complete_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; @@ -115,6 +117,13 @@ struct svcxprt_rdma { /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 +static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp) +{ + struct svc_xprt *xprt = rqstp->rq_xprt; + + return container_of(xprt, struct svcxprt_rdma, sc_xprt); +} + /* * Default connection parameters */ @@ -126,6 +135,43 @@ enum { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/** + * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_rq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + +/** + * svc_rdma_send_cid_init - Initialize a Send Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + +/* + * A chunk context tracks all I/O for moving one Read or Write + * chunk. This is a set of rdma_rw's that handle data movement + * for all segments of one chunk. + */ +struct svc_rdma_chunk_ctxt { + struct rpc_rdma_cid cc_cid; + struct ib_cqe cc_cqe; + struct list_head cc_rwctxts; + ktime_t cc_posttime; + int cc_sqecount; +}; + struct svc_rdma_recv_ctxt { struct llist_node rc_node; struct list_head rc_list; @@ -136,26 +182,63 @@ struct svc_rdma_recv_ctxt { void *rc_recv_buf; struct xdr_stream rc_stream; u32 rc_byte_len; - unsigned int rc_page_count; u32 rc_inv_rkey; __be32 rc_msgtype; + /* State for pulling a Read chunk */ + unsigned int rc_pageoff; + unsigned int rc_curpage; + unsigned int rc_readbytes; + struct xdr_buf rc_saved_arg; + struct svc_rdma_chunk_ctxt rc_cc; + struct svc_rdma_pcl rc_call_pcl; struct svc_rdma_pcl rc_read_pcl; struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; struct svc_rdma_pcl rc_reply_pcl; + + unsigned int rc_page_count; + struct page *rc_pages[RPCSVC_MAXPAGES]; +}; + +/* + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions + */ +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; }; struct svc_rdma_send_ctxt { struct llist_node sc_node; struct rpc_rdma_cid sc_cid; + struct work_struct sc_work; + struct svcxprt_rdma *sc_rdma; struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; + struct svc_rdma_write_info sc_reply_info; void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -179,13 +262,24 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr); -extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr); +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head); @@ -196,11 +290,12 @@ extern struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_recv_ctxt *rctxt, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, const struct xdr_buf *xdr); extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index fa55d12dc765..8e20cd60e2e7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -54,7 +54,7 @@ struct svc_xprt { const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; - struct list_head xpt_ready; + struct lwq_node xpt_ready; unsigned long xpt_flags; struct svc_serv *xpt_server; /* service for transport */ diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 6f90203edbf8..61c455f1e1f5 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -131,8 +131,11 @@ enum svc_auth_status { * This call releases a domain. * * set_client() - * Givens a pending request (struct svc_rqst), finds and assigns + * Given a pending request (struct svc_rqst), finds and assigns * an appropriate 'auth_domain' as the client. + * + * pseudoflavor() + * Returns RPC_AUTH pseudoflavor in use by @rqstp. */ struct auth_ops { char * name; @@ -143,11 +146,13 @@ struct auth_ops { int (*release)(struct svc_rqst *rqstp); void (*domain_release)(struct auth_domain *dom); enum svc_auth_status (*set_client)(struct svc_rqst *rqstp); + rpc_authflavor_t (*pseudoflavor)(struct svc_rqst *rqstp); }; struct svc_xprt; extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); +extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4ecc89301eb7..81b952649d35 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -30,17 +30,6 @@ #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) -/* - * This describes a timeout strategy - */ -struct rpc_timeout { - unsigned long to_initval, /* initial timeout */ - to_maxval, /* max timeout */ - to_increment; /* if !exponential */ - unsigned int to_retries; /* max # of retries */ - unsigned char to_exponential; -}; - enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, @@ -57,6 +46,7 @@ struct xprt_class; struct seq_file; struct svc_serv; struct net; +#include <linux/lwq.h> /* * This describes a complete RPC request @@ -121,7 +111,7 @@ struct rpc_rqst { int rq_ntrans; #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head rq_bc_list; /* Callback service list */ + struct lwq_node rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ #endif /* CONFIG_SUNRPC_BACKCHANEL */ @@ -162,6 +152,7 @@ struct rpc_xprt_ops { int (*prepare_request)(struct rpc_rqst *req, struct xdr_buf *buf); int (*send_request)(struct rpc_rqst *req); + void (*abort_send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); diff --git a/include/linux/superhyway.h b/include/linux/superhyway.h deleted file mode 100644 index 8d3376775813..000000000000 --- a/include/linux/superhyway.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * include/linux/superhyway.h - * - * SuperHyway Bus definitions - * - * Copyright (C) 2004, 2005 Paul Mundt <lethal@linux-sh.org> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#ifndef __LINUX_SUPERHYWAY_H -#define __LINUX_SUPERHYWAY_H - -#include <linux/device.h> - -/* - * SuperHyway IDs - */ -#define SUPERHYWAY_DEVICE_ID_SH5_DMAC 0x0183 - -struct superhyway_vcr_info { - u8 perr_flags; /* P-port Error flags */ - u8 merr_flags; /* Module Error flags */ - u16 mod_vers; /* Module Version */ - u16 mod_id; /* Module ID */ - u8 bot_mb; /* Bottom Memory block */ - u8 top_mb; /* Top Memory block */ -}; - -struct superhyway_ops { - int (*read_vcr)(unsigned long base, struct superhyway_vcr_info *vcr); - int (*write_vcr)(unsigned long base, struct superhyway_vcr_info vcr); -}; - -struct superhyway_bus { - struct superhyway_ops *ops; -}; - -extern struct superhyway_bus superhyway_channels[]; - -struct superhyway_device_id { - unsigned int id; - unsigned long driver_data; -}; - -struct superhyway_device; -extern struct bus_type superhyway_bus_type; - -struct superhyway_driver { - char *name; - - const struct superhyway_device_id *id_table; - struct device_driver drv; - - int (*probe)(struct superhyway_device *dev, const struct superhyway_device_id *id); - void (*remove)(struct superhyway_device *dev); -}; - -#define to_superhyway_driver(d) container_of((d), struct superhyway_driver, drv) - -struct superhyway_device { - char name[32]; - - struct device dev; - - struct superhyway_device_id id; - struct superhyway_driver *drv; - struct superhyway_bus *bus; - - int num_resources; - struct resource *resource; - struct superhyway_vcr_info vcr; -}; - -#define to_superhyway_device(d) container_of((d), struct superhyway_device, dev) - -#define superhyway_get_drvdata(d) dev_get_drvdata(&(d)->dev) -#define superhyway_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p)) - -static inline int -superhyway_read_vcr(struct superhyway_device *dev, unsigned long base, - struct superhyway_vcr_info *vcr) -{ - return dev->bus->ops->read_vcr(base, vcr); -} - -static inline int -superhyway_write_vcr(struct superhyway_device *dev, unsigned long base, - struct superhyway_vcr_info vcr) -{ - return dev->bus->ops->write_vcr(base, vcr); -} - -extern int superhyway_scan_bus(struct superhyway_bus *); - -/* drivers/sh/superhyway/superhyway.c */ -int superhyway_register_driver(struct superhyway_driver *); -void superhyway_unregister_driver(struct superhyway_driver *); -int superhyway_add_device(unsigned long base, struct superhyway_device *, struct superhyway_bus *); -int superhyway_add_devices(struct superhyway_bus *bus, struct superhyway_device **devices, int nr_devices); - -/* drivers/sh/superhyway/superhyway-sysfs.c */ -extern const struct attribute_group *superhyway_dev_groups[]; - -#endif /* __LINUX_SUPERHYWAY_H */ - diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index cb7980805920..5b67f0f47d80 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -44,7 +44,7 @@ struct ssam_event { u8 command_id; u8 instance_id; u16 length; - u8 data[]; + u8 data[] __counted_by(length); }; /** diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 42b249b4c24b..8cd8c38cf3f3 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -193,7 +193,6 @@ struct ssam_device_driver { #ifdef CONFIG_SURFACE_AGGREGATOR_BUS -extern struct bus_type ssam_bus_type; extern const struct device_type ssam_device_type; /** diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h index 5c4ae1a26183..d8dbef6b7fc2 100644 --- a/include/linux/surface_aggregator/serial_hub.h +++ b/include/linux/surface_aggregator/serial_hub.h @@ -12,7 +12,7 @@ #ifndef _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H #define _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H -#include <linux/crc-ccitt.h> +#include <linux/crc-itu-t.h> #include <linux/kref.h> #include <linux/ktime.h> #include <linux/list.h> @@ -188,7 +188,7 @@ static_assert(sizeof(struct ssh_command) == 8); */ static inline u16 ssh_crc(const u8 *buf, size_t len) { - return crc_ccitt_false(0xffff, buf, len); + return crc_itu_t(0xffff, buf, len); } /* diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ef503088942d..da6ebca3ff77 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -40,65 +40,6 @@ typedef int __bitwise suspend_state_t; #define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) -enum suspend_stat_step { - SUSPEND_FREEZE = 1, - SUSPEND_PREPARE, - SUSPEND_SUSPEND, - SUSPEND_SUSPEND_LATE, - SUSPEND_SUSPEND_NOIRQ, - SUSPEND_RESUME_NOIRQ, - SUSPEND_RESUME_EARLY, - SUSPEND_RESUME -}; - -struct suspend_stats { - int success; - int fail; - int failed_freeze; - int failed_prepare; - int failed_suspend; - int failed_suspend_late; - int failed_suspend_noirq; - int failed_resume; - int failed_resume_early; - int failed_resume_noirq; -#define REC_FAILED_NUM 2 - int last_failed_dev; - char failed_devs[REC_FAILED_NUM][40]; - int last_failed_errno; - int errno[REC_FAILED_NUM]; - int last_failed_step; - u64 last_hw_sleep; - u64 total_hw_sleep; - u64 max_hw_sleep; - enum suspend_stat_step failed_steps[REC_FAILED_NUM]; -}; - -extern struct suspend_stats suspend_stats; - -static inline void dpm_save_failed_dev(const char *name) -{ - strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], - name, - sizeof(suspend_stats.failed_devs[0])); - suspend_stats.last_failed_dev++; - suspend_stats.last_failed_dev %= REC_FAILED_NUM; -} - -static inline void dpm_save_failed_errno(int err) -{ - suspend_stats.errno[suspend_stats.last_failed_errno] = err; - suspend_stats.last_failed_errno++; - suspend_stats.last_failed_errno %= REC_FAILED_NUM; -} - -static inline void dpm_save_failed_step(enum suspend_stat_step step) -{ - suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; - suspend_stats.last_failed_step++; - suspend_stats.last_failed_step %= REC_FAILED_NUM; -} - /** * struct platform_suspend_ops - Callbacks for managing platform dependent * system sleep states. @@ -626,4 +567,19 @@ static inline void queue_up_suspend_work(void) {} #endif /* !CONFIG_PM_AUTOSLEEP */ +enum suspend_stat_step { + SUSPEND_WORKING = 0, + SUSPEND_FREEZE, + SUSPEND_PREPARE, + SUSPEND_SUSPEND, + SUSPEND_SUSPEND_LATE, + SUSPEND_SUSPEND_NOIRQ, + SUSPEND_RESUME_NOIRQ, + SUSPEND_RESUME_EARLY, + SUSPEND_RESUME +}; + +void dpm_save_failed_dev(const char *name); +void dpm_save_failed_step(enum suspend_stat_step step); + #endif /* _LINUX_SUSPEND_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 493487ed7c38..f53d608daa01 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,6 +298,7 @@ struct swap_info_struct { unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ + struct file *bdev_file; /* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ @@ -349,16 +350,6 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); -/* Only track the nodes of mappings with shadow entries */ -void workingset_update_node(struct xa_node *node); -extern struct list_lru shadow_nodes; -#define mapping_set_update(xas, mapping) do { \ - if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ - xas_set_update(xas, workingset_update_node); \ - xas_set_lru(xas, &shadow_nodes); \ - } \ -} while (0) - /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; @@ -396,9 +387,6 @@ void folio_deactivate(struct folio *folio); void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); -extern void lru_cache_add_inactive_or_unevictable(struct page *page, - struct vm_area_struct *vma); - /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, @@ -449,9 +437,9 @@ static inline unsigned long total_swapcache_pages(void) return global_node_page_state(NR_SWAPCACHE); } -extern void free_swap_cache(struct page *page); -extern void free_page_and_swap_cache(struct page *); -extern void free_pages_and_swap_cache(struct encoded_page **, int); +void free_swap_cache(struct folio *folio); +void free_page_and_swap_cache(struct page *); +void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; @@ -489,13 +477,12 @@ extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); -extern struct swap_info_struct *page_swap_info(struct page *); -extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); +struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); -sector_t swap_page_sector(struct page *page); +sector_t swap_folio_sector(struct folio *folio); static inline void put_swap_device(struct swap_info_struct *si) { @@ -534,7 +521,7 @@ static inline void put_swap_device(struct swap_info_struct *si) /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ #define free_swap_and_cache(e) is_pfn_swap_entry(e) -static inline void free_swap_cache(struct page *page) +static inline void free_swap_cache(struct folio *folio) { } @@ -552,6 +539,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index bff1e8d97de0..a5c560a2f8c2 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) } #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_MEMORY_FAILURE + +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} + +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) @@ -468,10 +497,24 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) return p; } +static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) +{ + struct folio *folio = pfn_folio(swp_offset_pfn(entry)); + + /* + * Any use of migration entries may only occur while the + * corresponding folio is locked + */ + BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio)); + + return folio; +} + /* * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They are used to represent unaddressable device memory - * and to restrict access to a page undergoing migration. + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { @@ -479,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry); + is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; @@ -548,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#ifdef CONFIG_MEMORY_FAILURE - -/* - * Support for hardware poisoned pages - */ -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - BUG_ON(!PageLocked(page)); - return swp_entry(SWP_HWPOISON, page_to_pfn(page)); -} - -static inline int is_hwpoison_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_HWPOISON; -} - -#else - -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - return swp_entry(0, 0); -} - -static inline int is_hwpoison_entry(swp_entry_t swp) -{ - return 0; -} -#endif - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ecde0312dd52..ea23097e351f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -120,6 +120,8 @@ struct io_tlb_pool { * debugfs. * @used_hiwater: The high water mark for total_used. Used only for reporting * in debugfs. + * @transient_nslabs: The total number of slots in all transient pools that + * are currently used across all areas. */ struct io_tlb_mem { struct io_tlb_pool defpool; @@ -137,6 +139,7 @@ struct io_tlb_mem { #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; + atomic_long_t transient_nslabs; #endif }; diff --git a/include/linux/sync_core.h b/include/linux/sync_core.h index 013da4b8b327..67bb9794b875 100644 --- a/include/linux/sync_core.h +++ b/include/linux/sync_core.h @@ -17,5 +17,19 @@ static inline void sync_core_before_usermode(void) } #endif -#endif /* _LINUX_SYNC_CORE_H */ +#ifdef CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD +#include <asm/sync_core.h> +#else +/* + * This is a dummy prepare_sync_core_cmd() implementation that can be used on + * all architectures which provide unconditional core serializing instructions + * in switch_mm(). + * If your architecture doesn't provide such core serializing instructions in + * switch_mm(), you may need to write your own functions. + */ +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ +} +#endif +#endif /* _LINUX_SYNC_CORE_H */ diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h index 641ca8880995..3858a6ffdd5c 100644 --- a/include/linux/syscall_user_dispatch.h +++ b/include/linux/syscall_user_dispatch.h @@ -6,16 +6,10 @@ #define _SYSCALL_USER_DISPATCH_H #include <linux/thread_info.h> +#include <linux/syscall_user_dispatch_types.h> #ifdef CONFIG_GENERIC_ENTRY -struct syscall_user_dispatch { - char __user *selector; - unsigned long offset; - unsigned long len; - bool on_dispatch; -}; - int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, unsigned long len, char __user *selector); @@ -29,7 +23,6 @@ int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long siz void __user *data); #else -struct syscall_user_dispatch {}; static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, unsigned long len, char __user *selector) diff --git a/include/linux/syscall_user_dispatch_types.h b/include/linux/syscall_user_dispatch_types.h new file mode 100644 index 000000000000..3be36b06c7d7 --- /dev/null +++ b/include/linux/syscall_user_dispatch_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SYSCALL_USER_DISPATCH_TYPES_H +#define _SYSCALL_USER_DISPATCH_TYPES_H + +#include <linux/types.h> + +#ifdef CONFIG_GENERIC_ENTRY + +struct syscall_user_dispatch { + char __user *selector; + unsigned long offset; + unsigned long len; + bool on_dispatch; +}; + +#else + +struct syscall_user_dispatch {}; + +#endif + +#endif /* _SYSCALL_USER_DISPATCH_TYPES_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f8..e619ac10cd23 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -71,9 +71,12 @@ struct clone_args; struct open_how; struct mount_attr; struct landlock_ruleset_attr; +struct lsm_ctx; enum landlock_rule_type; struct cachestat_range; struct cachestat; +struct statmount; +struct mnt_id_req; #include <linux/types.h> #include <linux/aio_abi.h> @@ -125,6 +128,7 @@ struct cachestat; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) @@ -355,7 +359,6 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, @@ -408,6 +411,12 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf); +asmlinkage long sys_statmount(const struct mnt_id_req __user *req, + struct statmount __user *buf, size_t bufsize, + unsigned int flags); +asmlinkage long sys_listmount(const struct mnt_id_req __user *req, + u64 __user *mnt_ids, size_t nr_mnt_ids, + unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); #if BITS_PER_LONG == 32 @@ -549,6 +558,16 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, @@ -940,6 +959,11 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); +asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, + u32 *size, u32 flags); +asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, + u32 size, u32 flags); +asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); /* * Architecture-specific system calls diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 09d7429d67c0..ee7d33b89e9e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -210,11 +210,6 @@ struct ctl_table_root { int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; -/* struct ctl_path describes where in the hierarchy a table is added */ -struct ctl_path { - const char *procname; -}; - #define register_sysctl(path, table) \ register_sysctl_sz(path, table, ARRAY_SIZE(table)) @@ -242,6 +237,7 @@ extern void __register_sysctl_init(const char *path, struct ctl_table *table, extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); +bool sysctl_is_alias(char *param); int do_proc_douintvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, @@ -254,8 +250,6 @@ extern int unaligned_enabled; extern int unaligned_dump_stack; extern int no_unaligned_warning; -#define SYSCTL_PERM_EMPTY_DIR (1 << 0) - #else /* CONFIG_SYSCTL */ static inline void register_sysctl_init(const char *path, struct ctl_table *table) @@ -287,6 +281,11 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, static inline void do_sysctl_args(void) { } + +static inline bool sysctl_is_alias(char *param) +{ + return false; +} #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 19cb803dd5ec..c9cb657dad08 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -91,7 +91,8 @@ static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); struct platform_device *sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); + const struct simplefb_platform_data *mode, + struct device *parent); #else /* CONFIG_SYSFB_SIMPLE */ @@ -102,7 +103,8 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, } static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) + const struct simplefb_platform_data *mode, + struct device *parent) { return ERR_PTR(-EINVAL); } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fd3fe5c8c17f..326341c62385 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -61,22 +61,32 @@ do { \ /** * struct attribute_group - data structure used to declare an attribute group. * @name: Optional: Attribute group name - * If specified, the attribute group will be created in - * a new subdirectory with this name. + * If specified, the attribute group will be created in a + * new subdirectory with this name. Additionally when a + * group is named, @is_visible and @is_bin_visible may + * return SYSFS_GROUP_INVISIBLE to control visibility of + * the directory itself. * @is_visible: Optional: Function to return permissions associated with an - * attribute of the group. Will be called repeatedly for each - * non-binary attribute in the group. Only read/write + * attribute of the group. Will be called repeatedly for + * each non-binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC are accepted. Must - * return 0 if an attribute is not visible. The returned value - * will replace static permissions defined in struct attribute. + * return 0 if an attribute is not visible. The returned + * value will replace static permissions defined in struct + * attribute. Use SYSFS_GROUP_VISIBLE() when assigning this + * callback to specify separate _group_visible() and + * _attr_visible() handlers. * @is_bin_visible: * Optional: Function to return permissions associated with a * binary attribute of the group. Will be called repeatedly * for each binary attribute in the group. Only read/write - * permissions as well as SYSFS_PREALLOC are accepted. Must - * return 0 if a binary attribute is not visible. The returned - * value will replace static permissions defined in - * struct bin_attribute. + * permissions as well as SYSFS_PREALLOC (and the + * visibility flags for named groups) are accepted. Must + * return 0 if a binary attribute is not visible. The + * returned value will replace static permissions defined + * in struct bin_attribute. If @is_visible is not set, Use + * SYSFS_GROUP_VISIBLE() when assigning this callback to + * specify separate _group_visible() and _attr_visible() + * handlers. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -91,13 +101,121 @@ struct attribute_group { struct bin_attribute **bin_attrs; }; +#define SYSFS_PREALLOC 010000 +#define SYSFS_GROUP_INVISIBLE 020000 + +/* + * DEFINE_SYSFS_GROUP_VISIBLE(name): + * A helper macro to pair with the assignment of ".is_visible = + * SYSFS_GROUP_VISIBLE(name)", that arranges for the directory + * associated with a named attribute_group to optionally be hidden. + * This allows for static declaration of attribute_groups, and the + * simplification of attribute visibility lifetime that implies, + * without polluting sysfs with empty attribute directories. + * Ex. + * + * static umode_t example_attr_visible(struct kobject *kobj, + * struct attribute *attr, int n) + * { + * if (example_attr_condition) + * return 0; + * else if (ro_attr_condition) + * return 0444; + * return a->mode; + * } + * + * static bool example_group_visible(struct kobject *kobj) + * { + * if (example_group_condition) + * return false; + * return true; + * } + * + * DEFINE_SYSFS_GROUP_VISIBLE(example); + * + * static struct attribute_group example_group = { + * .name = "example", + * .is_visible = SYSFS_GROUP_VISIBLE(example), + * .attrs = &example_attrs, + * }; + * + * Note that it expects <name>_attr_visible and <name>_group_visible to + * be defined. For cases where individual attributes do not need + * separate visibility consideration, only entire group visibility at + * once, see DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(). + */ +#define DEFINE_SYSFS_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ + } + +/* + * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name): + * A helper macro to pair with SYSFS_GROUP_VISIBLE() that like + * DEFINE_SYSFS_GROUP_VISIBLE() controls group visibility, but does + * not require the implementation of a per-attribute visibility + * callback. + * Ex. + * + * static bool example_group_visible(struct kobject *kobj) + * { + * if (example_group_condition) + * return false; + * return true; + * } + * + * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(example); + * + * static struct attribute_group example_group = { + * .name = "example", + * .is_visible = SYSFS_GROUP_VISIBLE(example), + * .attrs = &example_attrs, + * }; + */ +#define DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ + } + +/* + * Same as DEFINE_SYSFS_GROUP_VISIBLE, but for groups with only binary + * attributes. If an attribute_group defines both text and binary + * attributes, the group visibility is determined by the function + * specified to is_visible() not is_bin_visible() + */ +#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct bin_attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ + } + +#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct bin_attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ + } + +#define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn + /* * Use these macros to make defining attributes easier. * See include/linux/device.h for examples.. */ -#define SYSFS_PREALLOC 010000 - #define __ATTR(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ @@ -181,6 +299,8 @@ struct bin_attribute { char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + loff_t, int); int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; diff --git a/include/linux/tc.h b/include/linux/tc.h index a60639f37963..1638660abf5e 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -120,7 +120,7 @@ static inline unsigned long tc_get_speed(struct tc_bus *tbus) #ifdef CONFIG_TC -extern struct bus_type tc_bus_type; +extern const struct bus_type tc_bus_type; extern int tc_register_driver(struct tc_driver *tdrv); extern void tc_unregister_driver(struct tc_driver *tdrv); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3c5efeeb024f..55399ee2a57e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,6 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif @@ -165,6 +166,11 @@ struct tcp_request_sock { * after data-in-SYN. */ u8 syn_tos; +#ifdef CONFIG_TCP_AO + u8 ao_keyid; + u8 ao_rcv_next; + bool used_tcp_ao; +#endif }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -172,26 +178,135 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) return (struct tcp_request_sock *)req; } +static inline bool tcp_rsk_used_ao(const struct request_sock *req) +{ +#ifndef CONFIG_TCP_AO + return false; +#else + return tcp_rsk(req)->used_tcp_ao; +#endif +} + #define TCP_RMEM_TO_WIN_SCALE 8 struct tcp_sock { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/tcp_sock.rst. + * Please update the document when adding new fields. + */ + /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; - u16 tcp_header_len; /* Bytes of tcp header to send */ + + /* TX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_tx); + /* timestamp of last sent data packet (for restart window) */ + u32 max_window; /* Maximal window ever seen from peer */ + u32 rcv_ssthresh; /* Current window clamp */ + u32 reordering; /* Packet reordering metric. */ + u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u16 gso_segs; /* Max number of segs per GSO packet */ + /* from STCP, retrans queue hinting */ + struct sk_buff *lost_skb_hint; + struct sk_buff *retransmit_skb_hint; + __cacheline_group_end(tcp_sock_read_tx); + + /* TXRX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_txrx); + u32 tsoffset; /* timestamp offset */ + u32 snd_wnd; /* The window we expect to receive */ + u32 mss_cache; /* Cached effective mss, not including SACKS */ + u32 snd_cwnd; /* Sending congestion window */ + u32 prr_out; /* Total number of pkts sent during Recovery. */ + u32 lost_out; /* Lost packets */ + u32 sacked_out; /* SACK'd packets */ + u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ + u8 chrono_type : 2, /* current chronograph type */ + repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + __cacheline_group_end(tcp_sock_read_txrx); + + /* RX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_rx); + u32 copied_seq; /* Head of yet unread data */ + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ + u32 snd_wl1; /* Sequence for window update */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 retrans_out; /* Retransmitted packets out */ + u16 advmss; /* Advertised MSS */ + u16 urg_data; /* Saved octet of OOB data and control flags */ + u32 lost; /* Total data packets lost incl. rexmits */ + struct minmax rtt_min; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + u32 snd_ssthresh; /* Slow start size threshold */ + __cacheline_group_end(tcp_sock_read_rx); + /* TX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned; + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut + * total number of data bytes sent. + */ + u32 snd_sml; /* Last byte of the most recently transmitted small packet */ + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ + u32 pushed_seq; /* Last pushed seq, required to talk to windows */ + u32 lsndtime; + u32 mdev_us; /* medium deviation */ + u32 rtt_seq; /* sequence number to update rttvar */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ + u64 tcp_mstamp; /* most recent packet received/sent */ + struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ + struct sk_buff *highest_sack; /* skb just after the highest + * skb with SACKed bit set + * (validity guaranteed only if + * sacked_out > 0) + */ + u8 ecn_flags; /* ECN status bits. */ + __cacheline_group_end(tcp_sock_write_tx); + + /* TXRX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_txrx); /* * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ __be32 pred_flags; - + u32 rcv_nxt; /* What we want to receive next */ + u32 snd_nxt; /* Next sequence we send */ + u32 snd_una; /* First byte we want an ack for */ + u32 window_clamp; /* Maximal window to advertise */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 packets_out; /* Packets which are "in flight" */ + u32 snd_up; /* Urgent pointer */ + u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 delivered_ce; /* Like the above but only ECE marked packets */ + u32 app_limited; /* limited until "delivered" reaches this val */ + u32 rcv_wnd; /* Current receiver window */ /* - * RFC793 variables by their proper names. This means you can - * read the code and the spec side by side (and laugh ...) - * See RFC793 and RFC1122. The RFC writes these in capitals. + * Options received (usually on last packet, some only on SYN packets). */ - u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + struct tcp_options_received rx_opt; + u8 nonagle : 4,/* Disable Nagle algorithm? */ + rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ + __cacheline_group_end(tcp_sock_write_txrx); + + /* RX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); + u64 bytes_received; + /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes * were acked. */ @@ -201,46 +316,43 @@ struct tcp_sock { u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn * total number of data segments in. */ - u32 rcv_nxt; /* What we want to receive next */ - u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ - u32 snd_nxt; /* Next sequence we send */ - u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut - * The total number of segments sent. - */ - u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut - * total number of data segments sent. - */ - u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut - * total number of data bytes sent. - */ + u32 max_packets_out; /* max packets_out in last window */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ + u32 rcv_rtt_last_tsecr; + u64 first_tx_mstamp; /* start of window send phase */ + u64 delivered_mstamp; /* time we reached "delivered" */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. */ + struct { + u32 rtt_us; + u32 seq; + u64 time; + } rcv_rtt_est; +/* Receiver queue space */ + struct { + u32 space; + u32 seq; + u64 time; + } rcvq_space; + __cacheline_group_end(tcp_sock_write_rx); + /* End of Hot Path */ + +/* + * RFC793 variables by their proper names. This means you can + * read the code and the spec side by side (and laugh ...) + * See RFC793 and RFC1122. The RFC writes these in capitals. + */ u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups * total number of DSACK blocks received */ - u32 snd_una; /* First byte we want an ack for */ - u32 snd_sml; /* Last byte of the most recently transmitted small packet */ - u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ - u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 compressed_ack_rcv_nxt; - - u32 tsoffset; /* timestamp offset */ - struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ - u32 snd_wl1; /* Sequence for window update */ - u32 snd_wnd; /* The window we expect to receive */ - u32 max_window; /* Maximal window ever seen from peer */ - u32 mss_cache; /* Cached effective mss, not including SACKS */ - - u32 window_clamp; /* Maximal window to advertise */ - u32 rcv_ssthresh; /* Current window clamp */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ @@ -253,23 +365,15 @@ struct tcp_sock { dsack_seen:1, /* Whether DSACK seen after last adj */ advanced:1; /* mstamp advanced since last lost marking */ } rack; - u16 advmss; /* Advertised MSS */ u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ unused:5; - u32 chrono_start; /* Start time in jiffies of a TCP chrono */ - u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ - u8 chrono_type:2, /* current chronograph type */ - rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + u8 thin_lto : 1,/* Use linear timeouts for thin streams */ + recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ - is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ - fastopen_client_fail:2; /* reason why fastopen failed */ - u8 nonagle : 4,/* Disable Nagle algorithm? */ - thin_lto : 1,/* Use linear timeouts for thin streams */ - recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ - repair : 1, + fastopen_client_fail:2, /* reason why fastopen failed */ frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 save_syn:2, /* Save headers of SYN packet */ @@ -277,45 +381,19 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ - u64 tcp_wstamp_ns; /* departure time for next sent data packet */ - u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ - u64 tcp_mstamp; /* most recent packet received/sent */ - u32 srtt_us; /* smoothed round trip time << 3 in usecs */ - u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u32 rttvar_us; /* smoothed mdev_max */ - u32 rtt_seq; /* sequence number to update rttvar */ - struct minmax rtt_min; - - u32 packets_out; /* Packets which are "in flight" */ - u32 retrans_out; /* Retransmitted packets out */ - u32 max_packets_out; /* max packets_out in last window */ - u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ - u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 ecn_flags; /* ECN status bits. */ - u8 keepalive_probes; /* num of allowed keep alive probes */ - u32 reordering; /* Packet reordering metric. */ u32 reord_seen; /* number of data packet reordering events */ - u32 snd_up; /* Urgent pointer */ - -/* - * Options received (usually on last packet, some only on SYN packets). - */ - struct tcp_options_received rx_opt; /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ - u32 snd_ssthresh; /* Slow start size threshold */ - u32 snd_cwnd; /* Sending congestion window */ u32 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; @@ -323,32 +401,11 @@ struct tcp_sock { u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ - u32 prr_out; /* Total number of pkts sent during Recovery. */ - u32 delivered; /* Total data packets delivered incl. rexmits */ - u32 delivered_ce; /* Like the above but only ECE marked packets */ - u32 lost; /* Total data packets lost incl. rexmits */ - u32 app_limited; /* limited until "delivered" reaches this val */ - u64 first_tx_mstamp; /* start of window send phase */ - u64 delivered_mstamp; /* time we reached "delivered" */ - u32 rate_delivered; /* saved rate sample: packets delivered */ - u32 rate_interval_us; /* saved rate sample: time elapsed */ - - u32 rcv_wnd; /* Current receiver window */ - u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ - u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ - u32 pushed_seq; /* Last pushed seq, required to talk to windows */ - u32 lost_out; /* Lost packets */ - u32 sacked_out; /* SACK'd packets */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ struct hrtimer pacing_timer; struct hrtimer compressed_ack_timer; - /* from STCP, retrans queue hinting */ - struct sk_buff* lost_skb_hint; - struct sk_buff *retransmit_skb_hint; - - /* OOO segments go in this rbtree. Socket lock must be held. */ - struct rb_root out_of_order_queue; struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ @@ -357,12 +414,6 @@ struct tcp_sock { struct tcp_sack_block recv_sack_cache[4]; - struct sk_buff *highest_sack; /* skb just after the highest - * skb with SACKed bit set - * (validity guaranteed only if - * sacked_out > 0) - */ - int lost_cnt_hint; u32 prior_ssthresh; /* ssthresh saved at recovery start */ @@ -377,6 +428,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ @@ -405,21 +464,6 @@ struct tcp_sock { u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ -/* Receiver side RTT estimation */ - u32 rcv_rtt_last_tsecr; - struct { - u32 rtt_us; - u32 seq; - u64 time; - } rcv_rtt_est; - -/* Receiver queue space */ - struct { - u32 space; - u32 seq; - u64 time; - } rcvq_space; - /* TCP-specific MTU probe information. */ struct { u32 probe_seq_start; @@ -433,17 +477,22 @@ struct tcp_sock { bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) - bool (*smc_hs_congested)(const struct sock *sk); bool syn_smc; /* SYN includes SMC */ + bool (*smc_hs_congested)(const struct sock *sk); #endif -#ifdef CONFIG_TCP_MD5SIG -/* TCP AF-Specific parts; only used by MD5 Signature support so far */ +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */ const struct tcp_sock_af_ops *af_specific; +#ifdef CONFIG_TCP_MD5SIG /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif +#endif /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; @@ -463,15 +512,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) @@ -497,6 +548,9 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) @@ -566,4 +620,9 @@ void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); int tcp_sock_set_user_timeout(struct sock *sk, int val); +static inline bool dst_tcp_usec_ts(const struct dst_entry *dst) +{ + return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS); +} + #endif /* _LINUX_TCP_H */ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 17eb1c5205d3..71632e3c5f18 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -84,6 +84,7 @@ struct tee_param { * @release: release this open file * @open_session: open a new session * @close_session: close a session + * @system_session: declare session as a system session * @invoke_func: invoke a trusted function * @cancel_req: request cancel of an ongoing invoke or open * @supp_recv: called for supplicant to get a command @@ -100,6 +101,7 @@ struct tee_driver_ops { struct tee_ioctl_open_session_arg *arg, struct tee_param *param); int (*close_session)(struct tee_context *ctx, u32 session); + int (*system_session)(struct tee_context *ctx, u32 session); int (*invoke_func)(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, struct tee_param *param); @@ -430,6 +432,20 @@ int tee_client_open_session(struct tee_context *ctx, int tee_client_close_session(struct tee_context *ctx, u32 session); /** + * tee_client_system_session() - Declare session as a system session + * @ctx: TEE Context + * @session: Session id + * + * This function requests TEE to provision an entry context ready to use for + * that session only. The provisioned entry context is used for command + * invocation and session closure, not for command cancelling requests. + * TEE releases the provisioned context upon session closure. + * + * Return < 0 on error else 0 if an entry context has been provisioned. + */ +int tee_client_system_session(struct tee_context *ctx, u32 session); + +/** * tee_client_invoke_func() - Invoke a function in a Trusted Application * @ctx: TEE Context * @arg: Invoke arguments, see description of @@ -466,7 +482,7 @@ static inline bool tee_param_is_memref(struct tee_param *param) } } -extern struct bus_type tee_bus_type; +extern const struct bus_type tee_bus_type; /** * struct tee_client_device - tee based device diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a5ae4af955ff..c33f50177f51 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -32,6 +32,7 @@ struct thermal_zone_device; struct thermal_cooling_device; struct thermal_instance; +struct thermal_debugfs; struct thermal_attr; enum thermal_trend { @@ -51,22 +52,35 @@ enum thermal_notify_event { THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */ THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ + THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ + THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ + THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ }; /** * struct thermal_trip - representation of a point in temperature domain * @temperature: temperature value in miliCelsius * @hysteresis: relative hysteresis in miliCelsius + * @threshold: trip crossing notification threshold miliCelsius * @type: trip point type * @priv: pointer to driver data associated with this trip + * @flags: flags representing binary properties of the trip */ struct thermal_trip { int temperature; int hysteresis; + int threshold; enum thermal_trip_type type; + u8 flags; void *priv; }; +#define THERMAL_TRIP_FLAG_RW_TEMP BIT(0) +#define THERMAL_TRIP_FLAG_RW_HYST BIT(1) + +#define THERMAL_TRIP_FLAG_RW (THERMAL_TRIP_FLAG_RW_TEMP | \ + THERMAL_TRIP_FLAG_RW_HYST) + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -77,7 +91,6 @@ struct thermal_zone_device_ops { int (*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*set_trip_temp) (struct thermal_zone_device *, int, int); - int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, @@ -97,7 +110,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; - char *type; + const char *type; unsigned long max_state; struct device device; struct device_node *np; @@ -108,6 +121,9 @@ struct thermal_cooling_device { struct mutex lock; /* protect thermal_instances list */ struct list_head thermal_instances; struct list_head node; +#ifdef CONFIG_THERMAL_DEBUGFS + struct thermal_debugfs *debugfs; +#endif }; /** @@ -115,14 +131,13 @@ struct thermal_cooling_device { * @id: unique id number for each thermal zone * @type: the thermal zone device type * @device: &struct device for this thermal zone + * @removal: removal completion * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature * @trip_type_attrs: attributes for trip points for sysfs: trip type * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis * @mode: current mode of this thermal zone * @devdata: private pointer for device private data - * @trips: an array of struct thermal_trip * @num_trips: number of trip points the thermal zone supports - * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when @@ -150,20 +165,21 @@ struct thermal_cooling_device { * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling * @notify_event: Last notification event + * @suspended: thermal zone suspend indicator + * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; struct device device; + struct completion removal; struct attribute_group trips_attribute_group; struct thermal_attr *trip_temp_attrs; struct thermal_attr *trip_type_attrs; struct thermal_attr *trip_hyst_attrs; enum thermal_device_mode mode; void *devdata; - struct thermal_trip *trips; int num_trips; - unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; int temperature; @@ -173,7 +189,7 @@ struct thermal_zone_device { int prev_low_trip; int prev_high_trip; atomic_t need_update; - struct thermal_zone_device_ops *ops; + struct thermal_zone_device_ops ops; struct thermal_zone_params *tzp; struct thermal_governor *governor; void *governor_data; @@ -183,6 +199,11 @@ struct thermal_zone_device { struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; + bool suspended; +#ifdef CONFIG_THERMAL_DEBUGFS + struct thermal_debugfs *debugfs; +#endif + struct thermal_trip trips[] __counted_by(num_trips); }; /** @@ -195,19 +216,24 @@ struct thermal_zone_device { * thermal zone. * @throttle: callback called for every trip point even if temperature is * below the trip point temperature + * @update_tz: callback called when thermal zone internals have changed, e.g. + * thermal cooling instance was added/removed * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { - char name[THERMAL_NAME_LENGTH]; + const char *name; int (*bind_to_tz)(struct thermal_zone_device *tz); void (*unbind_from_tz)(struct thermal_zone_device *tz); - int (*throttle)(struct thermal_zone_device *tz, int trip); + int (*throttle)(struct thermal_zone_device *tz, + const struct thermal_trip *trip); + void (*update_tz)(struct thermal_zone_device *tz, + enum thermal_notify_event reason); struct list_head governor_list; }; /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { - char governor_name[THERMAL_NAME_LENGTH]; + const char *governor_name; /* * a boolean to indicate if the thermal to hwmon sysfs interface @@ -281,38 +307,31 @@ int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); - -int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, - const struct thermal_trip *trip); - int for_each_thermal_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); +int thermal_zone_for_each_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); int thermal_zone_get_num_trips(struct thermal_zone_device *tz); +void thermal_zone_set_trip_temp(struct thermal_zone_device *tz, + struct thermal_trip *trip, int temp); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); -#ifdef CONFIG_THERMAL_ACPI -int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); -int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); -int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); -int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); -#endif - #ifdef CONFIG_THERMAL struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, - struct thermal_trip *trips, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp, int passive_delay, int polling_delay); struct thermal_zone_device *thermal_tripless_zone_device_register( const char *type, void *devdata, - struct thermal_zone_device_ops *ops, + const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp); void thermal_zone_device_unregister(struct thermal_zone_device *tz); @@ -322,18 +341,22 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); +int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, + unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); +int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -void thermal_zone_device_exec(struct thermal_zone_device *tz, - void (*cb)(struct thermal_zone_device *, - unsigned long), - unsigned long data); struct thermal_cooling_device *thermal_cooling_device_register(const char *, void *, const struct thermal_cooling_device_ops *); @@ -358,10 +381,9 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz); #else static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, - struct thermal_trip *trips, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp, int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 02333f47c994..4338ea9ac4fd 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -86,9 +86,9 @@ struct tb { unsigned long privdata[]; }; -extern struct bus_type tb_bus_type; -extern struct device_type tb_service_type; -extern struct device_type tb_xdomain_type; +extern const struct bus_type tb_bus_type; +extern const struct device_type tb_service_type; +extern const struct device_type tb_xdomain_type; #define TB_LINKS_PER_PHY_PORT 2 @@ -175,7 +175,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * enum tb_link_width - Thunderbolt/USB4 link width * @TB_LINK_WIDTH_SINGLE: Single lane link * @TB_LINK_WIDTH_DUAL: Dual lane symmetric link - * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 trasmitters + * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 transmitters * @TB_LINK_WIDTH_ASYM_RX: Dual lane asymmetric Gen 4 link with 3 receivers */ enum tb_link_width { diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..4924a33700b7 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -12,6 +12,7 @@ #include <linux/cpumask.h> #include <linux/sched.h> #include <linux/rcupdate.h> +#include <linux/static_key.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); @@ -19,16 +20,22 @@ extern void __init tick_init(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); -extern void tick_handover_do_timer(void); extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } -static inline void tick_handover_do_timer(void) { } static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) +extern int tick_cpu_dying(unsigned int cpu); +extern void tick_assert_timekeeping_handover(void); +#else +#define tick_cpu_dying NULL +static inline void tick_assert_timekeeping_handover(void) { } +#endif + #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) extern void tick_freeze(void); extern void tick_unfreeze(void); @@ -63,18 +70,14 @@ enum tick_broadcast_state { TICK_BROADCAST_ENTER, }; +extern struct static_key_false arch_needs_tick_broadcast; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU) -extern void tick_offline_cpu(unsigned int cpu); -#else -static inline void tick_offline_cpu(unsigned int cpu) { } -#endif - #ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else @@ -140,14 +143,6 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -static inline void tick_nohz_idle_stop_tick_protected(void) -{ - local_irq_disable(); - tick_nohz_idle_stop_tick(); - local_irq_enable(); -} - #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } @@ -170,13 +165,18 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } - -static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ +/* + * Mask of CPUs that are nohz_full. + * + * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu() + * check. + */ +extern cpumask_var_t tick_nohz_full_mask; + #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; -extern cpumask_var_t tick_nohz_full_mask; static inline bool tick_nohz_full_enabled(void) { diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 03d9c5ac01d1..876e31b4461d 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -7,10 +7,13 @@ #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/err.h> +#include <linux/time64.h> struct user_namespace; extern struct user_namespace init_user_ns; +struct vm_area_struct; + struct timens_offsets { struct timespec64 monotonic; struct timespec64 boottime; diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index c6540ceea143..0982d1d52b24 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -22,7 +22,7 @@ * * @read: returns the current cycle value * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, + * subtraction of non-64-bit counters, * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) @@ -35,7 +35,7 @@ struct cyclecounter { }; /** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds * Contains the state needed by timecounter_read() to detect * cycle counter wrap around. Initialize with * timecounter_init(). Also used to convert cycle counts into the @@ -66,6 +66,8 @@ struct timecounter { * @cycles: Cycles * @mask: bit mask for maintaining the 'frac' field * @frac: pointer to storage for the fractional nanoseconds. + * + * Returns: cycle counter cycles converted to nanoseconds */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, u64 cycles, u64 mask, u64 *frac) @@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, /** * timecounter_adjtime - Shifts the time of the clock. + * @tc: The &struct timecounter to adjust * @delta: Desired change in nanoseconds. */ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) @@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc, * * In other words, keeps track of time since the same epoch as * the function which generated the initial time stamp. + * + * Returns: nanoseconds since the initial time stamp */ extern u64 timecounter_read(struct timecounter *tc); @@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc); * * This allows conversion of cycle counter values which were generated * in the past. + * + * Returns: cycle counter converted to nanoseconds since the initial time stamp */ extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fe1e467ba046..0ea7823b7f31 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -4,6 +4,7 @@ #include <linux/errno.h> #include <linux/clocksource_ids.h> +#include <linux/ktime.h> /* Included from linux/ktime.h */ @@ -21,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* - * ktime_get() family: read the current time in a multitude of ways, + * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * - * To get the time in a different format, use the ones wit + * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. @@ -73,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format + * + * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { @@ -85,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void) } /** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. + * + * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { @@ -101,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void) } /** - * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + * ktime_get_clocktai - Get the TAI time of day in ktime_t format + * + * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { @@ -143,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void) /** * ktime_mono_to_real - Convert monotonic time to clock realtime + * @mono: monotonic time to convert + * + * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } +/** + * ktime_get_ns - Get the current time in nanoseconds + * + * Returns: current time converted to nanoseconds + */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } +/** + * ktime_get_real_ns - Get the current real/wall time in nanoseconds + * + * Returns: current real time converted to nanoseconds + */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } +/** + * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds + * + * Returns: current boottime converted to nanoseconds + */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } +/** + * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds + * + * Returns: current TAI time converted to nanoseconds + */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } +/** + * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds + * + * Returns: current raw monotonic time converted to nanoseconds + */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); @@ -223,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* - * struct ktime_timestanps - Simultaneous mono/boot/real timestamps +/** + * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp @@ -241,7 +276,8 @@ struct ktime_timestamps { * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time - * @clock_was_set_seq: The sequence number of clock was set events + * @cs_id: Clocksource ID + * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { @@ -267,15 +303,17 @@ struct system_device_crosststamp { }; /** - * struct system_counterval_t - system counter value with the pointer to the + * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource * @cycles: System counter value - * @cs: Clocksource corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * @cs_id: Clocksource ID corresponding to system counter value. Used by + * timekeeping code to verify comparability of two cycle values. + * The default ID, CSID_GENERIC, does not identify a specific + * clocksource. */ struct system_counterval_t { u64 cycles; - struct clocksource *cs; + enum clocksource_ids cs_id; }; /* diff --git a/include/linux/timer.h b/include/linux/timer.h index 9162f275819a..e67ecd1cbc97 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -7,21 +7,7 @@ #include <linux/stddef.h> #include <linux/debugobjects.h> #include <linux/stringify.h> - -struct timer_list { - /* - * All fields that change during normal runtime grouped to the - * same cacheline - */ - struct hlist_node entry; - unsigned long expires; - void (*function)(struct timer_list *); - u32 flags; - -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; -#endif -}; +#include <linux/timer_types.h> #ifdef CONFIG_LOCKDEP /* @@ -36,7 +22,7 @@ struct timer_list { #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/** +/* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU @@ -50,16 +36,10 @@ struct timer_list { * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! * - * @TIMER_PINNED: A pinned timer will not be affected by any timer - * placement heuristics (like, NOHZ) and will always expire on the CPU - * on which the timer was enqueued. - * - * Note: Because enqueuing of timers can migrate the timer from one - * CPU to another, pinned timers are not guaranteed to stay on the - * initialy selected CPU. They move to the CPU on which the enqueue - * function is invoked via mod_timer() or add_timer(). If the timer - * should be placed on a particular CPU, then add_timer_on() has to be - * used. + * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the + * timer was enqueued. When a particular CPU is required, add_timer_on() + * has to be used. Enqueue via mod_timer() and add_timer() is always done + * on the local CPU. */ #define TIMER_CPUMASK 0x0003FFFF #define TIMER_MIGRATING 0x00040000 @@ -77,8 +57,7 @@ struct timer_list { .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .flags = (_flags), \ - __TIMER_LOCKDEP_MAP_INITIALIZER( \ - __FILE__ ":" __stringify(__LINE__)) \ + __TIMER_LOCKDEP_MAP_INITIALIZER(FILE_LINE) \ } #define DEFINE_TIMER(_name, _function) \ @@ -161,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * - * return value: 1 if the timer is pending, 0 if not. + * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { @@ -180,6 +159,8 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); +extern void add_timer_local(struct timer_list *timer); +extern void add_timer_global(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); @@ -194,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer); * See timer_delete_sync() for detailed explanation. * * Do not use in new code. Use timer_delete_sync() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { @@ -207,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer) * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { diff --git a/include/linux/timer_types.h b/include/linux/timer_types.h new file mode 100644 index 000000000000..fae5a388f914 --- /dev/null +++ b/include/linux/timer_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TIMER_TYPES_H +#define _LINUX_TIMER_TYPES_H + +#include <linux/lockdep_types.h> +#include <linux/types.h> + +struct timer_list { + /* + * All fields that change during normal runtime grouped to the + * same cacheline + */ + struct hlist_node entry; + unsigned long expires; + void (*function)(struct timer_list *); + u32 flags; + +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif +}; + +#endif /* _LINUX_TIMER_TYPES_H */ diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index adc80e29168e..62973f7d4610 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -3,18 +3,7 @@ #define _LINUX_TIMERQUEUE_H #include <linux/rbtree.h> -#include <linux/ktime.h> - - -struct timerqueue_node { - struct rb_node node; - ktime_t expires; -}; - -struct timerqueue_head { - struct rb_root_cached rb_root; -}; - +#include <linux/timerqueue_types.h> extern bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node); diff --git a/include/linux/timerqueue_types.h b/include/linux/timerqueue_types.h new file mode 100644 index 000000000000..dc298d0923e3 --- /dev/null +++ b/include/linux/timerqueue_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TIMERQUEUE_TYPES_H +#define _LINUX_TIMERQUEUE_TYPES_H + +#include <linux/rbtree_types.h> +#include <linux/types.h> + +struct timerqueue_node { + struct rb_node node; + ktime_t expires; +}; + +struct timerqueue_head { + struct rb_root_cached rb_root; +}; + +#endif /* _LINUX_TIMERQUEUE_TYPES_H */ diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 1c3948a1d6ad..3c13240077b8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -106,6 +106,10 @@ int tnum_sbin(char *str, size_t size, struct tnum a); struct tnum tnum_subreg(struct tnum a); /* Returns the tnum with the lower 32-bit subreg cleared */ struct tnum tnum_clear_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower + * 32-bit subreg in *subreg* + */ +struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg); /* Returns the tnum with the lower 32-bit subreg set to value */ struct tnum tnum_const_subreg(struct tnum a, u32 value); /* Returns true if 32-bit subreg @a is a known constant*/ diff --git a/include/linux/topology.h b/include/linux/topology.h index fea32377f7c7..52f5850730b3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -251,7 +251,7 @@ extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { - return cpumask_nth(cpu, cpus); + return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * diff --git a/include/linux/torture.h b/include/linux/torture.h index bb466eec01e4..1541454da03e 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -21,6 +21,7 @@ #include <linux/debugobjects.h> #include <linux/bug.h> #include <linux/compiler.h> +#include <linux/hrtimer.h> /* Definitions for a non-string torture-test module parameter. */ #define torture_param(type, name, init, msg) \ @@ -81,7 +82,8 @@ static inline void torture_random_init(struct torture_random_state *trsp) } /* Definitions for high-resolution-timer sleeps. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp); +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp); int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp); int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp); int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp); @@ -120,10 +122,15 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_stop_kthread(n, tp) \ _torture_stop_kthread("Stopping " #n " task", &(tp)) +/* Scheduler-related definitions. */ #ifdef CONFIG_PREEMPTION #define torture_preempt_schedule() __preempt_schedule() #else #define torture_preempt_schedule() do { } while (0) #endif +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +#endif + #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/trace.h b/include/linux/trace.h index 2a70a447184c..fdcd76b7be83 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -51,7 +51,7 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...); int trace_array_init_printk(struct trace_array *tr); void trace_array_put(struct trace_array *tr); -struct trace_array *trace_array_get_by_name(const char *name); +struct trace_array *trace_array_get_by_name(const char *name, const char *systems); int trace_array_destroy(struct trace_array *tr); /* For osnoise tracer */ @@ -84,7 +84,7 @@ static inline int trace_array_init_printk(struct trace_array *tr) static inline void trace_array_put(struct trace_array *tr) { } -static inline struct trace_array *trace_array_get_by_name(const char *name) +static inline struct trace_array *trace_array_get_by_name(const char *name, const char *systems) { return NULL; } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..6f9bdfb09d1d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -17,6 +17,9 @@ struct dentry; struct bpf_prog; union bpf_attr; +/* Used for event string fields when they are NULL */ +#define EVENT_NULL_STR "(null)" + const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -103,13 +106,16 @@ struct trace_iterator { unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; - long wait_index; + atomic_t wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; cpumask_var_t started; + /* Set when the file is closed to prevent new waiters */ + bool closed; + /* it's true when current open file is snapshot */ bool snapshot; @@ -492,6 +498,7 @@ enum { EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, EVENT_FILE_FL_WAS_ENABLED_BIT, + EVENT_FILE_FL_FREED_BIT, }; extern struct trace_event_file *trace_get_event_file(const char *instance, @@ -630,6 +637,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid * WAS_ENABLED - Set when enabled to know to clear trace on module removal + * FREED - File descriptor is freed, all fields should be considered invalid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -643,13 +651,14 @@ enum { EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), + EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT), }; struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; - struct eventfs_file *ef; + struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; @@ -671,6 +680,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; + atomic_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; @@ -761,7 +771,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +812,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +888,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6be92bf559fe..1ef95c0287f0 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -8,20 +8,31 @@ /* * Trace sequences are used to allow a function to call several other functions - * to create a string of data to use (up to a max of PAGE_SIZE). + * to create a string of data to use. + * + * Have the trace seq to be 8K which is typically PAGE_SIZE * 2 on + * most architectures. The TRACE_SEQ_BUFFER_SIZE (which is + * TRACE_SEQ_SIZE minus the other fields of trace_seq), is the + * max size the output of a trace event may be. */ +#define TRACE_SEQ_SIZE 8192 +#define TRACE_SEQ_BUFFER_SIZE (TRACE_SEQ_SIZE - \ + (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) + struct trace_seq { - char buffer[PAGE_SIZE]; + char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; + size_t readpos; int full; }; static inline void trace_seq_init(struct trace_seq *s) { - seq_buf_init(&s->seq, s->buffer, PAGE_SIZE); + seq_buf_init(&s->seq, s->buffer, TRACE_SEQ_BUFFER_SIZE); s->full = 0; + s->readpos = 0; } /** diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 009072792fa3..7a5fe17b6bf9 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -23,26 +23,69 @@ struct file_operations; struct eventfs_file; -struct dentry *eventfs_create_events_dir(const char *name, - struct dentry *parent); - -struct eventfs_file *eventfs_add_subsystem_dir(const char *name, - struct dentry *parent); +/** + * eventfs_callback - A callback function to create dynamic files in eventfs + * @name: The name of the file that is to be created + * @mode: return the file mode for the file (RW access, etc) + * @data: data to pass to the created file ops + * @fops: the file operations of the created file + * + * The evetnfs files are dynamically created. The struct eventfs_entry array + * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will + * be used to create the files within those directories. When a lookup + * or access to a file within the directory is made, the struct eventfs_entry + * array is used to find a callback() with the matching name that is being + * referenced (for lookups, the entire array is iterated and each callback + * will be called). + * + * The callback will be called with @name for the name of the file to create. + * The callback can return less than 1 to indicate that no file should be + * created. + * + * If a file is to be created, then @mode should be populated with the file + * mode (permissions) for which the file is created for. This would be + * used to set the created inode i_mode field. + * + * The @data should be set to the data passed to the other file operations + * (read, write, etc). Note, @data will also point to the data passed in + * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback + * can replace the data if it chooses to. Otherwise, the original data + * will be used for the file operation functions. + * + * The @fops should be set to the file operations that will be used to create + * the inode. + * + * NB. This callback is called while holding internal locks of the eventfs + * system. The callback must not call any code that might also call into + * the tracefs or eventfs system or it will risk creating a deadlock. + */ +typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, + const struct file_operations **fops); -struct eventfs_file *eventfs_add_dir(const char *name, - struct eventfs_file *ef_parent); +/** + * struct eventfs_entry - dynamically created eventfs file call back handler + * @name: Then name of the dynamic file in an eventfs directory + * @callback: The callback to get the fops of the file when it is created + * + * See evenfs_callback() typedef for how to set up @callback. + */ +struct eventfs_entry { + const char *name; + eventfs_callback callback; +}; -int eventfs_add_file(const char *name, umode_t mode, - struct eventfs_file *ef_parent, void *data, - const struct file_operations *fops); +struct eventfs_inode; -int eventfs_add_events_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data); -void eventfs_remove(struct eventfs_file *ef); +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data); -void eventfs_remove_events_dir(struct dentry *dentry); +void eventfs_remove_events_dir(struct eventfs_inode *ei); +void eventfs_remove_dir(struct eventfs_inode *ei); struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88c0ba623ee6..689b6d71590e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -199,7 +199,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle))) \ + if (WARN_ONCE(RCUIDLE_COND(rcuidle), \ + "Bad RCU usage for tracepoint")) \ return; \ \ /* keep srcu and sched-rcu usage consistent */ \ @@ -259,7 +260,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) TP_ARGS(args), \ TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - WARN_ON_ONCE(!rcu_is_watching()); \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ diff --git a/include/linux/tsm.h b/include/linux/tsm.h new file mode 100644 index 000000000000..de8324a2223c --- /dev/null +++ b/include/linux/tsm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TSM_H +#define __TSM_H + +#include <linux/sizes.h> +#include <linux/types.h> + +#define TSM_INBLOB_MAX 64 +#define TSM_OUTBLOB_MAX SZ_32K + +/* + * Privilege level is a nested permission concept to allow confidential + * guests to partition address space, 4-levels are supported. + */ +#define TSM_PRIVLEVEL_MAX 3 + +/** + * struct tsm_desc - option descriptor for generating tsm report blobs + * @privlevel: optional privilege level to associate with @outblob + * @inblob_len: sizeof @inblob + * @inblob: arbitrary input data + */ +struct tsm_desc { + unsigned int privlevel; + size_t inblob_len; + u8 inblob[TSM_INBLOB_MAX]; +}; + +/** + * struct tsm_report - track state of report generation relative to options + * @desc: input parameters to @report_new() + * @outblob_len: sizeof(@outblob) + * @outblob: generated evidence to provider to the attestation agent + * @auxblob_len: sizeof(@auxblob) + * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + */ +struct tsm_report { + struct tsm_desc desc; + size_t outblob_len; + u8 *outblob; + size_t auxblob_len; + u8 *auxblob; +}; + +/** + * struct tsm_ops - attributes and operations for tsm instances + * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider + * @privlevel_floor: convey base privlevel for nested scenarios + * @report_new: Populate @report with the report blob and auxblob + * (optional), return 0 on successful population, or -errno otherwise + * + * Implementation specific ops, only one is expected to be registered at + * a time i.e. only one of "sev-guest", "tdx-guest", etc. + */ +struct tsm_ops { + const char *name; + const unsigned int privlevel_floor; + int (*report_new)(struct tsm_report *report, void *data); +}; + +extern const struct config_item_type tsm_report_default_type; + +/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ +extern const struct config_item_type tsm_report_extra_type; + +int tsm_register(const struct tsm_ops *ops, void *priv, + const struct config_item_type *type); +int tsm_unregister(const struct tsm_ops *ops); +#endif /* __TSM_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index f002d0f25db7..2b2e6f0a54d6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -6,7 +6,6 @@ #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> -#include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/tty_port.h> @@ -242,7 +241,7 @@ struct tty_struct { void *driver_data; spinlock_t files_lock; int write_cnt; - unsigned char *write_buf; + u8 *write_buf; struct list_head tty_files; @@ -390,14 +389,14 @@ int vcs_init(void); extern const struct class tty_class; /** - * tty_kref_get - get a tty reference - * @tty: tty device + * tty_kref_get - get a tty reference + * @tty: tty device * - * Return a new reference to a tty object. The caller must hold - * sufficient locks/counts to ensure that their existing reference cannot - * go away + * Returns: a new reference to a tty object + * + * Locking: The caller must hold sufficient locks/counts to ensure that their + * existing reference cannot go away. */ - static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) @@ -410,17 +409,18 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout); void stop_tty(struct tty_struct *tty); void start_tty(struct tty_struct *tty); void tty_write_message(struct tty_struct *tty, char *msg); -int tty_send_xchar(struct tty_struct *tty, char ch); -int tty_put_char(struct tty_struct *tty, unsigned char c); +int tty_send_xchar(struct tty_struct *tty, u8 ch); +int tty_put_char(struct tty_struct *tty, u8 c); unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); -int tty_throttle_safe(struct tty_struct *tty); -int tty_unthrottle_safe(struct tty_struct *tty); +bool tty_throttle_safe(struct tty_struct *tty); +bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); +int tty_get_tiocm(struct tty_struct *tty); int is_current_pgrp_orphaned(void); void tty_hangup(struct tty_struct *tty); void tty_vhangup(struct tty_struct *tty); @@ -435,16 +435,14 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** - * tty_get_baud_rate - get tty bit rates - * @tty: tty to query + * tty_get_baud_rate - get tty bit rates + * @tty: tty to query * - * Returns the baud rate as an integer for this terminal. The - * termios lock must be held by the caller and the terminal bit - * flags may be updated. + * Returns: the baud rate as an integer for this terminal * - * Locking: none + * Locking: The termios lock must be held by the caller. */ -static inline speed_t tty_get_baud_rate(struct tty_struct *tty) +static inline speed_t tty_get_baud_rate(const struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 18beff0cec1a..7372124fbf90 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -72,8 +72,7 @@ struct serial_struct; * is closed for the last time freeing up the resources. This is * actually the second part of shutdown for routines that might sleep. * - * @write: ``ssize_t ()(struct tty_struct *tty, const unsigned char *buf, - * size_t count)`` + * @write: ``ssize_t ()(struct tty_struct *tty, const u8 *buf, size_t count)`` * * This routine is called by the kernel to write a series (@count) of * characters (@buf) to the @tty device. The characters may come from @@ -85,7 +84,7 @@ struct serial_struct; * * Optional: Required for writable devices. May not sleep. * - * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)`` + * @put_char: ``int ()(struct tty_struct *tty, u8 ch)`` * * This routine is called by the kernel to write a single character @ch to * the @tty device. If the kernel uses this routine, it must call the @@ -243,7 +242,7 @@ struct serial_struct; * Optional: If not provided, the device is assumed to have no FIFO. * Usually correct to invoke via tty_wait_until_sent(). May sleep. * - * @send_xchar: ``void ()(struct tty_struct *tty, char ch)`` + * @send_xchar: ``void ()(struct tty_struct *tty, u8 ch)`` * * This routine is used to send a high-priority XON/XOFF character (@ch) * to the @tty device. @@ -375,7 +374,7 @@ struct tty_operations { void (*flush_buffer)(struct tty_struct *tty); void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); - void (*send_xchar)(struct tty_struct *tty, char ch); + void (*send_xchar)(struct tty_struct *tty, u8 ch); int (*tiocmget)(struct tty_struct *tty); int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 6b367eb17979..1b861f2100b6 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -114,8 +114,8 @@ struct tty_port { unsigned char console:1; struct mutex mutex; struct mutex buf_mutex; - unsigned char *xmit_buf; - DECLARE_KFIFO_PTR(xmit_fifo, unsigned char); + u8 *xmit_buf; + DECLARE_KFIFO_PTR(xmit_fifo, u8); unsigned int close_delay; unsigned int closing_wait; int drain_delay; @@ -149,10 +149,10 @@ struct device *tty_port_register_device_attr(struct tty_port *port, const struct attribute_group **attr_grp); struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, - struct device *device); + struct device *host, struct device *parent); struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, - struct device *device, void *drvdata, + struct device *host, struct device *parent, void *drvdata, const struct attribute_group **attr_grp); void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); diff --git a/include/linux/types.h b/include/linux/types.h index 253168bb3fe1..2bc8766ba20c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -120,6 +120,9 @@ typedef s64 int64_t; #define aligned_be64 __aligned_be64 #define aligned_le64 __aligned_le64 +/* Nanosecond scalar representation for kernel time values */ +typedef s64 ktime_t; + /** * The type used for indexing onto a disc or disc partition. * diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index ffe48e69b3f3..457879938fc1 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p) p->v++; } -static inline void u64_stats_init(struct u64_stats_sync *syncp) -{ - seqcount_init(&syncp->seq); -} +#define u64_stats_init(syncp) \ + do { \ + struct u64_stats_sync *__s = (syncp); \ + seqcount_init(&__s->seq); \ + } while (0) static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cf3ada3e820e..c499ae809c7d 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -10,6 +10,7 @@ typedef u16 ucs2_char_t; unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); unsigned long ucs2_utf8size(const ucs2_char_t *src); diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..e398e1dbd2d3 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ - gro_enabled:1, /* Request GRO aggregation */ - accept_udp_l4:1, - accept_udp_fraglist:1; + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +67,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -93,30 +92,51 @@ struct udp_sock { /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ int forward_threshold; + + /* Cache friendly copy of sk->sk_peek_off >= 0 */ + bool peeking_with_offset; }; -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + +#define UDP_MAX_SEGMENTS (1 << 7UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) +static inline int udp_set_peek_off(struct sock *sk, int val) +{ + sk_set_peek_off(sk, val); + WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0); + return 0; +} + static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, @@ -130,15 +150,45 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) + return true; + + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) return true; return false; @@ -146,8 +196,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index b0542cd11aeb..f85ec5613721 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -12,20 +12,12 @@ * to detect when we overlook these differences. * */ -#include <linux/types.h> +#include <linux/uidgid_types.h> #include <linux/highuid.h> struct user_namespace; extern struct user_namespace init_user_ns; - -typedef struct { - uid_t val; -} kuid_t; - - -typedef struct { - gid_t val; -} kgid_t; +struct uid_gid_map; #define KUIDT_INIT(value) (kuid_t){ value } #define KGIDT_INIT(value) (kgid_t){ value } @@ -138,6 +130,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) return from_kgid(ns, gid) != (gid_t) -1; } +u32 map_id_down(struct uid_gid_map *map, u32 id); +u32 map_id_up(struct uid_gid_map *map, u32 id); + #else static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid) @@ -186,6 +181,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) return gid_valid(gid); } +static inline u32 map_id_down(struct uid_gid_map *map, u32 id) +{ + return id; +} + +static inline u32 map_id_up(struct uid_gid_map *map, u32 id) +{ + return id; +} #endif /* CONFIG_USER_NS */ #endif /* _LINUX_UIDGID_H */ diff --git a/include/linux/uidgid_types.h b/include/linux/uidgid_types.h new file mode 100644 index 000000000000..b35ac4955a33 --- /dev/null +++ b/include/linux/uidgid_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UIDGID_TYPES_H +#define _LINUX_UIDGID_TYPES_H + +#include <linux/types.h> + +typedef struct { + uid_t val; +} kuid_t; + +typedef struct { + gid_t val; +} kgid_t; + +#endif /* _LINUX_UIDGID_TYPES_H */ diff --git a/include/linux/uio.h b/include/linux/uio.h index 42bce38a8e87..00cebe2b70de 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,12 +21,12 @@ struct kvec { enum iter_type { /* iter types */ + ITER_UBUF, ITER_IOVEC, - ITER_KVEC, ITER_BVEC, + ITER_KVEC, ITER_XARRAY, ITER_DISCARD, - ITER_UBUF, }; #define ITER_SOURCE 1 // == WRITE @@ -40,14 +40,9 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; - bool copy_mc; bool nofault; bool data_source; - bool user_backed; - union { - size_t iov_offset; - int last_offset; - }; + size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type @@ -143,7 +138,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) static inline bool user_backed_iter(const struct iov_iter *i) { - return i->user_backed; + return iter_is_ubuf(i) || iter_is_iovec(i); } /* @@ -252,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); -static inline void iov_iter_set_copy_mc(struct iov_iter *i) -{ - i->copy_mc = true; -} - -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return i->copy_mc; -} #else #define _copy_mc_to_iter _copy_to_iter -static inline void iov_iter_set_copy_mc(struct iov_iter *i) { } -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return false; -} #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); @@ -342,27 +323,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} -size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i); - struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); @@ -372,8 +332,6 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec, ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat); -int import_single_range(int type, void __user *buf, size_t len, - struct iovec *iov, struct iov_iter *i); int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, @@ -382,8 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, - .copy_mc = false, - .user_backed = true, .data_source = direction, .ubuf = buf, .count = count, diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 47c5962b876b..18238dc8bfd3 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -28,19 +28,26 @@ struct uio_map; * logical, virtual, or physical & phys_addr_t * should always be large enough to handle any of * the address types) + * @dma_addr: DMA handle set by dma_alloc_coherent, used with + * UIO_MEM_DMA_COHERENT only (@addr should be the + * void * returned from the same dma_alloc_coherent call) * @offs: offset of device memory within the page * @size: size of IO (multiple of page size) * @memtype: type of memory addr points to * @internal_addr: ioremap-ped version of addr, for driver internal use + * @dma_device: device struct that was passed to dma_alloc_coherent, + * used with UIO_MEM_DMA_COHERENT only * @map: for use by the UIO core only. */ struct uio_mem { const char *name; phys_addr_t addr; + dma_addr_t dma_addr; unsigned long offs; resource_size_t size; int memtype; void __iomem *internal_addr; + struct device *dma_device; struct uio_map *map; }; @@ -158,6 +165,12 @@ extern int __must_check #define UIO_MEM_LOGICAL 2 #define UIO_MEM_VIRTUAL 3 #define UIO_MEM_IOVA 4 +/* + * UIO_MEM_DMA_COHERENT exists for legacy drivers that had been getting by with + * improperly mapping DMA coherent allocations through the other modes. + * Do not use in new drivers. + */ +#define UIO_MEM_DMA_COHERENT 5 /* defines for uio_port->porttype */ #define UIO_PORT_NONE 0 diff --git a/include/linux/units.h b/include/linux/units.h index 2793a41e73a2..00e15de33eca 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -2,6 +2,7 @@ #ifndef _LINUX_UNITS_H #define _LINUX_UNITS_H +#include <linux/bits.h> #include <linux/math.h> /* Metric prefixes in accordance with Système international (d'unités) */ @@ -23,14 +24,21 @@ #define NANOHZ_PER_HZ 1000000000UL #define MICROHZ_PER_HZ 1000000UL #define MILLIHZ_PER_HZ 1000UL + #define HZ_PER_KHZ 1000UL -#define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL +#define KHZ_PER_MHZ 1000UL +#define KHZ_PER_GHZ 1000000UL + #define MILLIWATT_PER_WATT 1000UL #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL +#define BYTES_PER_KBIT (KILO / BITS_PER_BYTE) +#define BYTES_PER_MBIT (MEGA / BITS_PER_BYTE) +#define BYTES_PER_GBIT (GIGA / BITS_PER_BYTE) + #define ABSOLUTE_ZERO_MILLICELSIUS -273150 static inline long milli_kelvin_to_millicelsius(long t) diff --git a/include/linux/usb.h b/include/linux/usb.h index a21074861f91..9e52179872a5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -632,7 +632,6 @@ struct usb3_lpm_parameters { * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) * @slot_id: Slot ID assigned by xHCI - * @removable: Device can be physically removed from this port * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. * @u2_params: exit latencies for USB3 U2 LPM state, and hub-initiated timeout. @@ -1145,16 +1144,6 @@ extern ssize_t usb_store_new_id(struct usb_dynids *dynids, extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf); /** - * struct usbdrv_wrap - wrapper for driver-model structure - * @driver: The driver-model core driver structure. - * @for_devices: Non-zero for device drivers, 0 for interface drivers. - */ -struct usbdrv_wrap { - struct device_driver driver; - int for_devices; -}; - -/** * struct usb_driver - identifies USB interface driver to usbcore * @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. @@ -1194,7 +1183,7 @@ struct usbdrv_wrap { * is bound to the driver. * @dynids: used internally to hold the list of dynamically added device * ids for this driver. - * @drvwrap: Driver-model core structure wrapper. + * @driver: The driver-model core driver structure. * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be * added to this driver by preventing the sysfs file from being created. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend @@ -1242,13 +1231,13 @@ struct usb_driver { const struct attribute_group **dev_groups; struct usb_dynids dynids; - struct usbdrv_wrap drvwrap; + struct device_driver driver; unsigned int no_dynamic_id:1; unsigned int supports_autosuspend:1; unsigned int disable_hub_initiated_lpm:1; unsigned int soft_unbind:1; }; -#define to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver) +#define to_usb_driver(d) container_of(d, struct usb_driver, driver) /** * struct usb_device_driver - identifies USB device driver to usbcore @@ -1264,9 +1253,12 @@ struct usb_driver { * module is being unloaded. * @suspend: Called when the device is going to be suspended by the system. * @resume: Called when the device is being resumed by the system. + * @choose_configuration: If non-NULL, called instead of the default + * usb_choose_configuration(). If this returns an error then we'll go + * on to call the normal usb_choose_configuration(). * @dev_groups: Attributes attached to the device that will be created once it * is bound to the driver. - * @drvwrap: Driver-model core structure wrapper. + * @driver: The driver-model core driver structure. * @id_table: used with @match() to select better matching driver at * probe() time. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend @@ -1275,7 +1267,7 @@ struct usb_driver { * resume and suspend functions will be called in addition to the driver's * own, so this part of the setup does not need to be replicated. * - * USB drivers must provide all the fields listed above except drvwrap, + * USB drivers must provide all the fields listed above except driver, * match, and id_table. */ struct usb_device_driver { @@ -1287,14 +1279,17 @@ struct usb_device_driver { int (*suspend) (struct usb_device *udev, pm_message_t message); int (*resume) (struct usb_device *udev, pm_message_t message); + + int (*choose_configuration) (struct usb_device *udev); + const struct attribute_group **dev_groups; - struct usbdrv_wrap drvwrap; + struct device_driver driver; const struct usb_device_id *id_table; unsigned int supports_autosuspend:1; unsigned int generic_subclass:1; }; #define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ - drvwrap.driver) + driver) /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number @@ -1823,22 +1818,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size, void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); -#if 0 -struct urb *usb_buffer_map(struct urb *urb); -void usb_buffer_dmasync(struct urb *urb); -void usb_buffer_unmap(struct urb *urb); -#endif - -struct scatterlist; -int usb_buffer_map_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int nents); -#if 0 -void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); -#endif -void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); - /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index ca796dc1a984..6e5555610010 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -82,7 +82,7 @@ struct uac_clock_source_descriptor { #define UAC_CLOCK_SOURCE_TYPE_INT_PROG 0x3 #define UAC_CLOCK_SOURCE_SYNCED_TO_SOF (1 << 2) -/* 4.7.2.2 Clock Source Descriptor */ +/* 4.7.2.2 Clock Selector Descriptor */ struct uac_clock_selector_descriptor { __u8 bLength; @@ -91,7 +91,7 @@ struct uac_clock_selector_descriptor { __u8 bClockID; __u8 bNrInPins; __u8 baCSourceID[]; - /* bmControls and iClockSource omitted */ + /* bmControls and iClockSelector omitted */ } __attribute__((packed)); /* 4.7.2.3 Clock Multiplier Descriptor */ diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 0b4f2d5faa08..5a7f96684ea2 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -64,6 +64,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_PMQOS BIT(15) #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) #define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) +#define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 6014340ba980..af3cd2aae4bc 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -35,6 +35,14 @@ * are ready. The control transfer will then be kept from completing till * all the function drivers that requested for USB_GADGET_DELAYED_STAUS * invoke usb_composite_setup_continue(). + * + * NOTE: USB_GADGET_DELAYED_STATUS must not be used in UDC drivers: they + * must delay completing the status stage for 0-length control transfers + * regardless of the whether USB_GADGET_DELAYED_STATUS is returned from + * the gadget driver's setup() callback. + * Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS, + * which is a bug. These drivers must be fixed and USB_GADGET_DELAYED_STATUS + * must be contained within the composite framework. */ #define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 75bda0783395..56dda8e1562d 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -52,6 +52,7 @@ struct usb_ep; * @short_not_ok: When reading data, makes short packets be * treated as errors (queue stops advancing till cleanup). * @dma_mapped: Indicates if request has been mapped to DMA (internal) + * @sg_was_mapped: Set if the scatterlist has been mapped before the request * @complete: Function called when request completes, so this request and * its buffer may be re-used. The function will always be called with * interrupts disabled, and it must not sleep. @@ -111,6 +112,7 @@ struct usb_request { unsigned zero:1; unsigned short_not_ok:1; unsigned dma_mapped:1; + unsigned sg_was_mapped:1; void (*complete)(struct usb_ep *ep, struct usb_request *req); @@ -711,6 +713,15 @@ static inline int usb_gadget_check_config(struct usb_gadget *gadget) * get_interface. Setting a configuration (or interface) is where * endpoints should be activated or (config 0) shut down. * + * The gadget driver's setup() callback does not have to queue a response to + * ep0 within the setup() call, the driver can do it after setup() returns. + * The UDC driver must wait until such a response is queued before proceeding + * with the data/status stages of the control transfer. + * + * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS + * being returned from the setup() callback, which is a bug. See the comment + * next to USB_GADGET_DELAYED_STATUS for details. + * * (Note that only the default control endpoint is supported. Neither * hosts nor devices generally support control traffic except to ep0.) * diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 61d4f0b793dc..ac95e7c89df5 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -55,7 +55,7 @@ struct giveback_urb_bh { bool high_prio; spinlock_t lock; struct list_head head; - struct tasklet_struct bh; + struct work_struct bh; struct usb_host_endpoint *completing_ep; }; @@ -372,8 +372,9 @@ struct hc_driver { * or bandwidth constraints. */ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); - /* Returns the hardware-chosen device address */ - int (*address_device)(struct usb_hcd *, struct usb_device *udev); + /* Set the hardware-chosen device address */ + int (*address_device)(struct usb_hcd *, struct usb_device *udev, + unsigned int timeout_ms); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. @@ -484,8 +485,25 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev, extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); +#ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + if (!usb_hcd_amd_remote_wakeup_quirk(dev)) + return false; + if (driver->flags & (HCD_USB11 | HCD_USB3)) + return true; + return false; +} +#else /* CONFIG_USB_PCI_AMD */ +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + return false; +} +#endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ diff --git a/include/linux/usb/ljca.h b/include/linux/usb/ljca.h new file mode 100644 index 000000000000..47661feda96c --- /dev/null +++ b/include/linux/usb/ljca.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef _LINUX_USB_LJCA_H_ +#define _LINUX_USB_LJCA_H_ + +#include <linux/auxiliary_bus.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define LJCA_MAX_GPIO_NUM 64 + +#define auxiliary_dev_to_ljca_client(auxiliary_dev) \ + container_of(auxiliary_dev, struct ljca_client, auxdev) + +struct ljca_adapter; + +/** + * typedef ljca_event_cb_t - event callback function signature + * + * @context: the execution context of who registered this callback + * @cmd: the command from device for this event + * @evt_data: the event data payload + * @len: the event data payload length + * + * The callback function is called in interrupt context and the data payload is + * only valid during the call. If the user needs later access of the data, it + * must copy it. + */ +typedef void (*ljca_event_cb_t)(void *context, u8 cmd, const void *evt_data, int len); + +/** + * struct ljca_client - represent a ljca client device + * + * @type: ljca client type + * @id: ljca client id within same client type + * @link: ljca client on the same ljca adapter + * @auxdev: auxiliary device object + * @adapter: ljca adapter the ljca client sit on + * @context: the execution context of the event callback + * @event_cb: ljca client driver register this callback to get + * firmware asynchronous rx buffer pending notifications + * @event_cb_lock: spinlock to protect event callback + */ +struct ljca_client { + u8 type; + u8 id; + struct list_head link; + struct auxiliary_device auxdev; + struct ljca_adapter *adapter; + + void *context; + ljca_event_cb_t event_cb; + /* lock to protect event_cb */ + spinlock_t event_cb_lock; +}; + +/** + * struct ljca_gpio_info - ljca gpio client device info + * + * @num: ljca gpio client device pin number + * @valid_pin_map: ljca gpio client device valid pin mapping + */ +struct ljca_gpio_info { + unsigned int num; + DECLARE_BITMAP(valid_pin_map, LJCA_MAX_GPIO_NUM); +}; + +/** + * struct ljca_i2c_info - ljca i2c client device info + * + * @id: ljca i2c client device identification number + * @capacity: ljca i2c client device capacity + * @intr_pin: ljca i2c client device interrupt pin number if exists + */ +struct ljca_i2c_info { + u8 id; + u8 capacity; + u8 intr_pin; +}; + +/** + * struct ljca_spi_info - ljca spi client device info + * + * @id: ljca spi client device identification number + * @capacity: ljca spi client device capacity + */ +struct ljca_spi_info { + u8 id; + u8 capacity; +}; + +/** + * ljca_register_event_cb - register a callback function to receive events + * + * @client: ljca client device + * @event_cb: callback function + * @context: execution context of event callback + * + * Return: 0 in case of success, negative value in case of error + */ +int ljca_register_event_cb(struct ljca_client *client, ljca_event_cb_t event_cb, void *context); + +/** + * ljca_unregister_event_cb - unregister the callback function for an event + * + * @client: ljca client device + */ +void ljca_unregister_event_cb(struct ljca_client *client); + +/** + * ljca_transfer - issue a LJCA command and wait for a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device; it should + * be 0 when obuf is NULL + * @ibuf: any data associated with the response will be copied here; it can be + * NULL if the user doesn't need the response data + * @ibuf_len: must be initialized to the input buffer size + * + * Return: the actual length of response data for success, negative value for errors + */ +int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len, u8 *ibuf, u8 ibuf_len); + +/** + * ljca_transfer_noack - issue a LJCA command without a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device + * + * Return: 0 for success, negative value for errors + */ +int ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len); + +#endif diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 98487fd7ab11..de42f14bd280 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -6,6 +6,7 @@ #ifndef __LINUX_USB_OF_H #define __LINUX_USB_OF_H +#include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/usb/otg.h> #include <linux/usb/phy.h> @@ -17,6 +18,7 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); +enum usb_port_connect_type usb_of_get_connect_type(struct usb_device *hub, int port1); struct device_node *usb_of_get_device_node(struct usb_device *hub, int port1); bool usb_of_has_combined_node(struct usb_device *udev); struct device_node *usb_of_get_interface_node(struct usb_device *udev, @@ -37,6 +39,11 @@ static inline int of_usb_update_otg_caps(struct device_node *np, { return 0; } +static inline enum usb_port_connect_type +usb_of_get_connect_type(const struct usb_device *hub, int port1) +{ + return USB_PORT_CONNECT_TYPE_UNKNOWN; +} static inline struct device_node * usb_of_get_device_node(struct usb_device *hub, int port1) { diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index c59fb79a42e8..d50098fb16b5 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -228,6 +228,7 @@ enum pd_pdo_type { #define PDO_FIXED_UNCHUNK_EXT BIT(24) /* Unchunked Extended Message supported (Source) */ #define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */ #define PDO_FIXED_FRS_CURR_SHIFT 23 +#define PDO_FIXED_PEAK_CURR_SHIFT 20 #define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ #define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ @@ -482,6 +483,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_BIST_CONT_MODE 50 /* 30 - 60 ms */ #define PD_T_SINK_TX 16 /* 16 - 20 ms */ #define PD_T_CHUNK_NOT_SUPP 42 /* 40 - 50 ms */ +#define PD_T_VCONN_STABLE 50 #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index b057250704e8..5c48e8a81403 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -7,6 +7,7 @@ #define __LINUX_USB_PD_VDO_H #include "pd.h" +#include <linux/bitfield.h> /* * VDO : Vendor Defined Message Object @@ -86,12 +87,15 @@ * * Request is simply properly formatted SVDM header * - * Response is 4 data objects: + * Response is 4 data objects for Power Delivery 2.0 and Passive Cables for + * Power Delivery 3.0. Active Cables in Power Delivery 3.0 have 5 data objects. * [0] :: SVDM header * [1] :: Identitiy header * [2] :: Cert Stat VDO * [3] :: (Product | Cable) VDO + * [4] :: Cable VDO 1 * [4] :: AMA VDO + * [5] :: Cable VDO 2 * */ #define VDO_INDEX_HDR 0 @@ -100,6 +104,8 @@ #define VDO_INDEX_CABLE 3 #define VDO_INDEX_PRODUCT 3 #define VDO_INDEX_AMA 4 +#define VDO_INDEX_CABLE_1 4 +#define VDO_INDEX_CABLE_2 5 /* * SVDM Identity Header @@ -150,6 +156,7 @@ #define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26)) #define PD_IDH_DFP_PTYPE(vdo) (((vdo) >> 23) & 0x7) #define PD_IDH_CONN_TYPE(vdo) (((vdo) >> 21) & 0x3) +#define PD_IDH_HOST_SUPP(vdo) ((vdo) & (1 << 31)) /* * Cert Stat VDO @@ -182,7 +189,7 @@ * <5:3> :: Alternate modes * <2:0> :: USB highest speed */ -#define PD_VDO_UFP_DEVCAP(vdo) (((vdo) & GENMASK(27, 24)) >> 24) +#define PD_VDO_UFP_DEVCAP(vdo) FIELD_GET(GENMASK(27, 24), vdo) /* UFP VDO Version */ #define UFP_VDO_VER1_2 2 @@ -241,7 +248,7 @@ * <21:5> :: Reserved * <4:0> :: Port number */ -#define PD_VDO_DFP_HOSTCAP(vdo) (((vdo) & GENMASK(26, 24)) >> 24) +#define PD_VDO_DFP_HOSTCAP(vdo) FIELD_GET(GENMASK(26, 24), vdo) #define DFP_VDO_VER1_1 1 #define HOST_USB2_CAPABLE BIT(0) @@ -376,6 +383,7 @@ | ((vbm) & 0x3) << 9 | (sbu) << 8 | (sbut) << 7 | ((cur) & 0x3) << 5 \ | (vbt) << 4 | (sopp) << 3 | ((spd) & 0x7)) +#define VDO_TYPEC_CABLE_SPEED(vdo) ((vdo) & 0x7) #define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index b513749582d7..e4de6bc1f69b 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -144,10 +144,6 @@ struct usb_phy { */ int (*set_wakeup)(struct usb_phy *x, bool enabled); - /* notify phy port status change */ - int (*notify_port_status)(struct usb_phy *x, int port, - u16 portstatus, u16 portchange); - /* notify phy connect status change */ int (*notify_connect)(struct usb_phy *x, enum usb_device_speed speed); @@ -321,15 +317,6 @@ usb_phy_set_wakeup(struct usb_phy *x, bool enabled) } static inline int -usb_phy_notify_port_status(struct usb_phy *x, int port, u16 portstatus, u16 portchange) -{ - if (x && x->notify_port_status) - return x->notify_port_status(x, port, portstatus, portchange); - else - return 0; -} - -static inline int usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed) { if (x && x->notify_connect) diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index eeb7c2157c72..59409c1fc3de 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -72,4 +72,7 @@ /* device has endpoints that should be ignored */ #define USB_QUIRK_ENDPOINT_IGNORE BIT(15) +/* short SET_ADDRESS request timeout */ +#define USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT BIT(16) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 287e9d83fb8b..33a4c146dc19 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index d418c55523a7..372898d9eeb0 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -5,16 +5,6 @@ * Copyright (C) 2011 Renesas Solutions Corp. * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef RENESAS_USB_H #define RENESAS_USB_H diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 83376473ac76..47a86b8a4a50 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -36,7 +36,9 @@ #define TCPC_ALERT_MASK 0x12 #define TCPC_POWER_STATUS_MASK 0x14 -#define TCPC_FAULT_STATUS_MASK 0x15 + +#define TCPC_FAULT_STATUS_MASK 0x15 +#define TCPC_FAULT_STATUS_MASK_VCONN_OC BIT(1) #define TCPC_EXTENDED_STATUS_MASK 0x16 #define TCPC_EXTENDED_STATUS_MASK_VSAFE0V BIT(0) @@ -104,6 +106,7 @@ #define TCPC_FAULT_STATUS 0x1f #define TCPC_FAULT_STATUS_ALL_REG_RST_TO_DEFAULT BIT(7) +#define TCPC_FAULT_STATUS_VCONN_OC BIT(1) #define TCPC_ALERT_EXTENDED 0x21 @@ -142,6 +145,7 @@ #define TCPC_RX_BYTE_CNT 0x30 #define TCPC_RX_BUF_FRAME_TYPE 0x31 #define TCPC_RX_BUF_FRAME_TYPE_SOP 0 +#define TCPC_RX_BUF_FRAME_TYPE_SOP1 1 #define TCPC_RX_HDR 0x32 #define TCPC_RX_DATA 0x34 /* through 0x4f */ @@ -195,12 +199,23 @@ struct tcpci; * Chip level drivers are expected to check for contaminant and call * tcpm_clean_port when the port is clean to put the port back into * toggling state. + * @cable_comm_capable + * optional; Set when TCPC can communicate with cable plugs over SOP' + * @attempt_vconn_swap_discovery: + * Optional; The callback is called by the TCPM when the result of + * a Discover Identity request indicates that the port partner is + * a receptacle capable of modal operation. Chip level TCPCI drivers + * can implement their own policy to determine if and when a Vconn + * swap following Discover Identity on SOP' occurs. + * Return true when the TCPM is allowed to request a Vconn swap + * after Discovery Identity on SOP. */ struct tcpci_data { struct regmap *regmap; unsigned char TX_BUF_BYTE_x_hidden:1; unsigned char auto_discharge_disconnect:1; unsigned char vbus_vsafe0v:1; + unsigned char cable_comm_capable:1; int (*init)(struct tcpci *tcpci, struct tcpci_data *data); int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data, @@ -212,6 +227,7 @@ struct tcpci_data { void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data, bool capable); void (*check_contaminant)(struct tcpci *tcpci, struct tcpci_data *data); + bool (*attempt_vconn_swap_discovery)(struct tcpci *tcpci, struct tcpci_data *data); }; struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data); diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index ab7ca872950b..061da9546a81 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -119,6 +119,17 @@ enum tcpm_transmit_type { * at the end of the deboumce period or when the port is still * toggling. Chip level drivers are expected to check for contaminant * and call tcpm_clean_port when the port is clean. + * @cable_comm_capable + * Optional; Returns whether cable communication over SOP' is supported + * by the tcpc + * @attempt_vconn_swap_discovery: + * Optional; The callback is called by the TCPM when the result of + * a Discover Identity request indicates that the port partner is + * a receptacle capable of modal operation. Chip level TCPCI drivers + * can implement their own policy to determine if and when a Vconn + * swap following Discover Identity on SOP' occurs. + * Return true when the TCPM is allowed to request a Vconn swap + * after Discovery Identity on SOP. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -133,6 +144,8 @@ struct tcpc_dev { enum typec_cc_status *cc2); int (*set_polarity)(struct tcpc_dev *dev, enum typec_cc_polarity polarity); + int (*set_orientation)(struct tcpc_dev *dev, + enum typec_orientation orientation); int (*set_vconn)(struct tcpc_dev *dev, bool on); int (*set_vbus)(struct tcpc_dev *dev, bool on, bool charge); int (*set_current_limit)(struct tcpc_dev *dev, u32 max_ma, u32 mv); @@ -154,6 +167,8 @@ struct tcpc_dev { bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); void (*set_partner_usb_comm_capable)(struct tcpc_dev *dev, bool enable); void (*check_contaminant)(struct tcpc_dev *dev); + bool (*cable_comm_capable)(struct tcpc_dev *dev); + bool (*attempt_vconn_swap_discovery)(struct tcpc_dev *dev); }; struct tcpm_port; @@ -166,12 +181,14 @@ void tcpm_cc_change(struct tcpm_port *port); void tcpm_sink_frs(struct tcpm_port *port); void tcpm_sourcing_vbus(struct tcpm_port *port); void tcpm_pd_receive(struct tcpm_port *port, - const struct pd_message *msg); + const struct pd_message *msg, + enum tcpm_transmit_type rx_sop_type); void tcpm_pd_transmit_complete(struct tcpm_port *port, enum tcpm_transmit_status status); void tcpm_pd_hard_reset(struct tcpm_port *port); void tcpm_tcpc_reset(struct tcpm_port *port); void tcpm_port_clean(struct tcpm_port *port); bool tcpm_port_is_toggling(struct tcpm_port *port); +void tcpm_port_error_recovery(struct tcpm_port *port); #endif /* __LINUX_USB_TCPM_H */ diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 8fa781207970..b35b427561ab 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -18,6 +18,7 @@ struct typec_cable; struct typec_plug; struct typec_port; struct typec_altmode_ops; +struct typec_cable_ops; struct fwnode_handle; struct device; @@ -157,6 +158,9 @@ void typec_port_register_altmodes(struct typec_port *port, const struct typec_altmode_ops *ops, void *drvdata, struct typec_altmode **altmodes, size_t n); +void typec_port_register_cable_ops(struct typec_altmode **altmodes, int max_altmodes, + const struct typec_cable_ops *ops); + void typec_unregister_altmode(struct typec_altmode *altmode); struct typec_port *typec_altmode2port(struct typec_altmode *alt); @@ -202,6 +206,8 @@ struct typec_cable_desc { * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported + * @attach: Notification about attached USB device + * @deattach: Notification about removed USB device * * Details about a partner that is attached to USB Type-C port. If @identity * member exists when partner is registered, a directory named "identity" is @@ -217,6 +223,9 @@ struct typec_partner_desc { enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ + + void (*attach)(struct typec_partner *partner, struct device *dev); + void (*deattach)(struct typec_partner *partner, struct device *dev); }; /** @@ -328,6 +337,9 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); +int typec_get_cable_svdm_version(struct typec_port *port); +void typec_cable_set_svdm_version(struct typec_cable *cable, enum usb_pd_svdm_ver svdm_version); + struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner, struct usb_power_delivery_desc *desc); @@ -335,4 +347,36 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +/** + * struct typec_connector - Representation of Type-C port for external drivers + * @attach: notification about device removal + * @deattach: notification about device removal + * + * Drivers that control the USB and other ports (DisplayPorts, etc.), that are + * connected to the Type-C connectors, can use these callbacks to inform the + * Type-C connector class about connections and disconnections. That information + * can then be used by the typec-port drivers to power on or off parts that are + * needed or not needed - as an example, in USB mode if USB2 device is + * enumerated, USB3 components (retimers, phys, and what have you) do not need + * to be powered on. + * + * The attached (enumerated) devices will be liked with the typec-partner device. + */ +struct typec_connector { + void (*attach)(struct typec_connector *con, struct device *dev); + void (*deattach)(struct typec_connector *con, struct device *dev); +}; + +static inline void typec_attach(struct typec_connector *con, struct device *dev) +{ + if (con && con->attach) + con->attach(con, dev); +} + +static inline void typec_deattach(struct typec_connector *con, struct device *dev) +{ + if (con && con->deattach) + con->deattach(con, dev); +} + #endif /* __LINUX_USB_TYPEC_H */ diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index 28aeef8f9e7b..b3c0866ea70f 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -20,6 +20,7 @@ struct typec_altmode_ops; * @active: Tells has the mode been entered or not * @desc: Optional human readable description of the mode * @ops: Operations vector from the driver + * @cable_ops: Cable operations vector from the driver. */ struct typec_altmode { struct device dev; @@ -30,6 +31,7 @@ struct typec_altmode { char *desc; const struct typec_altmode_ops *ops; + const struct typec_cable_ops *cable_ops; }; #define to_typec_altmode(d) container_of(d, struct typec_altmode, dev) @@ -75,6 +77,34 @@ int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf, const struct typec_altmode * typec_altmode_get_partner(struct typec_altmode *altmode); +/** + * struct typec_cable_ops - Cable alternate mode operations vector + * @enter: Operations to be executed with Enter Mode Command + * @exit: Operations to be executed with Exit Mode Command + * @vdm: Callback for SVID specific commands + */ +struct typec_cable_ops { + int (*enter)(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo); + int (*exit)(struct typec_altmode *altmode, enum typec_plug_index sop); + int (*vdm)(struct typec_altmode *altmode, enum typec_plug_index sop, + const u32 hdr, const u32 *vdo, int cnt); +}; + +int typec_cable_altmode_enter(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo); +int typec_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plug_index sop); +int typec_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug_index sop, + const u32 header, const u32 *vdo, int count); + +/** + * typec_altmode_get_cable_svdm_version - Get negotiated SVDM version for cable plug + * @altmode: Handle to the alternate mode + */ +static inline int +typec_altmode_get_cable_svdm_version(struct typec_altmode *altmode) +{ + return typec_get_cable_svdm_version(typec_altmode2port(altmode)); +} + /* * These are the connector states (USB, Safe and Alt Mode) defined in USB Type-C * Specification. SVID specific connector states are expected to follow and diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index 8d09c2f0a9b8..f2da264d9c14 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -3,6 +3,7 @@ #define __USB_TYPEC_DP_H #include <linux/usb/typec_altmode.h> +#include <linux/bitfield.h> #define USB_TYPEC_DP_SID 0xff01 /* USB IF has not assigned a Standard ID (SID) for VirtualLink, @@ -67,17 +68,26 @@ enum { #define DP_CAP_UFP_D 1 #define DP_CAP_DFP_D 2 #define DP_CAP_DFP_D_AND_UFP_D 3 -#define DP_CAP_DP_SIGNALING BIT(2) /* Always set */ -#define DP_CAP_GEN2 BIT(3) /* Reserved after v1.0b */ +#define DP_CAP_DP_SIGNALLING(_cap_) FIELD_GET(GENMASK(5, 2), _cap_) +#define DP_CAP_SIGNALLING_HBR3 1 +#define DP_CAP_SIGNALLING_UHBR10 2 +#define DP_CAP_SIGNALLING_UHBR20 3 #define DP_CAP_RECEPTACLE BIT(6) #define DP_CAP_USB BIT(7) -#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) -#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) +#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(15, 8), _cap_) +#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(23, 16), _cap_) /* Get pin assignment taking plug & receptacle into consideration */ #define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) #define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_UHBR_13_5_SUPPORT BIT(26) +#define DP_CAP_CABLE_TYPE(_cap_) FIELD_GET(GENMASK(29, 28), _cap_) +#define DP_CAP_CABLE_TYPE_PASSIVE 0 +#define DP_CAP_CABLE_TYPE_RE_TIMER 1 +#define DP_CAP_CABLE_TYPE_RE_DRIVER 2 +#define DP_CAP_CABLE_TYPE_OPTICAL 3 +#define DP_CAP_DPAM_VERSION BIT(30) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) @@ -97,13 +107,24 @@ enum { #define DP_CONF_CURRENTLY(_conf_) ((_conf_) & 3) #define DP_CONF_UFP_U_AS_DFP_D BIT(0) #define DP_CONF_UFP_U_AS_UFP_D BIT(1) -#define DP_CONF_SIGNALING_DP BIT(2) -#define DP_CONF_SIGNALING_GEN_2 BIT(3) /* Reserved after v1.0b */ +#define DP_CONF_SIGNALLING_MASK GENMASK(5, 2) +#define DP_CONF_SIGNALLING_SHIFT 2 +#define DP_CONF_SIGNALLING_HBR3 1 +#define DP_CONF_SIGNALLING_UHBR10 2 +#define DP_CONF_SIGNALLING_UHBR20 3 #define DP_CONF_PIN_ASSIGNEMENT_SHIFT 8 #define DP_CONF_PIN_ASSIGNEMENT_MASK GENMASK(15, 8) /* Helper for setting/getting the pin assignment value to the configuration */ #define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8) -#define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8) +#define DP_CONF_GET_PIN_ASSIGN(_conf_) FIELD_GET(GENMASK(15, 8), _conf_) +#define DP_CONF_UHBR13_5_SUPPORT BIT(26) +#define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28) +#define DP_CONF_CABLE_TYPE_SHIFT 28 +#define DP_CONF_CABLE_TYPE_PASSIVE 0 +#define DP_CONF_CABLE_TYPE_RE_TIMER 1 +#define DP_CONF_CABLE_TYPE_RE_DRIVER 2 +#define DP_CONF_CABLE_TYPE_OPTICAL 3 +#define DP_CONF_DPAM_VERSION BIT(30) #endif /* __USB_TYPEC_DP_H */ diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 63dd44b72e0c..fa97d7e00f5c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -3,6 +3,7 @@ #define __USB_TYPEC_TBT_H #include <linux/usb/typec_altmode.h> +#include <linux/bitfield.h> #define USB_TYPEC_VENDOR_INTEL 0x8087 /* Alias for convenience */ @@ -25,7 +26,7 @@ struct typec_thunderbolt_data { /* TBT3 Device Discover Mode VDO bits */ #define TBT_MODE BIT(0) -#define TBT_ADAPTER(_vdo_) (((_vdo_) & BIT(16)) >> 16) +#define TBT_ADAPTER(_vdo_) FIELD_GET(BIT(16), _vdo_) #define TBT_ADAPTER_LEGACY 0 #define TBT_ADAPTER_TBT3 1 #define TBT_INTEL_SPECIFIC_B0 BIT(26) @@ -35,17 +36,18 @@ struct typec_thunderbolt_data { #define TBT_SET_ADAPTER(a) (((a) & 1) << 16) /* TBT3 Cable Discover Mode VDO bits */ -#define TBT_CABLE_SPEED(_vdo_) (((_vdo_) & GENMASK(18, 16)) >> 16) +#define TBT_CABLE_SPEED(_vdo_) FIELD_GET(GENMASK(18, 16), _vdo_) #define TBT_CABLE_USB3_GEN1 1 #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 -#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ - (((_vdo_) & GENMASK(20, 19)) >> 19) +#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) FIELD_GET(GENMASK(20, 19), _vdo_) + #define TBT_GEN3_NON_ROUNDED 0 #define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) +#define TBT_CABLE_ACTIVE_PASSIVE BIT(25) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) #define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..6030a8235617 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,6 +65,10 @@ enum rlimit_type { UCOUNT_RLIMIT_COUNTS, }; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc; +#endif + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -102,6 +106,10 @@ struct user_namespace { struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; + +#if IS_ENABLED(CONFIG_BINFMT_MISC) + struct binfmt_misc *binfmt_misc; +#endif } __randomize_layout; struct ucounts { diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index ac8c6854097c..05d59f74fc88 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,6 +36,52 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +/* + * Start with fault_pending_wqh and fault_wqh so they're more likely + * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. + */ +struct userfaultfd_ctx { + /* waitqueue head for the pending (i.e. not read) userfaults */ + wait_queue_head_t fault_pending_wqh; + /* waitqueue head for the userfaults */ + wait_queue_head_t fault_wqh; + /* waitqueue head for the pseudo fd to wakeup poll/read */ + wait_queue_head_t fd_wqh; + /* waitqueue head for events */ + wait_queue_head_t event_wqh; + /* a refile sequence protected by fault_pending_wqh lock */ + seqcount_spinlock_t refile_seq; + /* pseudo fd refcounting */ + refcount_t refcount; + /* userfaultfd syscall flags */ + unsigned int flags; + /* features requested from the userspace */ + unsigned int features; + /* released */ + bool released; + /* + * Prevents userfaultfd operations (fill/move/wp) from happening while + * some non-cooperative event(s) is taking place. Increments are done + * in write-mode. Whereas, userfaultfd operations, which includes + * reading mmap_changing, is done under read-mode. + */ + struct rw_semaphore map_changing_lock; + /* memory mappings are changing because of non-cooperative event */ + atomic_t mmap_changing; + /* mm with one ore more vmas attached to this userfaultfd_ctx */ + struct mm_struct *mm; +}; + extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); /* A combined operation mode + behavior flags. */ @@ -74,25 +120,31 @@ extern int mfill_atomic_install_pte(pmd_t *dst_pmd, unsigned long dst_addr, struct page *page, bool newly_allocated, uffd_flags_t flags); -extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start, +extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long src_start, unsigned long len, - atomic_t *mmap_changing, uffd_flags_t flags); -extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, + uffd_flags_t flags); +extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, unsigned long dst_start, - unsigned long len, - atomic_t *mmap_changing); -extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, - unsigned long len, atomic_t *mmap_changing, - uffd_flags_t flags); -extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, - unsigned long len, atomic_t *mmap_changing, - uffd_flags_t flags); -extern int mwriteprotect_range(struct mm_struct *dst_mm, - unsigned long start, unsigned long len, - bool enable_wp, atomic_t *mmap_changing); + unsigned long len); +extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, + unsigned long len, uffd_flags_t flags); +extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, + unsigned long len, uffd_flags_t flags); +extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, + unsigned long len, bool enable_wp); extern long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); +/* move_pages */ +void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); +void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); +ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, + unsigned long src_start, unsigned long len, __u64 flags); +int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, + struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, + unsigned long dst_addr, unsigned long src_addr); + /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) @@ -161,11 +213,22 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) } static inline bool vma_can_userfault(struct vm_area_struct *vma, - unsigned long vm_flags) + unsigned long vm_flags, + bool wp_async) { + vm_flags &= __VM_UFFD_FLAGS; + if ((vm_flags & VM_UFFD_MINOR) && (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) return false; + + /* + * If wp async enabled, and WP is the only mode enabled, allow any + * memory type. + */ + if (wp_async && (vm_flags == VM_UFFD_WP)) + return true; + #ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for @@ -175,6 +238,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; #endif + + /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || vma_is_shmem(vma); } @@ -197,6 +262,7 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); +extern bool userfaultfd_wp_async(struct vm_area_struct *vma); #else /* CONFIG_USERFAULTFD */ @@ -207,6 +273,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, return VM_FAULT_SIGBUS; } +static inline long uffd_wp_range(struct vm_area_struct *vma, + unsigned long start, unsigned long len, + bool enable_wp) +{ + return false; +} + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { @@ -297,6 +370,11 @@ static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) +{ + return false; +} + #endif /* CONFIG_USERFAULTFD */ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 0e652026b776..7977ca03ac7a 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -7,6 +7,7 @@ #include <linux/interrupt.h> #include <linux/vhost_iotlb.h> #include <linux/virtio_net.h> +#include <linux/virtio_blk.h> #include <linux/if_ether.h> /** @@ -195,6 +196,10 @@ struct vdpa_map_file { * @idx: virtqueue index * Returns int: irq number of a virtqueue, * negative number if no irq assigned. + * @get_vq_size: Get the size of a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Return u16: the size of the virtqueue * @get_vq_align: Get the virtqueue align requirement * for the device * @vdev: vdpa device @@ -204,6 +209,16 @@ struct vdpa_map_file { * @vdev: vdpa device * @idx: virtqueue index * Returns u32: group id for this virtqueue + * @get_vq_desc_group: Get the group id for the descriptor table of + * a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns u32: group id for the descriptor table + * portion of this virtqueue. Could be different + * than the one from @get_vq_group, in which case + * the access to the descriptor table can be + * confined to a separate asid, isolating from + * the virtqueue's buffer address access. * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the @@ -242,6 +257,17 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) + * @compat_reset: Reset device with compatibility quirks to + * accommodate older userspace. Only needed by + * parent driver which used to have bogus reset + * behaviour, and has to maintain such behaviour + * for compatibility with older userspace. + * Historically compliant driver only has to + * implement .reset, Historically non-compliant + * driver should implement both. + * @vdev: vdpa device + * @flags: compatibility quirks for reset + * Returns integer: success (0) or error (< 0) * @suspend: Suspend the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) @@ -317,6 +343,15 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @reset_map: Reset device memory mapping to the default + * state (optional) + * Needed for devices that are using device + * specific DMA translation and prefer mapping + * to be decoupled from the virtio life cycle, + * i.e. device .reset op does not reset mapping + * @vdev: vdpa device + * @asid: address space identifier + * Returns integer: success (0) or error (< 0) * @get_vq_dma_dev: Get the dma device for a specific * virtqueue (optional) * @vdev: vdpa device @@ -356,10 +391,12 @@ struct vdpa_config_ops { (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); + u16 (*get_vq_size)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); + u32 (*get_vq_desc_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); u64 (*get_backend_features)(const struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); @@ -373,6 +410,8 @@ struct vdpa_config_ops { u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); + int (*compat_reset)(struct vdpa_device *vdev, u32 flags); +#define VDPA_RESET_F_CLEAN_MAP 1 int (*suspend)(struct vdpa_device *vdev); int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); @@ -394,6 +433,7 @@ struct vdpa_config_ops { u64 iova, u64 size, u64 pa, u32 perm, void *opaque); int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, u64 iova, u64 size); + int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); @@ -485,14 +525,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; } -static inline int vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) { const struct vdpa_config_ops *ops = vdev->config; int ret; down_write(&vdev->cf_lock); vdev->features_valid = false; - ret = ops->reset(vdev); + if (ops->compat_reset && flags) + ret = ops->compat_reset(vdev, flags); + else + ret = ops->reset(vdev); up_write(&vdev->cf_lock); return ret; } diff --git a/include/linux/verification.h b/include/linux/verification.h index f34e50ebcf60..cb2d47f28091 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -8,6 +8,7 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +#include <linux/errno.h> #include <linux/types.h> /* diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 454e9295970c..8b1a29820409 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -69,6 +69,13 @@ struct vfio_device { u8 iommufd_attached:1; #endif u8 cdev_opened:1; +#ifdef CONFIG_DEBUG_FS + /* + * debug_root is a static property of the vfio_device + * which must be set prior to registering the vfio_device. + */ + struct dentry *debug_root; +#endif }; /** @@ -289,16 +296,12 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, /* * External user API */ -#if IS_ENABLED(CONFIG_VFIO_GROUP) struct iommu_group *vfio_file_iommu_group(struct file *file); + +#if IS_ENABLED(CONFIG_VFIO_GROUP) bool vfio_file_is_group(struct file *file); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #else -static inline struct iommu_group *vfio_file_iommu_group(struct file *file) -{ - return NULL; -} - static inline bool vfio_file_is_group(struct file *file) { return false; @@ -353,6 +356,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -360,5 +364,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 562e8754869d..a2c8b8bba711 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -127,7 +127,35 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); +int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); +ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, + void __iomem *io, char __user *buf, + loff_t off, size_t count, size_t x_start, + size_t x_end, bool iswrite); +bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, + loff_t reg_start, size_t reg_cnt, + loff_t *buf_offset, + size_t *intersect_count, + size_t *register_offset); +#define VFIO_IOWRITE_DECLATION(size) \ +int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ + bool test_mem, u##size val, void __iomem *io); + +VFIO_IOWRITE_DECLATION(8) +VFIO_IOWRITE_DECLATION(16) +VFIO_IOWRITE_DECLATION(32) +#ifdef iowrite64 +VFIO_IOWRITE_DECLATION(64) +#endif + +#define VFIO_IOREAD_DECLATION(size) \ +int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ + bool test_mem, u##size *val, void __iomem *io); + +VFIO_IOREAD_DECLATION(8) +VFIO_IOREAD_DECLATION(16) +VFIO_IOREAD_DECLATION(32) #endif /* VFIO_PCI_CORE_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4cc614a38376..26c4325aa373 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -103,6 +103,14 @@ int virtqueue_resize(struct virtqueue *vq, u32 num, int virtqueue_reset(struct virtqueue *vq, void (*recycle)(struct virtqueue *vq, void *buf)); +struct virtio_admin_cmd { + __le16 opcode; + __le16 group_type; + __le64 group_member_id; + struct scatterlist *data_sg; + struct scatterlist *result_sg; +}; + /** * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus @@ -162,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); /** * struct virtio_driver - operations for a virtio I/O driver - * @driver: underlying device driver (populate name and owner). + * @driver: underlying device driver (populate name). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. * @feature_table_size: number of entries in the feature table array. @@ -200,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) return container_of(drv, struct virtio_driver, driver); } -int register_virtio_driver(struct virtio_driver *drv); +/* use a macro to avoid include chaining to get THIS_MODULE */ +#define register_virtio_driver(drv) \ + __register_virtio_driver(drv, THIS_MODULE) +int __register_virtio_driver(struct virtio_driver *drv, struct module *owner); void unregister_virtio_driver(struct virtio_driver *drv); /* module_virtio_driver() - Helper macro for drivers that don't do diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2b3438de2c4d..da9b271b54db 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *); * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. + * @create_avq: create admin virtqueue resource. + * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -120,6 +122,8 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); + int (*create_avq)(struct virtio_device *vdev); + void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h deleted file mode 100644 index d2e2785af602..000000000000 --- a/include/linux/virtio_console.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 - * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011 - */ -#ifndef _LINUX_VIRTIO_CONSOLE_H -#define _LINUX_VIRTIO_CONSOLE_H - -#include <uapi/linux/virtio_console.h> - -int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)); -#endif /* _LINUX_VIRTIO_CONSOLE_H */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 27cc1d464321..4dfa9b69ca8d 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,6 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/udp.h> #include <uapi/linux/tcp.h> #include <uapi/linux/virtio_net.h> @@ -49,6 +51,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { + unsigned int nh_min_len = sizeof(struct iphdr); unsigned int gso_type = 0; unsigned int thlen = 0; unsigned int p_off = 0; @@ -65,6 +68,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, gso_type = SKB_GSO_TCPV6; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); + nh_min_len = sizeof(struct ipv6hdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; @@ -100,7 +104,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; - p_off = skb_transport_offset(skb) + thlen; + nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb)); + p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { @@ -140,7 +145,7 @@ retry: skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { - p_off = thlen; + p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h new file mode 100644 index 000000000000..f4a100a0fe2e --- /dev/null +++ b/include/linux/virtio_pci_admin.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_PCI_ADMIN_H +#define _LINUX_VIRTIO_PCI_ADMIN_H + +#include <linux/types.h> +#include <linux/pci.h> + +#ifdef CONFIG_VIRTIO_PCI_ADMIN_LEGACY +bool virtio_pci_admin_has_legacy_io(struct pci_dev *pdev); +int virtio_pci_admin_legacy_common_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_common_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_device_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_device_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev, + u8 req_bar_flags, u8 *bar, + u64 *bar_offset); +#endif + +#endif /* _LINUX_VIRTIO_PCI_ADMIN_H */ diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 067ac1d789bc..c0b1b1ca1163 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,44 +5,48 @@ #include <linux/pci.h> #include <linux/virtio_pci.h> -struct virtio_pci_modern_common_cfg { - struct virtio_pci_common_cfg cfg; - - __le16 queue_notify_data; /* read-write */ - __le16 queue_reset; /* read-write */ -}; - +/** + * struct virtio_pci_modern_device - info for modern PCI virtio + * @pci_dev: Ptr to the PCI device struct + * @common: Position of the common capability in the PCI config + * @device: Device-specific data (non-legacy mode) + * @notify_base: Base of vq notifications (non-legacy mode) + * @notify_pa: Physical base of vq notifications + * @isr: Where to read and clear interrupt + * @notify_len: So we can sanity-check accesses + * @device_len: So we can sanity-check accesses + * @notify_map_cap: Capability for when we need to map notifications per-vq + * @notify_offset_multiplier: Multiply queue_notify_off by this value + * (non-legacy mode). + * @modern_bars: Bitmask of BARs + * @id: Device and vendor id + * @device_id_check: Callback defined before vp_modern_probe() to be used to + * verify the PCI device is a vendor's expected device rather + * than the standard virtio PCI device + * Returns the found device id or ERRNO + * @dma_mask: Optional mask instead of the traditional DMA_BIT_MASK(64), + * for vendor devices with DMA space address limitations + */ struct virtio_pci_modern_device { struct pci_dev *pci_dev; struct virtio_pci_common_cfg __iomem *common; - /* Device-specific data (non-legacy mode) */ void __iomem *device; - /* Base of vq notifications (non-legacy mode). */ void __iomem *notify_base; - /* Physical base of vq notifications */ resource_size_t notify_pa; - /* Where to read and clear interrupt */ u8 __iomem *isr; - /* So we can sanity-check accesses. */ size_t notify_len; size_t device_len; + size_t common_len; - /* Capability for when we need to map notifications per-vq. */ int notify_map_cap; - /* Multiply queue_notify_off by this value. (non-legacy mode). */ u32 notify_offset_multiplier; - int modern_bars; - struct virtio_device_id id; - /* optional check for vendor virtio device, returns dev_id or -ERRNO */ int (*device_id_check)(struct pci_dev *pdev); - - /* optional mask for devices with limited DMA space */ u64 dma_mask; }; @@ -121,4 +125,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev); +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev); #endif diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..c82089dee0c8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) @@ -159,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default. + */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t @@ -246,4 +256,5 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor); +int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val); #endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h deleted file mode 100644 index e9c0cd36c48a..000000000000 --- a/include/linux/vlynq.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org> - */ - -#ifndef __VLYNQ_H__ -#define __VLYNQ_H__ - -#include <linux/device.h> -#include <linux/types.h> - -struct module; - -#define VLYNQ_NUM_IRQS 32 - -struct vlynq_mapping { - u32 size; - u32 offset; -}; - -enum vlynq_divisor { - vlynq_div_auto = 0, - vlynq_ldiv1, - vlynq_ldiv2, - vlynq_ldiv3, - vlynq_ldiv4, - vlynq_ldiv5, - vlynq_ldiv6, - vlynq_ldiv7, - vlynq_ldiv8, - vlynq_rdiv1, - vlynq_rdiv2, - vlynq_rdiv3, - vlynq_rdiv4, - vlynq_rdiv5, - vlynq_rdiv6, - vlynq_rdiv7, - vlynq_rdiv8, - vlynq_div_external -}; - -struct vlynq_device_id { - u32 id; - enum vlynq_divisor divisor; - unsigned long driver_data; -}; - -struct vlynq_regs; -struct vlynq_device { - u32 id, dev_id; - int local_irq; - int remote_irq; - enum vlynq_divisor divisor; - u32 regs_start, regs_end; - u32 mem_start, mem_end; - u32 irq_start, irq_end; - int irq; - int enabled; - struct vlynq_regs *local; - struct vlynq_regs *remote; - struct device dev; -}; - -struct vlynq_driver { - char *name; - struct vlynq_device_id *id_table; - int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id); - void (*remove)(struct vlynq_device *dev); - struct device_driver driver; -}; - -struct plat_vlynq_ops { - int (*on)(struct vlynq_device *dev); - void (*off)(struct vlynq_device *dev); -}; - -static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv) -{ - return container_of(drv, struct vlynq_driver, driver); -} - -static inline struct vlynq_device *to_vlynq_device(struct device *device) -{ - return container_of(device, struct vlynq_device, dev); -} - -extern struct bus_type vlynq_bus_type; - -extern int __vlynq_register_driver(struct vlynq_driver *driver, - struct module *owner); - -static inline int vlynq_register_driver(struct vlynq_driver *driver) -{ - return __vlynq_register_driver(driver, THIS_MODULE); -} - -static inline void *vlynq_get_drvdata(struct vlynq_device *dev) -{ - return dev_get_drvdata(&dev->dev); -} - -static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data) -{ - dev_set_drvdata(&dev->dev, data); -} - -static inline u32 vlynq_mem_start(struct vlynq_device *dev) -{ - return dev->mem_start; -} - -static inline u32 vlynq_mem_end(struct vlynq_device *dev) -{ - return dev->mem_end; -} - -static inline u32 vlynq_mem_len(struct vlynq_device *dev) -{ - return dev->mem_end - dev->mem_start + 1; -} - -static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq; -} - -static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq) -{ - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq - dev->irq_start; -} - -extern void vlynq_unregister_driver(struct vlynq_driver *driver); -extern int vlynq_enable_device(struct vlynq_device *dev); -extern void vlynq_disable_device(struct vlynq_device *dev); -extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq); -extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq); - -#endif /* __VLYNQ_H__ */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 8abfa1240040..747943bc8cc2 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,9 +41,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, - PGDEMOTE_KSWAPD, - PGDEMOTE_DIRECT, - PGDEMOTE_KHUGEPAGED, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, @@ -145,6 +142,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_ZSWAP ZSWPIN, ZSWPOUT, + ZSWPWB, #endif #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c720be70c8dd..98ea90e90439 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,6 +35,7 @@ struct iov_iter; /* in uio.h */ #else #define VM_DEFER_KMEMLEAK 0 #endif +#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ @@ -232,6 +233,10 @@ static inline bool is_vm_area_hugepages(const void *addr) } #ifdef CONFIG_MMU +int vm_area_map_pages(struct vm_struct *area, unsigned long start, + unsigned long end, struct page **pages); +void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, + unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { @@ -253,7 +258,6 @@ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. */ -extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h new file mode 100644 index 000000000000..e1dec1a6a749 --- /dev/null +++ b/include/linux/vmcore_info.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VMCORE_INFO_H +#define LINUX_VMCORE_INFO_H + +#include <linux/linkage.h> +#include <linux/elfcore.h> +#include <linux/elf.h> + +#define CRASH_CORE_NOTE_NAME "CORE" +#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) + +/* + * The per-cpu notes area is a list of notes terminated by a "NULL" + * note header. For kdump, the code in vmcore.c runs in the context + * of the second kernel to combine them into one note. + */ +#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + CRASH_CORE_NOTE_NAME_BYTES + \ + CRASH_CORE_NOTE_DESC_BYTES) + +#define VMCOREINFO_BYTES PAGE_SIZE +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" +#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + VMCOREINFO_NOTE_NAME_BYTES + \ + VMCOREINFO_BYTES) + +typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +/* Per cpu memory for storing cpu states in case of system crash. */ +extern note_buf_t __percpu *crash_notes; + +void crash_update_vmcoreinfo_safecopy(void *ptr); +void crash_save_vmcoreinfo(void); +void arch_crash_save_vmcoreinfo(void); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); +phys_addr_t paddr_vmcoreinfo_note(void); + +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID() \ + ({ \ + static_assert(sizeof(vmlinux_build_id) == 20); \ + vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ + }) + +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) +#define VMCOREINFO_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(name)) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_TYPE_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(name, field)) +#define VMCOREINFO_LENGTH(name, value) \ + vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) +#define VMCOREINFO_NUMBER(name) \ + vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) +#define VMCOREINFO_CONFIG(name) \ + vmcoreinfo_append_str("CONFIG_%s=y\n", #name) + +extern unsigned char *vmcoreinfo_data; +extern size_t vmcoreinfo_size; +extern u32 *vmcoreinfo_note; + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len); +void final_note(Elf_Word *buf); +#endif /* LINUX_VMCORE_INFO_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index fed855bae6d8..343906a98d6e 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -556,19 +556,25 @@ static inline void mod_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -void __mod_lruvec_page_state(struct page *page, +void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); -static inline void mod_lruvec_page_state(struct page *page, +static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); - __mod_lruvec_page_state(page, idx, val); + __lruvec_stat_mod_folio(folio, idx, val); local_irq_restore(flags); } +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + lruvec_stat_mod_folio(page_folio(page), idx, val); +} + #else static inline void __mod_lruvec_state(struct lruvec *lruvec, @@ -583,37 +589,25 @@ static inline void mod_lruvec_state(struct lruvec *lruvec, mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(page_pgdat(page), idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, +static inline void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { - mod_node_page_state(page_pgdat(page), idx, val); + __mod_node_page_state(folio_pgdat(folio), idx, val); } -#endif /* CONFIG_MEMCG */ - -static inline void __inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) +static inline void lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) { - __mod_lruvec_page_state(page, idx, 1); + mod_node_page_state(folio_pgdat(folio), idx, val); } -static inline void __dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) { - __mod_lruvec_page_state(page, idx, -1); + mod_node_page_state(page_pgdat(page), idx, val); } -static inline void __lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - __mod_lruvec_page_state(&folio->page, idx, val); -} +#endif /* CONFIG_MEMCG */ static inline void __lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) @@ -627,24 +621,6 @@ static inline void __lruvec_stat_sub_folio(struct folio *folio, __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -static inline void inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, 1); -} - -static inline void dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, -1); -} - -static inline void lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - mod_lruvec_page_state(&folio->page, idx, val); -} - static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index c1f5aebef170..d008c3d0a9bb 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -25,7 +25,8 @@ extern int fg_console, last_console, want_console; int vc_allocate(unsigned int console); int vc_cons_allocated(unsigned int console); -int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines); +int __vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines, + bool from_user); struct vc_data *vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); @@ -42,6 +43,12 @@ void redraw_screen(struct vc_data *vc, int is_switch); #define update_screen(x) redraw_screen(x, 0) #define switch_screen(x) redraw_screen(x, 1) +static inline int vc_resize(struct vc_data *vc, unsigned int cols, + unsigned int lines) +{ + return __vc_resize(vc, cols, lines, false); +} + struct tty_struct; int tioclinux(struct tty_struct *tty, unsigned long arg); @@ -168,7 +175,4 @@ void vt_set_led_state(unsigned int console, int leds); void vt_kbd_con_start(unsigned int console); void vt_kbd_con_stop(unsigned int console); -void vc_scrolldelta_helper(struct vc_data *c, int lines, - unsigned int rolled_over, void *_base, unsigned int size); - #endif /* _VT_KERN_H */ diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h deleted file mode 100644 index 3495fd0dc790..000000000000 --- a/include/linux/w1-gpio.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * w1-gpio interface to platform code - * - * Copyright (C) 2007 Ville Syrjala <syrjala@sci.fi> - */ -#ifndef _LINUX_W1_GPIO_H -#define _LINUX_W1_GPIO_H - -struct gpio_desc; - -/** - * struct w1_gpio_platform_data - Platform-dependent data for w1-gpio - */ -struct w1_gpio_platform_data { - struct gpio_desc *gpiod; - struct gpio_desc *pullup_gpiod; - void (*enable_external_pullup)(int enable); - unsigned int pullup_duration; -}; - -#endif /* _LINUX_W1_GPIO_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 5ec7739400f4..8aa3372f21a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -9,7 +9,6 @@ #include <linux/spinlock.h> #include <asm/current.h> -#include <uapi/linux/wait.h> typedef struct wait_queue_entry wait_queue_entry_t; @@ -19,10 +18,9 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 -#define WQ_FLAG_BOOKMARK 0x04 -#define WQ_FLAG_CUSTOM 0x08 -#define WQ_FLAG_DONE 0x10 -#define WQ_FLAG_PRIORITY 0x20 +#define WQ_FLAG_CUSTOM 0x04 +#define WQ_FLAG_DONE 0x08 +#define WQ_FLAG_PRIORITY 0x10 /* * A single wait-queue entry structure: @@ -212,8 +210,6 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); -void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, - unsigned int mode, void *key, wait_queue_entry_t *bookmark); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index 45cd42f55d49..429c7b6afead 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -32,7 +32,7 @@ struct watch_filter { DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ - struct watch_type_filter filters[]; + struct watch_type_filter filters[] __counted_by(nr_filters); }; struct watch_queue { diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h index 4ca2842d2842..6a5bb052fcc2 100644 --- a/include/linux/win_minmax.h +++ b/include/linux/win_minmax.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** - * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. +/* + * win_minmax.h: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 763bd382cf2d..63cca3b58d6d 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -11,7 +11,6 @@ #include <linux/device.h> #include <linux/acpi.h> #include <linux/mod_devicetable.h> -#include <uapi/linux/wmi.h> /** * struct wmi_device - WMI device structure @@ -22,11 +21,17 @@ */ struct wmi_device { struct device dev; - - /* private: used by the WMI driver core */ bool setable; }; +/** + * to_wmi_device() - Helper macro to cast a device to a wmi_device + * @device: device struct + * + * Cast a struct device to a struct wmi_device. + */ +#define to_wmi_device(device) container_of(device, struct wmi_device, dev) + extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 method_id, const struct acpi_buffer *in, @@ -35,34 +40,31 @@ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); -u8 wmidev_instance_count(struct wmi_device *wdev); +acpi_status wmidev_block_set(struct wmi_device *wdev, u8 instance, const struct acpi_buffer *in); -extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); +u8 wmidev_instance_count(struct wmi_device *wdev); /** * struct wmi_driver - WMI driver structure * @driver: Driver model structure * @id_table: List of WMI GUIDs supported by this driver - * @no_notify_data: WMI events provide no event data + * @no_notify_data: Driver supports WMI events which provide no event data + * @no_singleton: Driver can be instantiated multiple times * @probe: Callback for device binding * @remove: Callback for device unbinding * @notify: Callback for receiving WMI events - * @filter_callback: Callback for filtering device IOCTLs * * This represents WMI drivers which handle WMI devices. - * @filter_callback is only necessary for drivers which - * want to set up a WMI IOCTL interface. */ struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; bool no_notify_data; + bool no_singleton; int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); - long (*filter_callback)(struct wmi_device *wdev, unsigned int cmd, - struct wmi_ioctl_buffer *arg); }; extern int __must_check __wmi_driver_register(struct wmi_driver *driver, diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h new file mode 100644 index 000000000000..f6f8f83b15b0 --- /dev/null +++ b/include/linux/wordpart.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_WORDPART_H +#define _LINUX_WORDPART_H + +/** + * upper_32_bits - return bits 32-63 of a number + * @n: the number we're accessing + * + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress + * the "right shift count >= width of type" warning when that quantity is + * 32-bits. + */ +#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) + +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) + +/** + * upper_16_bits - return bits 16-31 of a number + * @n: the number we're accessing + */ +#define upper_16_bits(n) ((u16)((n) >> 16)) + +/** + * lower_16_bits - return bits 0-15 of a number + * @n: the number we're accessing + */ +#define lower_16_bits(n) ((u16)((n) & 0xffff)) + +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +#endif // _LINUX_WORDPART_H diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1c1d06804d45..158784dd189a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -14,12 +14,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> #include <linux/rcupdate.h> - -struct workqueue_struct; - -struct work_struct; -typedef void (*work_func_t)(struct work_struct *work); -void delayed_work_timer_fn(struct timer_list *t); +#include <linux/workqueue_types.h> /* * The first word is the work queue pointer and the flags rolled into @@ -27,20 +22,54 @@ void delayed_work_timer_fn(struct timer_list *t); */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) -enum { +enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */ - WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ - WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ + WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ + WORK_STRUCT_PWQ_BIT, /* data points to pwq */ + WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ -#else - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif + WORK_STRUCT_FLAG_BITS, + /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, + /* + * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ + * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 + * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. + * + * MSB + * [ pwq pointer ] [ flush color ] [ STRUCT flags ] + * 4 bits 4 or 5 bits + */ + WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, + + /* + * data contains off-queue information when !WORK_STRUCT_PWQ. + * + * MSB + * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ] + * 1 bit 4 or 5 bits + */ + WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT, + WORK_OFFQ_FLAG_END, + WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, + + /* + * When a work item is off queue, the high bits encode off-queue flags + * and the last pool it was on. Cap pool ID to 31 bits and use the + * highest number to indicate that no pool is associated. + */ + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, + WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, +}; + +enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, @@ -50,35 +79,14 @@ enum { #else WORK_STRUCT_STATIC = 0, #endif +}; +enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, - /* - * Reserve 8 bits off of pwq pointer w/ debugobjects turned off. - * This makes pwqs aligned to 256 bytes and allows 16 workqueue - * flush colors. - */ - WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + - WORK_STRUCT_COLOR_BITS, - - /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - - __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - - /* - * When a work item is off queue, its high bits point to the last - * pool it was on. Cap at 31 bits and use the highest number to - * indicate that no pool is associated. - */ - WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, - WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, - WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, @@ -88,21 +96,10 @@ enum { }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) - -#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) -#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) - -struct work_struct { - atomic_long_t data; - struct list_head entry; - work_func_t func; -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; -#endif -}; +#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ @@ -274,18 +271,16 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * to generate better code. */ #ifdef CONFIG_LOCKDEP -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ - static struct lock_class_key __key; \ - \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ @@ -294,12 +289,22 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } } while (0) #endif +#define __INIT_WORK(_work, _func, _onstack) \ + do { \ + static __maybe_unused struct lock_class_key __key; \ + \ + __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ + } while (0) + #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) +#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ + __INIT_WORK_KEY((_work), (_func), 1, _key) + #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -353,7 +358,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ -enum { +enum wq_flags { + WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ @@ -392,11 +398,22 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ - __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ + /* BH wq only allows the following flags */ + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, +}; + +enum wq_consts { WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, + + /* + * Per-node default cap on min_active. Unless explicitly set, min_active + * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see + * workqueue_struct->min_active definition. + */ + WQ_DFL_MIN_ACTIVE = 8, }; /* @@ -426,6 +443,9 @@ enum { * they are same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. + * + * system_bh[_highpri]_wq are convenience interface to softirq. BH work items + * are executed in the queueing CPU's BH context in the queueing order. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_highpri_wq; @@ -434,16 +454,43 @@ extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; +extern struct workqueue_struct *system_bh_wq; +extern struct workqueue_struct *system_bh_highpri_wq; + +void workqueue_softirq_action(bool highpri); +void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags - * @max_active: max in-flight work items per CPU, 0 for default + * @max_active: max in-flight work items, 0 for default * remaining args: args for @fmt * - * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to + * For a per-cpu workqueue, @max_active limits the number of in-flight work + * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be + * executing at most one work item for the workqueue. + * + * For unbound workqueues, @max_active limits the number of in-flight work items + * for the whole system. e.g. @max_active of 16 indicates that that there can be + * at most 16 work items executing for the workqueue in the whole system. + * + * As sharing the same active counter for an unbound workqueue across multiple + * NUMA nodes can be expensive, @max_active is distributed to each NUMA node + * according to the proportion of the number of online CPUs and enforced + * independently. + * + * Depending on online CPU distribution, a node may end up with per-node + * max_active which is significantly lower than @max_active, which can lead to + * deadlocks if the per-node concurrency limit is lower than the maximum number + * of interdependent work items for the workqueue. + * + * To guarantee forward progress regardless of online CPU distribution, the + * concurrency limit on every node is guaranteed to be equal to or greater than + * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means + * that the sum of per-node max_active's may be larger than @max_active. + * + * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: @@ -466,8 +513,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ - __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) @@ -477,13 +523,16 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) +#define from_work(var, callback_work, work_fieldname) \ + container_of(callback_work, typeof(*var), work_fieldname) + extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); -int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); +extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); @@ -514,6 +563,8 @@ extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern void workqueue_set_min_active(struct workqueue_struct *wq, + int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); @@ -693,8 +744,32 @@ static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) return fn(arg); } #else -long work_on_cpu(int cpu, long (*fn)(void *), void *arg); -long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_key(_cpu, _fn, _arg, &__key); \ +}) + +long work_on_cpu_safe_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); + +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu_safe(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ +}) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/include/linux/workqueue_types.h b/include/linux/workqueue_types.h new file mode 100644 index 000000000000..4c38824f3ab4 --- /dev/null +++ b/include/linux/workqueue_types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_WORKQUEUE_TYPES_H +#define _LINUX_WORKQUEUE_TYPES_H + +#include <linux/atomic.h> +#include <linux/lockdep_types.h> +#include <linux/timer_types.h> +#include <linux/types.h> + +struct workqueue_struct; + +struct work_struct; +typedef void (*work_func_t)(struct work_struct *work); +void delayed_work_timer_fn(struct timer_list *t); + +struct work_struct { + atomic_long_t data; + struct list_head entry; + work_func_t func; +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif +}; + +#endif /* _LINUX_WORKQUEUE_TYPES_H */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 083387c00f0c..9845cb62e40b 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,6 +11,7 @@ #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> +#include <linux/pagevec.h> struct bio; @@ -40,6 +41,7 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { + /* public fields that can be set and/or consumed by the caller: */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ @@ -60,7 +62,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */ + unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ /* * When writeback IOs are bounced through async layers, only the @@ -77,6 +79,11 @@ struct writeback_control { */ struct swap_iocb **swap_plug; + /* internal fields used by the ->writepages implementation: */ + struct folio_batch fbatch; + pgoff_t index; + int saved_err; + #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ @@ -193,7 +200,6 @@ void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { - might_sleep(); wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } @@ -361,11 +367,12 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool wb_over_bg_thresh(struct bdi_writeback *wb); +struct folio *writeback_iter(struct address_space *mapping, + struct writeback_control *wbc, struct folio *folio, int *error); + typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); -void tag_pages_for_writeback(struct address_space *mapping, - pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 01fa15506286..170fdee6339c 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -16,6 +16,7 @@ * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems + * @WWAN_PORT_FASTBOOT: Fastboot protocol control * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -28,6 +29,7 @@ enum wwan_port_type { WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, WWAN_PORT_XMMRPC, + WWAN_PORT_FASTBOOT, /* Add new port types above this line */ diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 2a60ce39cfde..341aea490070 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -5,19 +5,41 @@ #include <linux/types.h> #include <linux/mm_types.h> +struct lruvec; + extern u64 zswap_pool_total_size; extern atomic_t zswap_stored_pages; #ifdef CONFIG_ZSWAP +struct zswap_lruvec_state { + /* + * Number of pages in zswap that should be protected from the shrinker. + * This number is an estimate of the following counts: + * + * a) Recent page faults. + * b) Recent insertion to the zswap LRU. This includes new zswap stores, + * as well as recent zswap LRU rotations. + * + * These pages are likely to be warm, and might incur IO if the are written + * to swap. + */ + atomic_long_t nr_zswap_protected; +}; + bool zswap_store(struct folio *folio); bool zswap_load(struct folio *folio); -void zswap_invalidate(int type, pgoff_t offset); -void zswap_swapon(int type); +void zswap_invalidate(swp_entry_t swp); +int zswap_swapon(int type, unsigned long nr_pages); void zswap_swapoff(int type); - +void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); +void zswap_lruvec_state_init(struct lruvec *lruvec); +void zswap_folio_swapin(struct folio *folio); +bool is_zswap_enabled(void); #else +struct zswap_lruvec_state {}; + static inline bool zswap_store(struct folio *folio) { return false; @@ -28,9 +50,20 @@ static inline bool zswap_load(struct folio *folio) return false; } -static inline void zswap_invalidate(int type, pgoff_t offset) {} -static inline void zswap_swapon(int type) {} +static inline void zswap_invalidate(swp_entry_t swp) {} +static inline int zswap_swapon(int type, unsigned long nr_pages) +{ + return 0; +} static inline void zswap_swapoff(int type) {} +static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {} +static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {} +static inline void zswap_folio_swapin(struct folio *folio) {} + +static inline bool is_zswap_enabled(void) +{ + return false; +} #endif |
