diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/compiler-gcc.h | 2 | ||||
-rw-r--r-- | include/linux/crash_reserve.h | 15 | ||||
-rw-r--r-- | include/linux/execmem.h | 54 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 2 | ||||
-rw-r--r-- | include/linux/fscrypt.h | 10 | ||||
-rw-r--r-- | include/linux/gcd.h | 3 | ||||
-rw-r--r-- | include/linux/hung_task.h | 18 | ||||
-rw-r--r-- | include/linux/i3c/device.h | 4 | ||||
-rw-r--r-- | include/linux/i3c/master.h | 13 | ||||
-rw-r--r-- | include/linux/io-mapping.h | 3 | ||||
-rw-r--r-- | include/linux/jhash.h | 8 | ||||
-rw-r--r-- | include/linux/kexec.h | 10 | ||||
-rw-r--r-- | include/linux/mm.h | 6 | ||||
-rw-r--r-- | include/linux/mmap_lock.h | 30 | ||||
-rw-r--r-- | include/linux/module.h | 29 | ||||
-rw-r--r-- | include/linux/moduleparam.h | 15 | ||||
-rw-r--r-- | include/linux/page-flags.h | 2 | ||||
-rw-r--r-- | include/linux/pgtable.h | 45 | ||||
-rw-r--r-- | include/linux/raid/pq.h | 12 | ||||
-rw-r--r-- | include/linux/relay.h | 24 | ||||
-rw-r--r-- | include/linux/ring_buffer.h | 3 | ||||
-rw-r--r-- | include/linux/rmap.h | 22 | ||||
-rw-r--r-- | include/linux/rtc/ds1685.h | 2 | ||||
-rw-r--r-- | include/linux/rwsem.h | 12 | ||||
-rw-r--r-- | include/linux/sprintf.h | 2 | ||||
-rw-r--r-- | include/linux/sys_info.h | 28 | ||||
-rw-r--r-- | include/linux/xxhash.h | 26 |
27 files changed, 279 insertions, 121 deletions
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 32048052c64a..5d07c469b571 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -127,6 +127,8 @@ #define __diag_GCC_8(s) #endif +#define __diag_GCC_all(s) __diag(s) + #define __diag_ignore_all(option, comment) \ __diag(__diag_GCC_ignore option) diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 1fe7e7d1b214..7b44b41d0a20 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -13,10 +13,23 @@ */ extern struct resource crashk_res; extern struct resource crashk_low_res; +extern struct range crashk_cma_ranges[]; +#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) +#define CRASHKERNEL_CMA +#define CRASHKERNEL_CMA_RANGES_MAX 4 +extern int crashk_cma_cnt; +#else +#define crashk_cma_cnt 0 +#define CRASHKERNEL_CMA_RANGES_MAX 0 +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); + unsigned long long *low_size, unsigned long long *cma_size, + bool *high); + +void __init reserve_crashkernel_cma(unsigned long long cma_size); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,27 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. */ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); - -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable. The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions @@ -95,7 +79,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -166,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); void *execmem_alloc(enum execmem_type type, size_t size); /** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable. + * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + +/** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed */ @@ -186,19 +191,6 @@ struct vm_struct *execmem_vmap(size_t size); #endif /** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. - * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - -/** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check * diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8d0e3ad89b94..10dd161690a2 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -517,12 +518,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); diff --git a/include/linux/gcd.h b/include/linux/gcd.h index cb572677fd7f..616e81a7f7e3 100644 --- a/include/linux/gcd.h +++ b/include/linux/gcd.h @@ -3,6 +3,9 @@ #define _GCD_H #include <linux/compiler.h> +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__; diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 1bc2b3244613..34e615c76ca5 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -21,17 +21,17 @@ * type. * * Type encoding: - * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) - * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) - * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) - * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rw-semaphore as READER (BLOCKER_TYPE_RWSEM_READER) + * 11 - Blocked on rw-semaphore as WRITER (BLOCKER_TYPE_RWSEM_WRITER) */ -#define BLOCKER_TYPE_MUTEX 0x00UL -#define BLOCKER_TYPE_SEM 0x01UL -#define BLOCKER_TYPE_RTMUTEX 0x02UL -#define BLOCKER_TYPE_RWSEM 0x03UL +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RWSEM_READER 0x02UL +#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL -#define BLOCKER_TYPE_MASK 0x03UL +#define BLOCKER_TYPE_MASK 0x03UL #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER static inline void hung_task_set_blocker(void *lock, unsigned long type) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index b674f64d0822..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. */ -static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, +static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { if (IS_ENABLED(CONFIG_I3C)) diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index c67922ece617..043f5c7ff398 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -249,10 +249,15 @@ struct i3c_device { */ #define I3C_BUS_MAX_DEVS 11 -#define I3C_BUS_MAX_I3C_SCL_RATE 12900000 -#define I3C_BUS_TYP_I3C_SCL_RATE 12500000 -#define I3C_BUS_I2C_FM_PLUS_SCL_RATE 1000000 -#define I3C_BUS_I2C_FM_SCL_RATE 400000 +/* Taken from the I3C Spec V1.1.1, chapter 6.2. "Timing specification" */ +#define I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE 1000000 +#define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 +#define I3C_BUS_I3C_SCL_MAX_RATE 12900000 +#define I3C_BUS_I3C_SCL_TYP_RATE 12500000 +#define I3C_BUS_TAVAL_MIN_NS 1000 +#define I3C_BUS_TBUF_MIXED_FM_MIN_NS 1300 +#define I3C_BUS_THIGH_MIXED_MAX_NS 41 +#define I3C_BUS_TIDLE_MIN_NS 200000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7376c1df9c90..c16353cc6e3c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size); - #endif /* _LINUX_IO_MAPPING_H */ diff --git a/include/linux/jhash.h b/include/linux/jhash.h index fa26a2dd3b52..7c1c1821c694 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -24,7 +24,7 @@ * Jozsef */ #include <linux/bitops.h> -#include <linux/unaligned/packed_struct.h> +#include <linux/unaligned.h> /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) @@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { - a += __get_unaligned_cpu32(k); - b += __get_unaligned_cpu32(k + 4); - c += __get_unaligned_cpu32(k + 8); + a += get_unaligned((u32 *)k); + b += get_unaligned((u32 *)(k + 4)); + c += get_unaligned((u32 *)(k + 8)); __jhash_mix(a, b, c); length -= 12; k += 12; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 03f85ad03025..1b10a5d84b68 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes; typedef unsigned long kimage_entry_t; +/* + * This is a copy of the UAPI struct kexec_segment and must be identical + * to it because it gets copied straight from user space into kernel + * memory. Do not modify this structure unless you change the way segments + * get ingested from user space. + */ struct kexec_segment { /* * This pointer can point to user memory if kexec_load() system @@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_align: Minimum alignment needed. * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. + * @cma: CMA page if the buffer is backed by CMA. * @top_down: Allocate from top of memory. * @random: Place the buffer at a random position. */ @@ -184,6 +191,7 @@ struct kexec_buf { unsigned long buf_align; unsigned long buf_min; unsigned long buf_max; + struct page *cma; bool top_down; #ifdef CONFIG_CRASH_DUMP bool random; @@ -340,6 +348,7 @@ struct kimage { unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; + struct page *segment_cma[KEXEC_SEGMENT_MAX]; struct list_head control_pages; struct list_head dest_pages; @@ -361,6 +370,7 @@ struct kimage { */ unsigned int hotplug_support:1; #endif + unsigned int no_cma:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; diff --git a/include/linux/mm.h b/include/linux/mm.h index 349f0d9aad22..1ae97a0b8ec7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 1f4f44951abe..11a078de9150 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w); #include <linux/tracepoint-defs.h> #include <linux/types.h> #include <linux/cleanup.h> +#include <linux/sched/mm.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check diff --git a/include/linux/module.h b/include/linux/module.h index a7cac01d95e7..313ecb8e5181 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -33,7 +33,7 @@ #include <linux/percpu.h> #include <asm/module.h> -#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN +#define MODULE_NAME_LEN __MODULE_NAME_LEN struct modversion_info { unsigned long crc; @@ -303,23 +303,6 @@ static typeof(name) __mod_device_table__##type##__##name \ struct notifier_block; -#ifdef CONFIG_MODULES - -/* Get/put a kernel symbol (calls must be symmetric) */ -void *__symbol_get(const char *symbol); -void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ({ \ - static const char __notrim[] \ - __used __section(".no_trim_symbol") = __stringify(x); \ - (typeof(&x))(__symbol_get(__stringify(x))); }) - -/* modules using other modules: kdb wants to see this. */ -struct module_use { - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ @@ -604,6 +587,16 @@ struct module { #define MODULE_ARCH_INIT {} #endif +#ifdef CONFIG_MODULES + +/* Get/put a kernel symbol (calls must be symmetric) */ +void *__symbol_get(const char *symbol); +void *__symbol_get_gpl(const char *symbol); +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) + #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..a04a2bc4f51e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -6,6 +6,13 @@ #include <linux/stringify.h> #include <linux/kernel.h> +/* + * The maximum module name length, including the NUL byte. + * Chosen so that structs with an unsigned long line up, specifically + * modversion_info. + */ +#define __MODULE_NAME_LEN (64 - sizeof(unsigned long)) + /* You can override this manually, but generally this should match the module name. */ #ifdef MODULE @@ -17,9 +24,6 @@ #define __MODULE_INFO_PREFIX KBUILD_MODNAME "." #endif -/* Chosen so that structs with an unsigned long line up. */ -#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) - #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ __used __section(".modinfo") __aligned(1) \ @@ -282,10 +286,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= __MODULE_NAME_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 72ff44cca864..2467b3be15c9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -11,8 +11,13 @@ #ifdef __KERNEL__ #include <linux/blkdev.h> +#include <linux/mm.h> -extern const char raid6_empty_zero_page[PAGE_SIZE]; +/* This should be const but the raid6 code is too convoluted for that. */ +static inline void *raid6_get_zero_page(void) +{ + return page_address(ZERO_PAGE(0)); +} #else /* ! __KERNEL__ */ /* Used for testing in user space */ @@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void) return tv.tv_sec*1000 + tv.tv_usec/1000; } +static inline void *raid6_get_zero_page(void) +{ + return raid6_empty_zero_page; +} + #endif /* ! __KERNEL__ */ #endif /* LINUX_RAID_RAID6_H */ diff --git a/include/linux/relay.h b/include/linux/relay.h index b3224111d074..6772a7075840 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -29,6 +29,22 @@ #define RELAYFS_CHANNEL_VERSION 7 /* + * Relay buffer statistics + */ +enum { + RELAY_STATS_BUF_FULL = (1 << 0), + RELAY_STATS_WRT_BIG = (1 << 1), + + RELAY_STATS_LAST = RELAY_STATS_WRT_BIG, +}; + +struct rchan_buf_stats +{ + unsigned int full_count; /* counter for buffer full */ + unsigned int big_count; /* counter for too big to write */ +}; + +/* * Per-cpu relay channel buffer */ struct rchan_buf @@ -43,11 +59,11 @@ struct rchan_buf struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ + struct rchan_buf_stats stats; /* buffer stats */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ - size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ @@ -65,7 +81,6 @@ struct rchan const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ - size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ @@ -84,7 +99,6 @@ struct rchan_callbacks * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer - * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. @@ -100,8 +114,7 @@ struct rchan_callbacks */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding); + void *prev_subbuf); /* * create_buf_file - create file to represent a relay channel buffer @@ -161,6 +174,7 @@ struct rchan *relay_open(const char *base_filename, void *private_data); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); +size_t relay_stats(struct rchan *chan, int flags); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index bc90c3c7b5fd..876358cfe1b1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 20803fcb49a7..6cd020eea37a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, default: VM_WARN_ON_ONCE(true); } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); + } } /* diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 5a41c3bbcbe3..01da4582db6d 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -8,7 +8,7 @@ * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC * write counter. * - * Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>. + * Copyright (C) 2011-2014 Joshua Kinard <linux@kumba.dev>. * Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>. * * References: diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index cbafdc12e743..f1aaf676a874 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) +/* + * Return just the real task structure pointer of the owner + */ +extern struct task_struct *rwsem_owner(struct rw_semaphore *sem); + +/* + * Return true if the rwsem is owned by a reader. + */ +extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); +#endif + #else /* !CONFIG_PREEMPT_RT */ #include <linux/rwbase_rt.h> diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 876130091384..f06f7b785091 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -23,7 +23,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h new file mode 100644 index 000000000000..89d77dc4f2ed --- /dev/null +++ b/include/linux/sys_info.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SYS_INFO_H +#define _LINUX_SYS_INFO_H + +#include <linux/sysctl.h> + +/* + * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special + * handling which only fits panic case. + */ +#define SYS_INFO_TASKS 0x00000001 +#define SYS_INFO_MEM 0x00000002 +#define SYS_INFO_TIMERS 0x00000004 +#define SYS_INFO_LOCKS 0x00000008 +#define SYS_INFO_FTRACE 0x00000010 +#define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020 +#define SYS_INFO_ALL_CPU_BT 0x00000040 +#define SYS_INFO_BLOCKED_TASKS 0x00000080 + +void sys_info(unsigned long si_mask); +unsigned long sys_info_parse_param(char *str); + +#ifdef CONFIG_SYSCTL +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos); +#endif +#endif /* _LINUX_SYS_INFO_H */ diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index df42511438d0..27f57eca8cb1 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -178,32 +178,6 @@ struct xxh64_state { void xxh32_reset(struct xxh32_state *state, uint32_t seed); /** - * xxh32_update() - hash the data given and update the xxh32 state - * - * @state: The xxh32 state to update. - * @input: The data to hash. - * @length: The length of the data to hash. - * - * After calling xxh32_reset() call xxh32_update() as many times as necessary. - * - * Return: Zero on success, otherwise an error code. - */ -int xxh32_update(struct xxh32_state *state, const void *input, size_t length); - -/** - * xxh32_digest() - produce the current xxh32 hash - * - * @state: Produce the current xxh32 hash of this state. - * - * A hash value can be produced at any time. It is still possible to continue - * inserting input into the hash state after a call to xxh32_digest(), and - * generate new hashes later on, by calling xxh32_digest() again. - * - * Return: The xxh32 hash stored in the state. - */ -uint32_t xxh32_digest(const struct xxh32_state *state); - -/** * xxh64_reset() - reset the xxh64 state to start a new hashing operation * * @state: The xxh64 state to reset. |