Diffstat (limited to 'include/linux')
69 files changed, 914 insertions, 293 deletions
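The cleanup.h changes below introduce the ACQUIRE()/ACQUIRE_ERR() pair for conditional guards, and the mutex.h hunk converts its _intr guard to the new four-argument DEFINE_GUARD_COND(). A minimal usage sketch of the resulting pattern follows; struct foo and foo_inc() are invented for illustration and are not part of this series:

struct foo {
	struct mutex lock;
	int count;
};

static int foo_inc(struct foo *f)
{
	int ret;

	ACQUIRE(mutex_intr, lock)(&f->lock);
	ret = ACQUIRE_ERR(mutex_intr, &lock);
	if (ret)
		return ret;	/* e.g. -EINTR; the mutex was never taken */

	f->count++;
	return 0;		/* the mutex is released on scope exit */
}

Unlike the old conditional-guard pattern, the failure path gets a real errno from the lock attempt (via the ERR_PTR() stashed in the guard) rather than a bare NULL check.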
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 082ccd8ad96b..aedf573bdb42 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -77,9 +77,6 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype]) -#define for_each_cgroup_storage_type(stype) \ - for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) - struct bpf_cgroup_storage_map; struct bpf_storage_buffer { @@ -510,8 +507,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) -#define for_each_cgroup_storage_type(stype) for (; false; ) - #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9cd2164ed23..cc700925b802 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -208,6 +208,20 @@ enum btf_field_type { BPF_RES_SPIN_LOCK = (1 << 12), }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, + __BPF_CGROUP_STORAGE_MAX +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +}; + +#ifdef CONFIG_CGROUP_BPF +# define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +#else +# define for_each_cgroup_storage_type(stype) for (; false; ) +#endif /* CONFIG_CGROUP_BPF */ + typedef void (*btf_dtor_kfunc_t)(void *); struct btf_field_kptr { @@ -260,6 +274,19 @@ struct bpf_list_node_kern { void *owner; } __attribute__((aligned(8))); +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; + const struct btf_type *attach_func_proto; +}; + struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; @@ -292,24 +319,15 @@ struct bpf_map { struct rcu_head rcu; }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. - */ - struct { - const struct btf_type *attach_func_proto; - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; bool free_after_rcu_gp; atomic64_t sleepable_refcnt; s64 __percpu *elem_count; + u64 cookie; /* write-once */ }; static inline const char *btf_field_type_name(enum btf_field_type type) @@ -1082,14 +1100,6 @@ struct bpf_prog_offload { u32 jited_len; }; -enum bpf_cgroup_storage_type { - BPF_CGROUP_STORAGE_SHARED, - BPF_CGROUP_STORAGE_PERCPU, - __BPF_CGROUP_STORAGE_MAX -}; - -#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX - /* The longest tracepoint has 12 args. 
* See include/trace/bpf_probe.h */ @@ -2108,6 +2118,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 1db17ecbb86c..52a98886a455 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,16 +11,9 @@ #include <linux/module.h> #include <asm/cfi.h> +#ifdef CONFIG_CFI_CLANG extern bool cfi_warn; -#ifndef cfi_get_offset -static inline int cfi_get_offset(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, unsigned long *target, u32 type); @@ -29,6 +22,44 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } + +#ifndef cfi_get_offset +/* + * Returns the CFI prefix offset. By default, the compiler emits only + * a 4-byte CFI type hash before the function. If an architecture + * uses -fpatchable-function-entry=N,M where M>0 to change the prefix + * offset, they must override this function. + */ +static inline int cfi_get_offset(void) +{ + return 4; +} +#endif + +#ifndef cfi_get_func_hash +static inline u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif + +/* CFI type hashes for BPF function types */ +extern u32 cfi_bpf_hash; +extern u32 cfi_bpf_subprog_hash; + +#else /* CONFIG_CFI_CLANG */ + +static inline int cfi_get_offset(void) { return 0; } +static inline u32 cfi_get_func_hash(void *func) { return 0; } + +#define cfi_bpf_hash 0U +#define cfi_bpf_subprog_hash 0U + #endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index bee606bebaca..2573585b7f06 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -3,6 +3,8 @@ #define _LINUX_CLEANUP_H #include <linux/compiler.h> +#include <linux/err.h> +#include <linux/args.h> /** * DOC: scope-based cleanup helpers @@ -61,9 +63,20 @@ * Observe the lock is held for the remainder of the "if ()" block not * the remainder of "func()". * - * Now, when a function uses both __free() and guard(), or multiple - * instances of __free(), the LIFO order of variable definition order - * matters. 
GCC documentation says: + * The ACQUIRE() macro can be used in all places that guard() can be + * used and additionally support conditional locks:: + * + * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) + * ... + * ACQUIRE(pci_dev_try, lock)(dev); + * rc = ACQUIRE_ERR(pci_dev_try, &lock); + * if (rc) + * return rc; + * // @lock is held + * + * Now, when a function uses both __free() and guard()/ACQUIRE(), or + * multiple instances of __free(), the LIFO order of variable definition + * order matters. GCC documentation says: * * "When multiple variables in the same scope have cleanup attributes, * at exit from the scope their associated cleanup functions are run in @@ -313,14 +326,46 @@ _label: \ * acquire fails. * * Only for conditional locks. + * + * ACQUIRE(name, var): + * a named instance of the (guard) class, suitable for conditional + * locks when paired with ACQUIRE_ERR(). + * + * ACQUIRE_ERR(name, &var): + * a helper that is effectively a PTR_ERR() conversion of the guard + * pointer. Returns 0 when the lock was acquired and a negative + * error code otherwise. */ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*(_exp); } +#define __GUARD_IS_ERR(_ptr) \ + ({ \ + unsigned long _rc = (__force unsigned long)(_ptr); \ + unlikely((_rc - 1) >= -MAX_ERRNO - 1); \ + }) + +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { \ + void *_ptr = (void *)(__force unsigned long)*(_exp); \ + if (IS_ERR(_ptr)) { \ + _ptr = NULL; \ + } \ + return _ptr; \ + } \ + static inline int class_##_name##_lock_err(class_##_name##_t *_T) \ + { \ + long _rc = (__force unsigned long)*(_exp); \ + if (!_rc) { \ + _rc = -EBUSY; \ + } \ + if (!IS_ERR_VALUE(_rc)) { \ + _rc = 0; \ + } \ + return _rc; \ + } #define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ @@ -331,23 +376,37 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) -#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ +#define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ - ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +/* + * Default binary condition; success on 'true'. + */ +#define DEFINE_GUARD_COND_3(_name, _ext, _lock) \ + DEFINE_GUARD_COND_4(_name, _ext, _lock, _RET) + +#define DEFINE_GUARD_COND(X...) 
CONCATENATE(DEFINE_GUARD_COND_, COUNT_ARGS(X))(X) #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __guard_err(_name) class_##_name##_lock_err #define __is_cond_ptr(_name) class_##_name##_is_conditional +#define ACQUIRE(_name, _var) CLASS(_name, _var) +#define ACQUIRE_ERR(_name, _var) __guard_err(_name)(_var) + /* * Helper macro for scoped_guard(). * @@ -409,7 +468,7 @@ typedef struct { \ \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ - if (_T->lock) { _unlock; } \ + if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -441,15 +500,22 @@ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) -#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ +#define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ - if (_T->lock && !(_condlock)) _T->lock = NULL; \ + int _RET = (_lock); \ + if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ _t; }), \ typeof_member(class_##_name##_t, lock) l) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +#define DEFINE_LOCK_GUARD_1_COND_3(_name, _ext, _lock) \ + DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _RET) +#define DEFINE_LOCK_GUARD_1_COND(X...) CONCATENATE(DEFINE_LOCK_GUARD_1_COND_, COUNT_ARGS(X))(X) #endif /* _LINUX_CLEANUP_H */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 32048052c64a..5d07c469b571 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -127,6 +127,8 @@ #define __diag_GCC_8(s) #endif +#define __diag_GCC_all(s) __diag(s) + #define __diag_ignore_all(option, comment) \ __diag(__diag_GCC_ignore option) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d381420bbd5f..edfa61d80702 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, CPUHP_NET_DEV_DEAD, - CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 1fe7e7d1b214..7b44b41d0a20 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -13,10 +13,23 @@ */ extern struct resource crashk_res; extern struct resource crashk_low_res; +extern struct range crashk_cma_ranges[]; +#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) +#define CRASHKERNEL_CMA +#define CRASHKERNEL_CMA_RANGES_MAX 4 +extern int crashk_cma_cnt; +#else +#define crashk_cma_cnt 0 +#define CRASHKERNEL_CMA_RANGES_MAX 0 +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); + unsigned long long *low_size, unsigned long long *cma_size, + bool *high); + +void __init reserve_crashkernel_cma(unsigned long long cma_size); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE diff --git a/include/linux/efi.h 
b/include/linux/efi.h index e3776d9cad07..a98cc39e7aaa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -439,6 +439,7 @@ void efi_native_runtime_setup(void); /* OVMF protocol GUIDs */ #define OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID EFI_GUID(0xc5a010fe, 0x38a7, 0x4531, 0x8a, 0x4a, 0x05, 0x00, 0xd2, 0xfd, 0x16, 0x49) +#define OVMF_MEMORY_LOG_TABLE_GUID EFI_GUID(0x95305139, 0xb20f, 0x4723, 0x84, 0x25, 0x62, 0x7c, 0x88, 0x8f, 0xf1, 0x21) typedef struct { efi_guid_t guid; @@ -642,6 +643,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + unsigned long ovmf_debug_log; unsigned long mokvar_table; /* MOK variable config table */ unsigned long coco_secret; /* Confidential computing secret table */ unsigned long unaccepted; /* Unaccepted memory table */ @@ -1344,6 +1346,8 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +int ovmf_log_probe(unsigned long ovmf_debug_log_table); + /* * efivar ops event type */ diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,27 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. */ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); - -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable. The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions @@ -95,7 +79,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -166,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); void *execmem_alloc(enum execmem_type type, size_t size); /** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable. 
+ * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + +/** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed */ @@ -186,19 +191,6 @@ struct vm_struct *execmem_vmap(size_t size); #endif /** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. - * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - -/** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check * diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) diff --git a/include/linux/firewire.h b/include/linux/firewire.h index b632eec3ab52..cceb70415ed2 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -136,6 +136,7 @@ struct fw_card { __be32 maint_utility_register; struct workqueue_struct *isoc_wq; + struct workqueue_struct *async_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) @@ -307,8 +308,7 @@ struct fw_packet { * For successful transmission, the status code is the ack received * from the destination. Otherwise it is one of the juju-specific * rcodes: RCODE_SEND_ERROR, _CANCELLED, _BUSY, _GENERATION, _NO_ACK. - * The callback can be called from tasklet context and thus - * must never block. + * The callback can be called from workqueue and thus must never block. */ fw_packet_callback_t callback; int ack; @@ -381,6 +381,10 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode * * A variation of __fw_send_request() to generate callback for response subaction without time * stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the + * current context instead. */ static inline void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, @@ -410,6 +414,10 @@ static inline void fw_send_request(struct fw_card *card, struct fw_transaction * * @callback_data: data to be passed to the transaction completion callback * * A variation of __fw_send_request() to generate callback for response subaction with time stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the current + * context instead. 
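As the following hunk shows, the transitional set_rv()/set_multiple_rv() callbacks take over the canonical set()/set_multiple() slots, which now return int. A sketch of a driver callback under the new contract, assuming a hypothetical regmap-backed controller (foo_gpio, its regmap, and the FOO_GPIO_OUT register are invented for illustration):

#define FOO_GPIO_OUT	0x04	/* invented output register offset */

struct foo_gpio {
	struct regmap *map;
	struct gpio_chip gc;
};

static int foo_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
{
	struct foo_gpio *priv = gpiochip_get_data(gc);

	/* Bus errors now propagate back to the gpiolib core. */
	return regmap_update_bits(priv->map, FOO_GPIO_OUT,
				  BIT(offset), value ? BIT(offset) : 0);
}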
*/ static inline void fw_send_request_with_tstamp(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, unsigned long long offset, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8d0e3ad89b94..10dd161690a2 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -517,12 +518,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); diff --git a/include/linux/gcd.h b/include/linux/gcd.h index cb572677fd7f..616e81a7f7e3 100644 --- a/include/linux/gcd.h +++ b/include/linux/gcd.h @@ -3,6 +3,9 @@ #define _GCD_H #include <linux/compiler.h> +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4b984e8f8fcd..667f8fd58a79 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,12 +347,10 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: **DEPRECATED** - please use set_rv() instead - * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead - * @set_rv: assigns output value for signal "offset", returns 0 on success or - * negative error value - * @set_multiple_rv: assigns output values for multiple signals defined by - * "mask", returns 0 on success or negative error value + * @set: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. Must return 0 on success and * a negative error number on failure. 
@@ -445,17 +443,11 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - void (*set)(struct gpio_chip *gc, - unsigned int offset, int value); - void (*set_multiple)(struct gpio_chip *gc, + int (*set)(struct gpio_chip *gc, + unsigned int offset, int value); + int (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - int (*set_rv)(struct gpio_chip *gc, - unsigned int offset, - int value); - int (*set_multiple_rv)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index b511acd58ab0..f3a8db4598bb 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -88,10 +88,10 @@ static inline int gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, int value) { - if (WARN_ON(!chip->gc.set_rv)) + if (WARN_ON(!chip->gc.set)) return -EOPNOTSUPP; - return chip->gc.set_rv(&chip->gc, offset, value); + return chip->gc.set(&chip->gc, offset, value); } #define gpio_generic_chip_lock(gen_gc) \ diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 1bc2b3244613..34e615c76ca5 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -21,17 +21,17 @@ * type. * * Type encoding: - * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) - * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) - * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) - * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rw-semaphore as READER (BLOCKER_TYPE_RWSEM_READER) + * 11 - Blocked on rw-semaphore as WRITER (BLOCKER_TYPE_RWSEM_WRITER) */ -#define BLOCKER_TYPE_MUTEX 0x00UL -#define BLOCKER_TYPE_SEM 0x01UL -#define BLOCKER_TYPE_RTMUTEX 0x02UL -#define BLOCKER_TYPE_RWSEM 0x03UL +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RWSEM_READER 0x02UL +#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL -#define BLOCKER_TYPE_MASK 0x03UL +#define BLOCKER_TYPE_MASK 0x03UL #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER static inline void hung_task_set_blocker(void *lock, unsigned long type) diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index 9efbc54e35e5..be5417303ecf 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -37,6 +37,9 @@ static inline bool hypervisor_isolated_pci_functions(void) if (IS_ENABLED(CONFIG_S390)) return true; + if (IS_ENABLED(CONFIG_LOONGARCH)) + return true; + return jailhouse_paravirt(); } diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index b674f64d0822..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. 
*/ -static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, +static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { if (IS_ENABLED(CONFIG_I3C)) diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index c67922ece617..043f5c7ff398 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -249,10 +249,15 @@ struct i3c_device { */ #define I3C_BUS_MAX_DEVS 11 -#define I3C_BUS_MAX_I3C_SCL_RATE 12900000 -#define I3C_BUS_TYP_I3C_SCL_RATE 12500000 -#define I3C_BUS_I2C_FM_PLUS_SCL_RATE 1000000 -#define I3C_BUS_I2C_FM_SCL_RATE 400000 +/* Taken from the I3C Spec V1.1.1, chapter 6.2. "Timing specification" */ +#define I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE 1000000 +#define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 +#define I3C_BUS_I3C_SCL_MAX_RATE 12900000 +#define I3C_BUS_I3C_SCL_TYP_RATE 12500000 +#define I3C_BUS_TAVAL_MIN_NS 1000 +#define I3C_BUS_TBUF_MIXED_FM_MIN_NS 1300 +#define I3C_BUS_THIGH_MIXED_MAX_NS 41 +#define I3C_BUS_TIDLE_MIN_NS 200000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** diff --git a/include/linux/input/touch-overlay.h b/include/linux/input/touch-overlay.h new file mode 100644 index 000000000000..0253e554d3cd --- /dev/null +++ b/include/linux/input/touch-overlay.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Javier Carrasco <javier.carrasco@wolfvision.net> + */ + +#ifndef _TOUCH_OVERLAY +#define _TOUCH_OVERLAY + +#include <linux/types.h> + +struct input_dev; + +int touch_overlay_map(struct list_head *list, struct input_dev *input); + +void touch_overlay_get_touchscreen_abs(struct list_head *list, u16 *x, u16 *y); + +bool touch_overlay_mapped_touchscreen(struct list_head *list); + +bool touch_overlay_process_contact(struct list_head *list, + struct input_dev *input, + struct input_mt_pos *pos, int slot); + +void touch_overlay_sync_frame(struct list_head *list, struct input_dev *input); + +#endif diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7376c1df9c90..c16353cc6e3c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size); - #endif /* _LINUX_IO_MAPPING_H */ diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index b25377b6ea98..5210e8371238 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,7 +60,8 @@ static inline int __get_task_ioprio(struct task_struct *p) int prio; if (!ioc) - return IOPRIO_DEFAULT; + return IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); if (p != current) lockdep_assert_held(&p->alloc_lock); diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index 4fb2f93d82ed..d643c7c87822 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -7,6 +7,7 @@ #include <linux/context_tracking.h> #include <linux/tick.h> #include <linux/kmsan.h> +#include <linux/unwind_deferred.h> #include <asm/entry-common.h> @@ -256,6 +257,7 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on_prepare(); instrumentation_end(); + unwind_reset_info(); user_enter_irqoff(); arch_exit_to_user_mode(); lockdep_hardirqs_on(CALLER_ADDR0); diff --git a/include/linux/jhash.h b/include/linux/jhash.h index fa26a2dd3b52..7c1c1821c694 100644 --- a/include/linux/jhash.h +++ 
b/include/linux/jhash.h @@ -24,7 +24,7 @@ * Jozsef */ #include <linux/bitops.h> -#include <linux/unaligned/packed_struct.h> +#include <linux/unaligned.h> /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) @@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { - a += __get_unaligned_cpu32(k); - b += __get_unaligned_cpu32(k + 4); - c += __get_unaligned_cpu32(k + 8); + a += get_unaligned((u32 *)k); + b += get_unaligned((u32 *)(k + 4)); + c += get_unaligned((u32 *)(k + 8)); __jhash_mix(a, b, c); length -= 12; k += 12; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 03f85ad03025..1b10a5d84b68 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes; typedef unsigned long kimage_entry_t; +/* + * This is a copy of the UAPI struct kexec_segment and must be identical + * to it because it gets copied straight from user space into kernel + * memory. Do not modify this structure unless you change the way segments + * get ingested from user space. + */ struct kexec_segment { /* * This pointer can point to user memory if kexec_load() system @@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_align: Minimum alignment needed. * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. + * @cma: CMA page if the buffer is backed by CMA. * @top_down: Allocate from top of memory. * @random: Place the buffer at a random position. */ @@ -184,6 +191,7 @@ struct kexec_buf { unsigned long buf_align; unsigned long buf_min; unsigned long buf_max; + struct page *cma; bool top_down; #ifdef CONFIG_CRASH_DUMP bool random; @@ -340,6 +348,7 @@ struct kimage { unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; + struct page *segment_cma[KEXEC_SEGMENT_MAX]; struct list_head control_pages; struct list_head dest_pages; @@ -361,6 +370,7 @@ struct kimage { */ unsigned int hotplug_support:1; #endif + unsigned int no_cma:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; diff --git a/include/linux/libata.h b/include/linux/libata.h index 912ace523880..0620dd67369f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -545,6 +545,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes) extern struct device_attribute dev_attr_unload_heads; #ifdef CONFIG_SATA_HOST +extern struct device_attribute dev_attr_link_power_management_supported; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_supported; extern struct device_attribute dev_attr_ncq_prio_enable; diff --git a/include/linux/mm.h b/include/linux/mm.h index 349f0d9aad22..1ae97a0b8ec7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 1f4f44951abe..11a078de9150 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w); #include 
<linux/tracepoint-defs.h> #include <linux/types.h> #include <linux/cleanup.h> +#include <linux/sched/mm.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurrently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check diff --git a/include/linux/module.h b/include/linux/module.h index a7cac01d95e7..3319a5269d28 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -33,7 +33,7 @@ #include <linux/percpu.h> #include <asm/module.h> -#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN +#define MODULE_NAME_LEN __MODULE_NAME_LEN struct modversion_info { unsigned long crc; @@ -165,9 +165,6 @@ extern void cleanup_module(void); struct module_kobject *lookup_or_create_module_kobject(const char *name); -/* Generic info of form tag = "info" */ -#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) - /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) @@ -303,23 +300,6 @@ static typeof(name) __mod_device_table__##type##__##name \ struct notifier_block; -#ifdef CONFIG_MODULES - -/* Get/put a kernel symbol (calls must be symmetric) */ -void *__symbol_get(const char *symbol); -void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ({ \ - static const char __notrim[] \ - __used __section(".no_trim_symbol") = __stringify(x); \ - (typeof(&x))(__symbol_get(__stringify(x))); }) - -/* modules using other modules: kdb wants to see this. */ -struct module_use { - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. 
*/ @@ -604,6 +584,16 @@ struct module { #define MODULE_ARCH_INIT {} #endif +#ifdef CONFIG_MODULES + +/* Get/put a kernel symbol (calls must be symmetric) */ +void *__symbol_get(const char *symbol); +void *__symbol_get_gpl(const char *symbol); +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) + #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..3a25122d83e2 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -6,6 +6,13 @@ #include <linux/stringify.h> #include <linux/kernel.h> +/* + * The maximum module name length, including the NUL byte. + * Chosen so that structs with an unsigned long line up, specifically + * modversion_info. + */ +#define __MODULE_NAME_LEN (64 - sizeof(unsigned long)) + /* You can override this manually, but generally this should match the module name. */ #ifdef MODULE @@ -17,21 +24,19 @@ #define __MODULE_INFO_PREFIX KBUILD_MODNAME "." #endif -/* Chosen so that structs with an unsigned long line up. */ -#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) - -#define __MODULE_INFO(tag, name, info) \ - static const char __UNIQUE_ID(name)[] \ +/* Generic info of form tag = "info" */ +#define MODULE_INFO(tag, info) \ + static const char __UNIQUE_ID(modinfo)[] \ __used __section(".modinfo") __aligned(1) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + MODULE_INFO(parmtype, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. */ #define MODULE_PARM_DESC(_parm, desc) \ - __MODULE_INFO(parm, _parm, #_parm ":" desc) + MODULE_INFO(parm, #_parm ":" desc) struct kernel_param; @@ -282,10 +287,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= __MODULE_NAME_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 00afd341d293..847b81ca6436 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -227,7 +227,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) -DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T), _RET == 0) extern unsigned long mutex_get_owner(struct mutex *lock); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67ae2c3f41d2..c585939b6cd6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -161,6 +161,12 @@ struct nfs_inode { unsigned long cache_validity; /* bit mask */ /* + * NFS Attributes not included in struct inode + */ + + struct timespec64 btime; + + /* * read_cache_jiffies is when we started read-caching this inode. 
* attrtimeo is for how long the cached information is assumed * to be valid. A successful attribute revalidation doubles @@ -316,10 +322,12 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ +#define NFS_INO_INVALID_BTIME BIT(18) /* cached btime is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_BTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_MODE \ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 63141320c2a8..d30c0245031c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -172,12 +172,11 @@ struct nfs_server { #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 #define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 - unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ + __u64 fattr_valid; /* Valid attributes */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ - unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ @@ -203,7 +202,6 @@ struct nfs_server { struct nfs_fsid fsid; int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ - struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ @@ -248,7 +246,6 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; - void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; @@ -257,6 +254,9 @@ struct nfs_server { struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; + atomic_long_t nr_active_delegations; + unsigned int delegation_hash_mask; + struct hlist_head *delegation_hash_table; struct list_head ss_copies; struct list_head ss_src_copies; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 67f6632f723b..ac4bff6e9913 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -45,7 +45,7 @@ struct nfs4_threshold { }; struct nfs_fattr { - unsigned int valid; /* which fields are valid */ + __u64 valid; /* which fields are valid */ umode_t mode; __u32 nlink; kuid_t uid; @@ -67,6 +67,7 @@ struct nfs_fattr { struct timespec64 atime; struct timespec64 mtime; struct timespec64 ctime; + struct timespec64 btime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ @@ -80,32 +81,33 @@ struct nfs_fattr { struct nfs4_label *label; }; -#define NFS_ATTR_FATTR_TYPE (1U << 0) -#define NFS_ATTR_FATTR_MODE (1U << 1) -#define NFS_ATTR_FATTR_NLINK (1U << 2) -#define NFS_ATTR_FATTR_OWNER (1U << 3) -#define NFS_ATTR_FATTR_GROUP (1U << 4) -#define NFS_ATTR_FATTR_RDEV (1U << 5) -#define NFS_ATTR_FATTR_SIZE (1U << 6) -#define NFS_ATTR_FATTR_PRESIZE (1U << 7) -#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) -#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) -#define NFS_ATTR_FATTR_FSID (1U << 10) -#define 
NFS_ATTR_FATTR_FILEID (1U << 11) -#define NFS_ATTR_FATTR_ATIME (1U << 12) -#define NFS_ATTR_FATTR_MTIME (1U << 13) -#define NFS_ATTR_FATTR_CTIME (1U << 14) -#define NFS_ATTR_FATTR_PREMTIME (1U << 15) -#define NFS_ATTR_FATTR_PRECTIME (1U << 16) -#define NFS_ATTR_FATTR_CHANGE (1U << 17) -#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) -#define NFS_ATTR_FATTR_V4_SECURITY_LABEL (1U << 25) +#define NFS_ATTR_FATTR_TYPE BIT_ULL(0) +#define NFS_ATTR_FATTR_MODE BIT_ULL(1) +#define NFS_ATTR_FATTR_NLINK BIT_ULL(2) +#define NFS_ATTR_FATTR_OWNER BIT_ULL(3) +#define NFS_ATTR_FATTR_GROUP BIT_ULL(4) +#define NFS_ATTR_FATTR_RDEV BIT_ULL(5) +#define NFS_ATTR_FATTR_SIZE BIT_ULL(6) +#define NFS_ATTR_FATTR_PRESIZE BIT_ULL(7) +#define NFS_ATTR_FATTR_BLOCKS_USED BIT_ULL(8) +#define NFS_ATTR_FATTR_SPACE_USED BIT_ULL(9) +#define NFS_ATTR_FATTR_FSID BIT_ULL(10) +#define NFS_ATTR_FATTR_FILEID BIT_ULL(11) +#define NFS_ATTR_FATTR_ATIME BIT_ULL(12) +#define NFS_ATTR_FATTR_MTIME BIT_ULL(13) +#define NFS_ATTR_FATTR_CTIME BIT_ULL(14) +#define NFS_ATTR_FATTR_PREMTIME BIT_ULL(15) +#define NFS_ATTR_FATTR_PRECTIME BIT_ULL(16) +#define NFS_ATTR_FATTR_CHANGE BIT_ULL(17) +#define NFS_ATTR_FATTR_PRECHANGE BIT_ULL(18) +#define NFS_ATTR_FATTR_V4_LOCATIONS BIT_ULL(19) +#define NFS_ATTR_FATTR_V4_REFERRAL BIT_ULL(20) +#define NFS_ATTR_FATTR_MOUNTPOINT BIT_ULL(21) +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID BIT_ULL(22) +#define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) +#define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) +#define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) +#define NFS_ATTR_FATTR_BTIME BIT_ULL(26) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -126,6 +128,7 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_BTIME \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) /* diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { diff --git a/include/linux/pci-ep-msi.h b/include/linux/pci-ep-msi.h new file mode 100644 index 000000000000..7c5db90f9620 --- /dev/null +++ b/include/linux/pci-ep-msi.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PCI Endpoint *Function* side MSI header file + * + * Copyright (C) 2024 NXP + * Author: Frank Li <Frank.Li@nxp.com> + */ + +#ifndef __PCI_EP_MSI__ +#define __PCI_EP_MSI__ + +struct pci_epf; + +#ifdef CONFIG_PCI_ENDPOINT_MSI_DOORBELL +int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums); +void pci_epf_free_doorbell(struct pci_epf *epf); +#else +static inline int pci_epf_alloc_doorbell(struct pci_epf *epf, u16 nums) +{ + return -ENODATA; +} + +static inline void pci_epf_free_doorbell(struct pci_epf *epf) +{ +} +#endif /* CONFIG_PCI_ENDPOINT_MSI_DOORBELL */ + +#endif /* __PCI_EP_MSI__ */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 749cee0bcf2c..2e85504ba2ba 100644 --- 
a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -12,6 +12,7 @@ #include <linux/configfs.h> #include <linux/device.h> #include <linux/mod_devicetable.h> +#include <linux/msi.h> #include <linux/pci.h> struct pci_epf; @@ -129,6 +130,16 @@ struct pci_epf_bar { }; /** + * struct pci_epf_doorbell_msg - represents doorbell message + * @msg: MSI message + * @virq: IRQ number of this doorbell MSI message + */ +struct pci_epf_doorbell_msg { + struct msi_msg msg; + int virq; +}; + +/** * struct pci_epf - represents the PCI EPF device * @dev: the PCI EPF device * @name: the name of the PCI EPF device @@ -155,6 +166,8 @@ struct pci_epf_bar { * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function * @event_ops: callbacks for capturing the EPC events + * @db_msg: data for MSI from RC side + * @num_db: number of doorbells */ struct pci_epf { struct device dev; @@ -185,6 +198,8 @@ struct pci_epf { unsigned long vfunction_num_map; struct list_head pci_vepf; const struct pci_epc_event_ops *event_ops; + struct pci_epf_doorbell_msg *db_msg; + u16 num_db; }; /** @@ -226,6 +241,9 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, enum pci_epc_interface_type type); void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); + +int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar, + u64 addr, dma_addr_t *base, size_t *off); int pci_epf_bind(struct pci_epf *epf); void pci_epf_unbind(struct pci_epf *epf); int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf); diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 7d439b0675e9..4aefc7901cd1 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -39,7 +39,7 @@ struct device_link; struct pci_pwrctrl { struct device *dev; - /* Private: don't use. */ + /* private: internal use only */ struct notifier_block nb; struct device_link *link; struct work_struct work; diff --git a/include/linux/pci.h b/include/linux/pci.h index 05e68f35f392..59876de13860 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -328,6 +328,11 @@ struct rcec_ea; * determined (e.g., for Root Complex Integrated * Endpoints without the relevant Capability * Registers). + * @is_hotplug_bridge: Hotplug bridge of any kind (e.g. PCIe Hot-Plug Capable, + * Conventional PCI Hot-Plug, ACPI slot). + * Such bridges are allocated additional MMIO and bus + * number resources to allow for hierarchy expansion. + * @is_pciehp: PCIe Hot-Plug Capable bridge. */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ @@ -451,6 +456,7 @@ struct pci_dev { unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int is_hotplug_bridge:1; + unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ /* @@ -744,6 +750,21 @@ static inline bool pci_is_vga(struct pci_dev *pdev) return false; } +/** + * pci_is_display - check if the PCI device is a display controller + * @pdev: PCI device + * + * Determine whether the given PCI device corresponds to a display + * controller. Display controllers are typically used for graphical output + * and are identified based on their class code. + * + * Return: true if the PCI device is a display controller, false otherwise. 
+ */ +static inline bool pci_is_display(struct pci_dev *pdev) +{ + return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -2438,6 +2459,8 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size); +u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs); void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); /* Arch may override these (weak) */ @@ -2490,6 +2513,10 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) #define pci_sriov_configure_simple NULL static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline int pci_iov_vf_bar_set_size(struct pci_dev *dev, int resno, int size) +{ return -ENODEV; } +static inline u32 pci_iov_vf_bar_get_sizes(struct pci_dev *dev, int resno, int num_vfs) +{ return 0; } static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index ec77ccf1fc4d..ddf79641917f 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -104,6 +104,7 @@ static inline bool shpchp_is_native(struct pci_dev *bridge) { return true; } static inline bool hotplug_is_native(struct pci_dev *bridge) { - return pciehp_is_native(bridge) || shpchp_is_native(bridge); + return (bridge->is_pciehp && pciehp_is_native(bridge)) || + shpchp_is_native(bridge); } #endif diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. 
+ * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 9a8189ffd0f2..d138e1815645 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -165,25 +165,25 @@ struct pinctrl_desc { /* External interface to pin controller */ -extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, +extern int pinctrl_register_and_init(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev); extern int pinctrl_enable(struct pinctrl_dev *pctldev); /* Please use pinctrl_register_and_init() and pinctrl_enable() instead */ -extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, +extern struct pinctrl_dev *pinctrl_register(const struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); extern void pinctrl_unregister(struct pinctrl_dev *pctldev); extern int devm_pinctrl_register_and_init(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data, struct pinctrl_dev **pctldev); /* Please use devm_pinctrl_register_and_init() instead */ extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, - struct pinctrl_desc *pctldesc, + const struct pinctrl_desc *pctldesc, void *driver_data); extern void devm_pinctrl_unregister(struct device *dev, diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 72ff44cca864..2467b3be15c9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -11,8 +11,13 @@ #ifdef __KERNEL__ #include <linux/blkdev.h> +#include <linux/mm.h> -extern const char raid6_empty_zero_page[PAGE_SIZE]; +/* This should be const but the raid6 code is too convoluted for that. */ +static inline void *raid6_get_zero_page(void) +{ + return page_address(ZERO_PAGE(0)); +} #else /* ! __KERNEL__ */ /* Used for testing in user space */ @@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void) return tv.tv_sec*1000 + tv.tv_usec/1000; } +static inline void *raid6_get_zero_page(void) +{ + return raid6_empty_zero_page; +} + #endif /* ! 
__KERNEL__ */ #endif /* LINUX_RAID_RAID6_H */ diff --git a/include/linux/relay.h b/include/linux/relay.h index b3224111d074..6772a7075840 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -29,6 +29,22 @@ #define RELAYFS_CHANNEL_VERSION 7 /* + * Relay buffer statistics + */ +enum { + RELAY_STATS_BUF_FULL = (1 << 0), + RELAY_STATS_WRT_BIG = (1 << 1), + + RELAY_STATS_LAST = RELAY_STATS_WRT_BIG, +}; + +struct rchan_buf_stats +{ + unsigned int full_count; /* counter for buffer full */ + unsigned int big_count; /* counter for too big to write */ +}; + +/* * Per-cpu relay channel buffer */ struct rchan_buf @@ -43,11 +59,11 @@ struct rchan_buf struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ + struct rchan_buf_stats stats; /* buffer stats */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ - size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ @@ -65,7 +81,6 @@ struct rchan const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ - size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ @@ -84,7 +99,6 @@ struct rchan_callbacks * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer - * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. 
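With prev_padding and last_toobig gone, clients observe overflow conditions
through the new per-buffer counters instead, aggregated per channel by
relay_stats() (declared below). A minimal sketch, assuming a client channel
my_chan; relay_buf_full() is an existing relay helper, and the callback name
is illustrative:

	static int my_subbuf_start(struct rchan_buf *buf, void *subbuf,
				   void *prev_subbuf)
	{
		return !relay_buf_full(buf);	/* 0 stops logging */
	}

	/* after the run: total buffer-full events across all per-CPU buffers */
	size_t full_events = relay_stats(my_chan, RELAY_STATS_BUF_FULL);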
@@ -100,8 +114,7 @@ struct rchan_callbacks */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding); + void *prev_subbuf); /* * create_buf_file - create file to represent a relay channel buffer @@ -161,6 +174,7 @@ struct rchan *relay_open(const char *base_filename, void *private_data); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); +size_t relay_stats(struct rchan *chan, int flags); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index bc90c3c7b5fd..876358cfe1b1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 20803fcb49a7..6cd020eea37a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, default: VM_WARN_ON_ONCE(true); } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); + } } /* diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 5a41c3bbcbe3..01da4582db6d 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -8,7 +8,7 @@ * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC * write counter. * - * Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>. + * Copyright (C) 2011-2014 Joshua Kinard <linux@kumba.dev>. * Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>. * * References: diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..f1aaf676a874 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) +/* + * Return just the real task structure pointer of the owner + */ +extern struct task_struct *rwsem_owner(struct rw_semaphore *sem); + +/* + * Return true if the rwsem is owned by a reader. 
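+ *
+ * Reader ownership is only approximate (the owner field records the
+ * last reader to acquire the lock), so these helpers are meant for
+ * debug reporting such as the hung-task blocker. A sketch of an
+ * assumed consumer:
+ *
+ *	struct task_struct *owner = rwsem_owner(sem);
+ *
+ *	if (is_rwsem_reader_owned(sem))
+ *		pr_info("read-held, last reader %px\n", owner);
+ *	else if (owner)
+ *		pr_info("write-held by pid %d\n", owner->pid);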
+ */ +extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); +#endif + #else /* !CONFIG_PREEMPT_RT */ #include <linux/rwbase_rt.h> @@ -240,10 +252,11 @@ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) -DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) +DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0) /* * downgrade write lock to read lock diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 189140bf11fc..ffb9907c7070 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -210,23 +210,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); int sbitmap_get(struct sbitmap *sb); /** - * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, - * limiting the depth used from each word. - * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. - * - * This rather specific operation allows for having multiple users with - * different allocation limits. E.g., there can be a high-priority class that - * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority - * class can only allocate half of the total bits in the bitmap, preventing it - * from starving out the high-priority class. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); - -/** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. * @@ -478,7 +461,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after diff --git a/include/linux/sched.h b/include/linux/sched.h index a9693f179c58..2b272382673d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -47,6 +47,7 @@ #include <linux/rv.h> #include <linux/uidgid_types.h> #include <linux/tracepoint-defs.h> +#include <linux/unwind_deferred_types.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -1646,6 +1647,10 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif +#ifdef CONFIG_UNWIND_USER + struct unwind_task_info unwind_info; +#endif + /* CPU-specific state of this task: */ struct thread_struct thread; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b8b06e71b73e..14b923ddb6df 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,6 +3033,29 @@ static inline void skb_reset_transport_header(struct sk_buff *skb) skb->transport_header = offset; } +/** + * skb_reset_transport_header_careful - conditionally reset transport header + * @skb: buffer to alter + * + * Hardened version of skb_reset_transport_header(). 
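+ *
+ * The computed offset is checked against the narrow
+ * skb->transport_header field (including the ~0U "unset" sentinel)
+ * rather than silently truncated, so the result must be checked.
+ * A hypothetical caller might do:
+ *
+ *	if (!skb_reset_transport_header_careful(skb))
+ *		goto drop;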
+ * + * Returns: true if the operation was a success. + */ +static inline bool __must_check +skb_reset_transport_header_careful(struct sk_buff *skb) +{ + long offset = skb->data - skb->head; + + if (unlikely(offset != (typeof(skb->transport_header))offset)) + return false; + + if (unlikely(offset == (typeof(skb->transport_header))~0U)) + return false; + + skb->transport_header = offset; + return true; +} + static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 1a2c0e0838f9..71e0c09a49eb 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -662,6 +662,14 @@ #define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) +/* For Exynos990 */ +#define EXYNOS990_PHY_CTRL_USB20 (0x72C) + +/* For Exynos7870 */ +#define EXYNOS7870_MIPI_PHY_CONTROL0 (0x070c) +#define EXYNOS7870_MIPI_PHY_CONTROL1 (0x0714) +#define EXYNOS7870_MIPI_PHY_CONTROL2 (0x0734) + /* For Tensor GS101 */ /* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) @@ -669,6 +677,7 @@ #define GS101_CPU_INFORM(cpu) \ (GS101_CPU0_INFORM + (cpu*4)) #define GS101_SYSTEM_CONFIGURATION (0x3A00) +#define GS101_EINT_WAKEUP_MASK (0x3A80) #define GS101_PHY_CTRL_USB20 (0x3EB0) #define GS101_PHY_CTRL_USBDP (0x3EB4) diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 6b839987f14c..fe31773d5210 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -30,6 +30,7 @@ #define ACP63_PCI_REV_ID 0x63 #define ACP70_PCI_REV_ID 0x70 #define ACP71_PCI_REV_ID 0x71 +#define ACP72_PCI_REV_ID 0x72 struct acp_sdw_pdata { u16 instance; diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 876130091384..f06f7b785091 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -23,7 +23,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index e3358c630ba1..8a9ec617cf66 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -130,10 +130,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, - unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); -__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); @@ -342,12 +339,6 @@ xdr_stream_remaining(const struct xdr_stream *xdr) return xdr->nwords << 2; } -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, - size_t size); -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags); -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, - size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char 
**str, size_t maxlen, gfp_t gfp_flags);
ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor,
diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h
new file mode 100644
index 000000000000..89d77dc4f2ed
--- /dev/null
+++ b/include/linux/sys_info.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SYS_INFO_H
+#define _LINUX_SYS_INFO_H
+
+#include <linux/sysctl.h>
+
+/*
+ * SYS_INFO_PANIC_CONSOLE_REPLAY is for the panic case only, as it needs
+ * special handling that only fits the panic path.
+ */
+#define SYS_INFO_TASKS			0x00000001
+#define SYS_INFO_MEM			0x00000002
+#define SYS_INFO_TIMERS			0x00000004
+#define SYS_INFO_LOCKS			0x00000008
+#define SYS_INFO_FTRACE			0x00000010
+#define SYS_INFO_PANIC_CONSOLE_REPLAY	0x00000020
+#define SYS_INFO_ALL_CPU_BT		0x00000040
+#define SYS_INFO_BLOCKED_TASKS		0x00000080
+
+void sys_info(unsigned long si_mask);
+unsigned long sys_info_parse_param(char *str);
+
+#ifdef CONFIG_SYSCTL
+int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
+			    void *buffer, size_t *lenp,
+			    loff_t *ppos);
+#endif
+#endif /* _LINUX_SYS_INFO_H */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index fa9cf4292dff..04307a19cde3 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -480,7 +480,6 @@ enum {
 	EVENT_FILE_FL_RECORDED_TGID_BIT,
 	EVENT_FILE_FL_FILTERED_BIT,
 	EVENT_FILE_FL_NO_SET_FILTER_BIT,
-	EVENT_FILE_FL_SOFT_MODE_BIT,
 	EVENT_FILE_FL_SOFT_DISABLED_BIT,
 	EVENT_FILE_FL_TRIGGER_MODE_BIT,
 	EVENT_FILE_FL_TRIGGER_COND_BIT,
@@ -618,7 +617,6 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
 *  RECORDED_TGID - The tgids should be recorded at sched_switch
 *  FILTERED	  - The event has a filter attached
 *  NO_SET_FILTER - Set when filter has error and is to be ignored
-*  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *			tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
@@ -633,7 +631,6 @@ enum {
 	EVENT_FILE_FL_RECORDED_TGID	= (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
 	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
 	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
-	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
 	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
 	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
 	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h
new file mode 100644
index 000000000000..26122d00708a
--- /dev/null
+++ b/include/linux/unwind_deferred.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_DEFERRED_H
+#define _LINUX_UNWIND_USER_DEFERRED_H
+
+#include <linux/task_work.h>
+#include <linux/unwind_user.h>
+#include <linux/unwind_deferred_types.h>
+
+struct unwind_work;
+
+typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie);
+
+struct unwind_work {
+	struct list_head	list;
+	unwind_callback_t	func;
+	int			bit;
+};
+
+#ifdef CONFIG_UNWIND_USER
+
+enum {
+	UNWIND_PENDING_BIT = 0,
+	UNWIND_USED_BIT,
+};
+
+enum {
+	UNWIND_PENDING	= BIT(UNWIND_PENDING_BIT),
+
+	/* Set if the unwinding was used (directly or deferred) */
+	UNWIND_USED	= BIT(UNWIND_USED_BIT)
+};
+
+void unwind_task_init(struct task_struct *task);
+void unwind_task_free(struct task_struct *task);
+
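+/*
+ * Sketch of the intended flow; my_work, my_cb and the recording step
+ * are illustrative, not part of this API. A tracer registers a
+ * callback once, requests a deferred user stacktrace (e.g. from
+ * interrupt context), and the callback then runs from task_work with
+ * the trace and its matching cookie before the task returns to user
+ * space:
+ *
+ *	static void my_cb(struct unwind_work *work,
+ *			  struct unwind_stacktrace *trace, u64 cookie)
+ *	{
+ *		record trace->entries[0 .. trace->nr) against cookie
+ *	}
+ *
+ *	unwind_deferred_init(&my_work, my_cb);
+ *	...
+ *	unwind_deferred_request(&my_work, &cookie);
+ */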
+int unwind_user_faultable(struct unwind_stacktrace *trace);
+
+int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func);
+int unwind_deferred_request(struct unwind_work *work, u64 *cookie);
+void unwind_deferred_cancel(struct unwind_work *work);
+
+void unwind_deferred_task_exit(struct task_struct *task);
+
+static __always_inline void unwind_reset_info(void)
+{
+	struct unwind_task_info *info = &current->unwind_info;
+	unsigned long bits;
+
+	/* Was there any unwinding? */
+	if (unlikely(info->unwind_mask)) {
+		bits = info->unwind_mask;
+		do {
+			/* Is a task_work going to run again before going back to user space? */
+			if (bits & UNWIND_PENDING)
+				return;
+		} while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL));
+		current->unwind_info.id.id = 0;
+
+		if (unlikely(info->cache)) {
+			info->cache->nr_entries = 0;
+			info->cache->unwind_completed = 0;
+		}
+	}
+}
+
+#else /* !CONFIG_UNWIND_USER */
+
+static inline void unwind_task_init(struct task_struct *task) {}
+static inline void unwind_task_free(struct task_struct *task) {}
+
+static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; }
+static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; }
+static inline int unwind_deferred_request(struct unwind_work *work, u64 *cookie) { return -ENOSYS; }
+static inline void unwind_deferred_cancel(struct unwind_work *work) {}
+
+static inline void unwind_deferred_task_exit(struct task_struct *task) {}
+static inline void unwind_reset_info(void) {}
+
+#endif /* !CONFIG_UNWIND_USER */
+
+#endif /* _LINUX_UNWIND_USER_DEFERRED_H */
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
new file mode 100644
index 000000000000..33b62ac25c86
--- /dev/null
+++ b/include/linux/unwind_deferred_types.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+
+struct unwind_cache {
+	unsigned long		unwind_completed;
+	unsigned int		nr_entries;
+	unsigned long		entries[];
+};
+
+/*
+ * The unwind_task_id is a unique identifier that maps to a user space
+ * stacktrace. It is generated the first time a deferred user space
+ * stacktrace is requested after a task has entered the kernel and
+ * is cleared to zero when it exits. The mapped id will be a non-zero
+ * number.
+ *
+ * To simplify the generation of the 64 bit number, 32 bits will be
+ * the CPU it was generated on, and the other 32 bits will be a per
+ * cpu counter that gets incremented by two every time a new identifier
+ * is generated. The LSB will always be set to keep the value
+ * from being zero.
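+ *
+ * For example (illustrative values only): the second identifier handed
+ * out on CPU 3 reads back through the union below as cpu == 3 with an
+ * odd cnt, so the combined 64-bit id can never be zero.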
+ */ +union unwind_task_id { + struct { + u32 cpu; + u32 cnt; + }; + u64 id; +}; + +struct unwind_task_info { + unsigned long unwind_mask; + struct unwind_cache *cache; + struct callback_head work; + union unwind_task_id id; +}; + +#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h new file mode 100644 index 000000000000..7f7282516bf5 --- /dev/null +++ b/include/linux/unwind_user.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_H +#define _LINUX_UNWIND_USER_H + +#include <linux/unwind_user_types.h> +#include <asm/unwind_user.h> + +#ifndef ARCH_INIT_USER_FP_FRAME + #define ARCH_INIT_USER_FP_FRAME +#endif + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); + +#endif /* _LINUX_UNWIND_USER_H */ diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h new file mode 100644 index 000000000000..a449f15be890 --- /dev/null +++ b/include/linux/unwind_user_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_TYPES_H +#define _LINUX_UNWIND_USER_TYPES_H + +#include <linux/types.h> + +/* + * Unwind types, listed in priority order: lower numbers are attempted first if + * available. + */ +enum unwind_user_type_bits { + UNWIND_USER_TYPE_FP_BIT = 0, + + NR_UNWIND_USER_TYPE_BITS, +}; + +enum unwind_user_type { + /* Type "none" for the start of stack walk iteration. */ + UNWIND_USER_TYPE_NONE = 0, + UNWIND_USER_TYPE_FP = BIT(UNWIND_USER_TYPE_FP_BIT), +}; + +struct unwind_stacktrace { + unsigned int nr; + unsigned long *entries; +}; + +struct unwind_user_frame { + s32 cfa_off; + s32 ra_off; + s32 fp_off; + bool use_fp; +}; + +struct unwind_user_state { + unsigned long ip; + unsigned long sp; + unsigned long fp; + enum unwind_user_type current_type; + unsigned int available_types; + bool done; +}; + +#endif /* _LINUX_UNWIND_USER_TYPES_H */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 707b00772ce1..eb563f538dee 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -105,6 +105,9 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @match_token_uuid: Optional device token match/validation. Return 0 + * if the uuid is valid for the device, -errno otherwise. uuid is NULL + * if none was provided. * @dma_unmap: Called when userspace unmaps IOVA from the container * this device is attached to. 
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl @@ -132,6 +135,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*match_token_uuid)(struct vfio_device *vdev, const uuid_t *uuid); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..f541044e42a2 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -122,6 +122,8 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 04b90c88d164..db31fc6f4f1f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -199,7 +199,7 @@ int virtio_device_reset_done(struct virtio_device *dev); size_t virtio_max_dma_size(const struct virtio_device *vdev); #define virtio_device_for_each_vq(vdev, vq) \ - list_for_each_entry(vq, &vdev->vqs, list) + list_for_each_entry(vq, &(vdev)->vqs, list) /** * struct virtio_driver - operations for a virtio I/O driver diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 36fb3edfa403..0c67543a45c8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -47,31 +47,50 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; } -static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb) +static inline void virtio_vsock_skb_put(struct sk_buff *skb, u32 len) { - u32 len; + DEBUG_NET_WARN_ON_ONCE(skb->len); - len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - if (len > 0) + if (skb_is_nonlinear(skb)) + skb->len = len; + else skb_put(skb, len); } -static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +static inline struct sk_buff * +__virtio_vsock_alloc_skb_with_frags(unsigned int header_len, + unsigned int data_len, + gfp_t mask) { struct sk_buff *skb; + int err; - if (size < VIRTIO_VSOCK_SKB_HEADROOM) - return NULL; - - skb = alloc_skb(size, mask); + skb = alloc_skb_with_frags(header_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, &err, mask); if (!skb) return NULL; skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); + skb->data_len = data_len; return skb; } +static inline struct sk_buff * +virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask) +{ + return __virtio_vsock_alloc_skb_with_frags(size, 0, mask); +} + +static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +{ + if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + return virtio_vsock_alloc_linear_skb(size, mask); + + size -= VIRTIO_VSOCK_SKB_HEADROOM; + return __virtio_vsock_alloc_skb_with_frags(VIRTIO_VSOCK_SKB_HEADROOM, + size, mask); +} + static inline void 
virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb) { @@ -111,7 +130,12 @@ static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) return (size_t)(skb_end_pointer(skb) - skb->head); } -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +/* Dimension the RX SKB so that the entire thing fits exactly into + * a single 4KiB page. This avoids wasting memory due to alloc_skb() + * rounding up to the next page order and also means that we + * don't leave higher-order pages sitting around in the RX queue. + */ +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) diff --git a/include/linux/vringh.h b/include/linux/vringh.h index c3a8117dabe8..49e7cbc9697a 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -175,9 +175,6 @@ int vringh_complete_multi_user(struct vringh *vrh, const struct vring_used_elem used[], unsigned num_used); -/* Pretend we've never seen descriptor (for easy error handling). */ -void vringh_abandon_user(struct vringh *vrh, unsigned int num); - /* Do we need to fire the eventfd to notify the other side? */ int vringh_need_notify_user(struct vringh *vrh); @@ -235,10 +232,6 @@ int vringh_getdesc_kern(struct vringh *vrh, u16 *head, gfp_t gfp); -ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len); -ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, - const void *src, size_t len); -void vringh_abandon_kern(struct vringh *vrh, unsigned int num); int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len); bool vringh_notify_enable_kern(struct vringh *vrh); @@ -319,13 +312,8 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh, struct vringh_kiov *wiov, const void *src, size_t len); -void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num); - int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len); -bool vringh_notify_enable_iotlb(struct vringh *vrh); -void vringh_notify_disable_iotlb(struct vringh *vrh); - int vringh_need_notify_iotlb(struct vringh *vrh); #endif /* CONFIG_VHOST_IOTLB */ diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 99660197a36c..8c60687a3e55 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -9,14 +9,18 @@ #ifndef _LINUX_WATCHDOG_H #define _LINUX_WATCHDOG_H - #include <linux/bitops.h> -#include <linux/cdev.h> -#include <linux/device.h> -#include <linux/kernel.h> +#include <linux/limits.h> #include <linux/notifier.h> +#include <linux/printk.h> +#include <linux/types.h> + #include <uapi/linux/watchdog.h> +struct attribute_group; +struct device; +struct module; + struct watchdog_ops; struct watchdog_device; struct watchdog_core_data; diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index df42511438d0..27f57eca8cb1 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -178,32 +178,6 @@ struct xxh64_state { void xxh32_reset(struct xxh32_state *state, uint32_t seed); /** - * xxh32_update() - hash the data given and update the xxh32 state - * - * @state: The xxh32 state to update. - * @input: The data to hash. - * @length: The length of the data to hash. - * - * After calling xxh32_reset() call xxh32_update() as many times as necessary. - * - * Return: Zero on success, otherwise an error code. - */ -int xxh32_update(struct xxh32_state *state, const void *input, size_t length); - -/** - * xxh32_digest() - produce the current xxh32 hash - * - * @state: Produce the current xxh32 hash of this state. 
- * - * A hash value can be produced at any time. It is still possible to continue - * inserting input into the hash state after a call to xxh32_digest(), and - * generate new hashes later on, by calling xxh32_digest() again. - * - * Return: The xxh32 hash stored in the state. - */ -uint32_t xxh32_digest(const struct xxh32_state *state); - -/** * xxh64_reset() - reset the xxh64 state to start a new hashing operation * * @state: The xxh64 state to reset. |