summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/atmdev.h1
-rw-r--r--include/linux/audit.h25
-rw-r--r--include/linux/backing-dev-defs.h4
-rw-r--r--include/linux/bitops.h2
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/bpf.h73
-rw-r--r--include/linux/bpf_verifier.h65
-rw-r--r--include/linux/btf.h2
-rw-r--r--include/linux/cc_platform.h8
-rw-r--r--include/linux/cdx/bitfield.h90
-rw-r--r--include/linux/cdx/edac_cdx_pcol.h28
-rw-r--r--include/linux/cdx/mcdi.h199
-rw-r--r--include/linux/cfi.h6
-rw-r--r--include/linux/cfi_types.h8
-rw-r--r--include/linux/cgroup-defs.h43
-rw-r--r--include/linux/cgroup.h53
-rw-r--r--include/linux/cgroup_namespace.h58
-rw-r--r--include/linux/compiler-clang.h29
-rw-r--r--include/linux/compiler.h10
-rw-r--r--include/linux/compiler_types.h23
-rw-r--r--include/linux/cpu.h1
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/cred.h2
-rw-r--r--include/linux/damon.h2
-rw-r--r--include/linux/dlm.h33
-rw-r--r--include/linux/dma-map-ops.h3
-rw-r--r--include/linux/energy_model.h10
-rw-r--r--include/linux/ethtool.h4
-rw-r--r--include/linux/export.h2
-rw-r--r--include/linux/exportfs.h6
-rw-r--r--include/linux/filter.h17
-rw-r--r--include/linux/firewire.h4
-rw-r--r--include/linux/firmware/imx/sm.h47
-rw-r--r--include/linux/fs.h285
-rw-r--r--include/linux/fs_context.h18
-rw-r--r--include/linux/fscrypt.h40
-rw-r--r--include/linux/fsverity.h57
-rw-r--r--include/linux/hfs_common.h20
-rw-r--r--include/linux/hrtimer.h14
-rw-r--r--include/linux/hrtimer_defs.h2
-rw-r--r--include/linux/io_uring_types.h3
-rw-r--r--include/linux/iocontext.h6
-rw-r--r--include/linux/iosys-map.h7
-rw-r--r--include/linux/iov_iter.h20
-rw-r--r--include/linux/ipc_namespace.h13
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/jiffies.h2
-rw-r--r--include/linux/kasan.h6
-rw-r--r--include/linux/kcov.h47
-rw-r--r--include/linux/kexec.h3
-rw-r--r--include/linux/lsm_hook_defs.h2
-rw-r--r--include/linux/lsm_hooks.h3
-rw-r--r--include/linux/memblock.h5
-rw-r--r--include/linux/migrate.h5
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/mlx5/fs.h2
-rw-r--r--include/linux/mm_types.h10
-rw-r--r--include/linux/mnt_namespace.h4
-rw-r--r--include/linux/msi.h6
-rw-r--r--include/linux/namei.h21
-rw-r--r--include/linux/netdevice.h5
-rw-r--r--include/linux/netfs.h1
-rw-r--r--include/linux/nfs_page.h1
-rw-r--r--include/linux/ns_common.h139
-rw-r--r--include/linux/nsfs.h40
-rw-r--r--include/linux/nsproxy.h13
-rw-r--r--include/linux/nstree.h78
-rw-r--r--include/linux/pagewalk.h3
-rw-r--r--include/linux/perf_event.h4
-rw-r--r--include/linux/pgalloc.h29
-rw-r--r--include/linux/pgtable.h31
-rw-r--r--include/linux/phy.h5
-rw-r--r--include/linux/pid_namespace.h20
-rw-r--r--include/linux/platform_data/dmtimer-omap.h4
-rw-r--r--include/linux/platform_data/x86/int3472.h1
-rw-r--r--include/linux/pm_domain.h7
-rw-r--r--include/linux/preempt.h11
-rw-r--r--include/linux/proc_ns.h22
-rw-r--r--include/linux/psp-platform-access.h2
-rw-r--r--include/linux/rcupdate.h16
-rw-r--r--include/linux/resctrl.h148
-rw-r--r--include/linux/resctrl_types.h18
-rw-r--r--include/linux/rseq.h15
-rw-r--r--include/linux/rv.h6
-rw-r--r--include/linux/rw_hint.h1
-rw-r--r--include/linux/sched.h149
-rw-r--r--include/linux/sched/ext.h6
-rw-r--r--include/linux/sched/signal.h4
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/sched/topology.h29
-rw-r--r--include/linux/security.h10
-rw-r--r--include/linux/sem.h4
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/stddef.h24
-rw-r--r--include/linux/string_choices.h6
-rw-r--r--include/linux/swap.h10
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/time_namespace.h17
-rw-r--r--include/linux/timekeeper_internal.h9
-rw-r--r--include/linux/tnum.h6
-rw-r--r--include/linux/topology.h2
-rw-r--r--include/linux/uprobes.h24
-rw-r--r--include/linux/user_events.h4
-rw-r--r--include/linux/user_namespace.h9
-rw-r--r--include/linux/uts_namespace.h65
-rw-r--r--include/linux/utsname.h53
-rw-r--r--include/linux/verification.h1
-rw-r--r--include/linux/virtio_config.h13
-rw-r--r--include/linux/vmalloc.h16
-rw-r--r--include/linux/workqueue.h32
-rw-r--r--include/linux/writeback.h2
111 files changed, 1987 insertions, 602 deletions
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 45f2f278b50a..70807c679f1a 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */
int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd,
void __user *arg);
#endif
+ int (*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb);
int (*send)(struct atm_vcc *vcc,struct sk_buff *skb);
int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb);
int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags);
diff --git a/include/linux/audit.h b/include/linux/audit.h
index a394614ccd0b..536f8ee8da81 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -37,6 +37,8 @@ struct audit_watch;
struct audit_tree;
struct sk_buff;
struct kern_ipc_perm;
+struct lsm_id;
+struct lsm_prop;
struct audit_krule {
u32 pflags;
@@ -147,6 +149,10 @@ extern unsigned compat_signal_class[];
#define AUDIT_TTY_ENABLE BIT(0)
#define AUDIT_TTY_LOG_PASSWD BIT(1)
+/* bit values for audit_cfg_lsm */
+#define AUDIT_CFG_LSM_SECCTX_SUBJECT BIT(0)
+#define AUDIT_CFG_LSM_SECCTX_OBJECT BIT(1)
+
struct filename;
#define AUDIT_OFF 0
@@ -185,6 +191,8 @@ extern void audit_log_path_denied(int type,
const char *operation);
extern void audit_log_lost(const char *message);
+extern int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop);
+extern int audit_log_obj_ctx(struct audit_buffer *ab, struct lsm_prop *prop);
extern int audit_log_task_context(struct audit_buffer *ab);
extern void audit_log_task_info(struct audit_buffer *ab);
@@ -210,6 +218,8 @@ extern u32 audit_enabled;
extern int audit_signal_info(int sig, struct task_struct *t);
+extern void audit_cfg_lsm(const struct lsm_id *lsmid, int flags);
+
#else /* CONFIG_AUDIT */
static inline __printf(4, 5)
void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
@@ -245,6 +255,16 @@ static inline void audit_log_key(struct audit_buffer *ab, char *key)
{ }
static inline void audit_log_path_denied(int type, const char *operation)
{ }
+static inline int audit_log_subj_ctx(struct audit_buffer *ab,
+ struct lsm_prop *prop)
+{
+ return 0;
+}
+static inline int audit_log_obj_ctx(struct audit_buffer *ab,
+ struct lsm_prop *prop)
+{
+ return 0;
+}
static inline int audit_log_task_context(struct audit_buffer *ab)
{
return 0;
@@ -269,6 +289,9 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
return 0;
}
+static inline void audit_cfg_lsm(const struct lsm_id *lsmid, int flags)
+{ }
+
#endif /* CONFIG_AUDIT */
#ifdef CONFIG_AUDIT_COMPAT_GENERIC
@@ -527,7 +550,7 @@ static inline void audit_log_kern_module(const char *name)
static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
{
- if (!audit_dummy_context())
+ if (audit_enabled)
__audit_fanotify(response, friar);
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2ad261082bba..c5c9d89c73ed 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -152,6 +152,10 @@ struct bdi_writeback {
struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
struct list_head b_attached; /* attached inodes, protected by list_lock */
struct list_head offline_node; /* anchored at offline_cgwbs */
+ struct work_struct switch_work; /* work used to perform inode switching
+ * to this wb */
+ struct llist_head switch_wbs_ctxs; /* queued contexts for
+ * writeback switching */
union {
struct work_struct release_work;
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 9be2d50da09a..ea7898cc5903 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -267,7 +267,7 @@ static inline int parity8(u8 val)
* The result is not defined if no bits are set, so check that @word
* is non-zero before calling this.
*/
-static inline unsigned int __ffs64(u64 word)
+static inline __attribute_const__ unsigned int __ffs64(u64 word)
{
#if BITS_PER_LONG == 32
if (((u32)word) == 0UL)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95886b404b16..fe1797bbec42 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -656,6 +656,7 @@ enum {
QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
+ QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */
QUEUE_FLAG_MAX
};
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc700925b802..a98c83346134 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -7,6 +7,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/filter.h>
+#include <crypto/sha2.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>
@@ -109,6 +110,7 @@ struct bpf_map_ops {
long (*map_pop_elem)(struct bpf_map *map, void *value);
long (*map_peek_elem)(struct bpf_map *map, void *value);
void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
+ int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -206,6 +208,7 @@ enum btf_field_type {
BPF_WORKQUEUE = (1 << 10),
BPF_UPTR = (1 << 11),
BPF_RES_SPIN_LOCK = (1 << 12),
+ BPF_TASK_WORK = (1 << 13),
};
enum bpf_cgroup_storage_type {
@@ -259,6 +262,7 @@ struct btf_record {
int timer_off;
int wq_off;
int refcount_off;
+ int task_work_off;
struct btf_field fields[];
};
@@ -285,9 +289,11 @@ struct bpf_map_owner {
bool xdp_has_frags;
u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
const struct btf_type *attach_func_proto;
+ enum bpf_attach_type expected_attach_type;
};
struct bpf_map {
+ u8 sha[SHA256_DIGEST_SIZE];
const struct bpf_map_ops *ops;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
@@ -328,6 +334,7 @@ struct bpf_map {
atomic64_t sleepable_refcnt;
s64 __percpu *elem_count;
u64 cookie; /* write-once */
+ char *excl_prog_sha;
};
static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -358,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
return "bpf_rb_node";
case BPF_REFCOUNT:
return "bpf_refcount";
+ case BPF_TASK_WORK:
+ return "bpf_task_work";
default:
WARN_ON_ONCE(1);
return "unknown";
@@ -396,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_rb_node);
case BPF_REFCOUNT:
return sizeof(struct bpf_refcount);
+ case BPF_TASK_WORK:
+ return sizeof(struct bpf_task_work);
default:
WARN_ON_ONCE(1);
return 0;
@@ -428,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_rb_node);
case BPF_REFCOUNT:
return __alignof__(struct bpf_refcount);
+ case BPF_TASK_WORK:
+ return __alignof__(struct bpf_task_work);
default:
WARN_ON_ONCE(1);
return 0;
@@ -459,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
case BPF_UPTR:
+ case BPF_TASK_WORK:
break;
default:
WARN_ON_ONCE(1);
@@ -595,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
void bpf_wq_cancel_and_free(void *timer);
+void bpf_task_work_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -767,12 +782,15 @@ enum bpf_type_flag {
*/
MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
+ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
- | DYNPTR_TYPE_XDP)
+ | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1110,7 +1128,7 @@ struct bpf_prog_offload {
*/
#define MAX_BPF_FUNC_REG_ARGS 5
-/* The argument is a structure. */
+/* The argument is a structure or a union. */
#define BTF_FMODEL_STRUCT_ARG BIT(0)
/* The argument is signed. */
@@ -1358,6 +1376,8 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_SKB,
/* Underlying data is a xdp_buff */
BPF_DYNPTR_TYPE_XDP,
+ /* Points to skb_metadata_end()-skb_metadata_len() */
+ BPF_DYNPTR_TYPE_SKB_META,
};
int bpf_dynptr_check_size(u32 size);
@@ -1619,6 +1639,7 @@ struct bpf_prog_aux {
bool priv_stack_requested;
bool changes_pkt_data;
bool might_sleep;
+ bool kprobe_write_ctx;
u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
struct bpf_arena *arena;
@@ -1628,6 +1649,7 @@ struct bpf_prog_aux {
/* function name for valid attach_btf_id */
const char *attach_func_name;
struct bpf_prog **func;
+ struct bpf_prog_aux *main_prog_aux;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
struct bpf_kfunc_desc_tab *kfunc_tab;
@@ -1711,7 +1733,10 @@ struct bpf_prog {
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
- u8 tag[BPF_TAG_SIZE];
+ union {
+ u8 digest[SHA256_DIGEST_SIZE];
+ u8 tag[BPF_TAG_SIZE];
+ };
struct bpf_prog_stats __percpu *stats;
int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
@@ -1985,6 +2010,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
module_put(owner);
}
int bpf_struct_ops_link_create(union bpf_attr *attr);
+u32 bpf_struct_ops_id(const void *kdata);
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
@@ -2411,6 +2437,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
@@ -2697,7 +2724,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
@@ -2874,6 +2901,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
+void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -3161,6 +3189,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}
+
+static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
+ unsigned long fault_ip)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
static __always_inline int
@@ -3403,6 +3436,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
+#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)
+
+struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags);
+struct bpf_key *bpf_lookup_system_key(u64 id);
+void bpf_key_put(struct bpf_key *bkey);
+int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring);
+
+#else
+static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+{
+ return NULL;
+}
+
+static inline struct bpf_key *bpf_lookup_system_key(u64 id)
+{
+ return NULL;
+}
+
+static inline void bpf_key_put(struct bpf_key *bkey)
+{
+}
+
+static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */
+
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 94defa405c85..4c497e839526 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -26,28 +26,6 @@
/* Patch buffer size */
#define INSN_BUF_SIZE 32
-/* Liveness marks, used for registers and spilled-regs (in stack slots).
- * Read marks propagate upwards until they find a write mark; they record that
- * "one of this state's descendants read this reg" (and therefore the reg is
- * relevant for states_equal() checks).
- * Write marks collect downwards and do not propagate; they record that "the
- * straight-line code that reached this state (from its parent) wrote this reg"
- * (and therefore that reads propagated from this state or its descendants
- * should not propagate to its parent).
- * A state with a write mark can receive read marks; it just won't propagate
- * them to its parent, since the write mark is a property, not of the state,
- * but of the link between it and its parent. See mark_reg_read() and
- * mark_stack_slot_read() in kernel/bpf/verifier.c.
- */
-enum bpf_reg_liveness {
- REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
- REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
- REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
- REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
- REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
- REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
-};
-
#define ITER_PREFIX "bpf_iter_"
enum bpf_iter_state {
@@ -212,8 +190,6 @@ struct bpf_reg_state {
* allowed and has the same effect as bpf_sk_release(sk).
*/
u32 ref_obj_id;
- /* parentage chain for liveness checking */
- struct bpf_reg_state *parent;
/* Inside the callee two registers can be both PTR_TO_STACK like
* R1=fp-8 and R2=fp-8, but one of them points to this function stack
* while another to the caller's stack. To differentiate them 'frameno'
@@ -226,7 +202,6 @@ struct bpf_reg_state {
* patching which only happens after main verification finished.
*/
s32 subreg_def;
- enum bpf_reg_liveness live;
/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
bool precise;
};
@@ -445,6 +420,7 @@ struct bpf_verifier_state {
bool speculative;
bool in_sleepable;
+ bool cleaned;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
@@ -665,6 +641,7 @@ struct bpf_subprog_info {
/* 'start' has to be the first field otherwise find_subprog() won't work */
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
+ u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
u16 stack_depth; /* max. stack depth used by this function */
u16 stack_extra;
/* offsets in range [stack_depth .. fastcall_stack_off)
@@ -744,6 +721,8 @@ struct bpf_scc_info {
struct bpf_scc_visit visits[];
};
+struct bpf_liveness;
+
/* single container for all structs
* one verifier_env per bpf_check() call
*/
@@ -794,7 +773,10 @@ struct bpf_verifier_env {
struct {
int *insn_state;
int *insn_stack;
- /* vector of instruction indexes sorted in post-order */
+ /*
+ * vector of instruction indexes sorted in post-order, grouped by subprogram,
+ * see bpf_subprog_info->postorder_start.
+ */
int *insn_postorder;
int cur_stack;
/* current position in the insn_postorder vector */
@@ -842,6 +824,7 @@ struct bpf_verifier_env {
struct bpf_insn insn_buf[INSN_BUF_SIZE];
struct bpf_insn epilogue_buf[INSN_BUF_SIZE];
struct bpf_scc_callchain callchain_buf;
+ struct bpf_liveness *liveness;
/* array of pointers to bpf_scc_info indexed by SCC id */
struct bpf_scc_info **scc_info;
u32 scc_cnt;
@@ -875,13 +858,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
#define verifier_bug_if(cond, env, fmt, args...) \
({ \
bool __cond = (cond); \
- if (unlikely(__cond)) { \
- BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \
- bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \
- } \
+ if (unlikely(__cond)) \
+ verifier_bug(env, fmt " (" #cond ")", ##args); \
(__cond); \
})
-#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args)
+#define verifier_bug(env, fmt, args...) \
+ ({ \
+ BPF_WARN_ONCE(1, "verifier bug: " fmt "\n", ##args); \
+ bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \
+ })
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
@@ -962,6 +947,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
case BPF_PROG_TYPE_STRUCT_OPS:
return prog->aux->jits_use_priv_stack;
case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_SYSCALL:
return false;
default:
return true;
@@ -1062,4 +1048,21 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
u32 frameno);
+struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
+int bpf_jmp_offset(struct bpf_insn *insn);
+int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]);
+void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
+bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
+
+int bpf_stack_liveness_init(struct bpf_verifier_env *env);
+void bpf_stack_liveness_free(struct bpf_verifier_env *env);
+int bpf_update_live_stack(struct bpf_verifier_env *env);
+int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask);
+void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask);
+int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx);
+int bpf_commit_stack_write_marks(struct bpf_verifier_env *env);
+int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
+bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
+void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env);
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9eda6b113f9b..f06976ffb63f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -86,7 +86,7 @@
* as to avoid issues such as the compiler inlining or eliding either a static
* kfunc, or a global kfunc in an LTO build.
*/
-#define __bpf_kfunc __used __retain noinline
+#define __bpf_kfunc __used __retain __noclone noinline
#define __bpf_kfunc_start_defs() \
__diag_push(); \
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 0bf7d33a1048..7fcec025c5e0 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -96,6 +96,14 @@ enum cc_attr {
* enabled to run SEV-SNP guests.
*/
CC_ATTR_HOST_SEV_SNP,
+
+ /**
+ * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active.
+ *
+ * The host kernel is running with the necessary features enabled
+ * to run SEV-SNP guests with full Secure AVIC capabilities.
+ */
+ CC_ATTR_SNP_SECURE_AVIC,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
diff --git a/include/linux/cdx/bitfield.h b/include/linux/cdx/bitfield.h
new file mode 100644
index 000000000000..567f8ec47582
--- /dev/null
+++ b/include/linux/cdx/bitfield.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#ifndef CDX_BITFIELD_H
+#define CDX_BITFIELD_H
+
+#include <linux/bitfield.h>
+
+/* Lowest bit numbers and widths */
+#define CDX_DWORD_LBN 0
+#define CDX_DWORD_WIDTH 32
+
+/* Specified attribute (e.g. LBN) of the specified field */
+#define CDX_VAL(field, attribute) field ## _ ## attribute
+/* Low bit number of the specified field */
+#define CDX_LOW_BIT(field) CDX_VAL(field, LBN)
+/* Bit width of the specified field */
+#define CDX_WIDTH(field) CDX_VAL(field, WIDTH)
+/* High bit number of the specified field */
+#define CDX_HIGH_BIT(field) (CDX_LOW_BIT(field) + CDX_WIDTH(field) - 1)
+
+/* A doubleword (i.e. 4 byte) datatype - little-endian in HW */
+struct cdx_dword {
+ __le32 cdx_u32;
+};
+
+/* Value expanders for printk */
+#define CDX_DWORD_VAL(dword) \
+ ((unsigned int)le32_to_cpu((dword).cdx_u32))
+
+/*
+ * Extract bit field portion [low,high) from the 32-bit little-endian
+ * element which contains bits [min,max)
+ */
+#define CDX_DWORD_FIELD(dword, field) \
+ (FIELD_GET(GENMASK(CDX_HIGH_BIT(field), CDX_LOW_BIT(field)), \
+ le32_to_cpu((dword).cdx_u32)))
+
+/*
+ * Creates the portion of the named bit field that lies within the
+ * range [min,max).
+ */
+#define CDX_INSERT_FIELD(field, value) \
+ (FIELD_PREP(GENMASK(CDX_HIGH_BIT(field), \
+ CDX_LOW_BIT(field)), value))
+
+/*
+ * Creates the portion of the named bit fields that lie within the
+ * range [min,max).
+ */
+#define CDX_INSERT_FIELDS(field1, value1, \
+ field2, value2, \
+ field3, value3, \
+ field4, value4, \
+ field5, value5, \
+ field6, value6, \
+ field7, value7) \
+ (CDX_INSERT_FIELD(field1, (value1)) | \
+ CDX_INSERT_FIELD(field2, (value2)) | \
+ CDX_INSERT_FIELD(field3, (value3)) | \
+ CDX_INSERT_FIELD(field4, (value4)) | \
+ CDX_INSERT_FIELD(field5, (value5)) | \
+ CDX_INSERT_FIELD(field6, (value6)) | \
+ CDX_INSERT_FIELD(field7, (value7)))
+
+#define CDX_POPULATE_DWORD(dword, ...) \
+ (dword).cdx_u32 = cpu_to_le32(CDX_INSERT_FIELDS(__VA_ARGS__))
+
+/* Populate a dword field with various numbers of arguments */
+#define CDX_POPULATE_DWORD_7 CDX_POPULATE_DWORD
+#define CDX_POPULATE_DWORD_6(dword, ...) \
+ CDX_POPULATE_DWORD_7(dword, CDX_DWORD, 0, __VA_ARGS__)
+#define CDX_POPULATE_DWORD_5(dword, ...) \
+ CDX_POPULATE_DWORD_6(dword, CDX_DWORD, 0, __VA_ARGS__)
+#define CDX_POPULATE_DWORD_4(dword, ...) \
+ CDX_POPULATE_DWORD_5(dword, CDX_DWORD, 0, __VA_ARGS__)
+#define CDX_POPULATE_DWORD_3(dword, ...) \
+ CDX_POPULATE_DWORD_4(dword, CDX_DWORD, 0, __VA_ARGS__)
+#define CDX_POPULATE_DWORD_2(dword, ...) \
+ CDX_POPULATE_DWORD_3(dword, CDX_DWORD, 0, __VA_ARGS__)
+#define CDX_POPULATE_DWORD_1(dword, ...) \
+ CDX_POPULATE_DWORD_2(dword, CDX_DWORD, 0, __VA_ARGS__)
+#define CDX_SET_DWORD(dword) \
+ CDX_POPULATE_DWORD_1(dword, CDX_DWORD, 0xffffffff)
+
+#endif /* CDX_BITFIELD_H */
diff --git a/include/linux/cdx/edac_cdx_pcol.h b/include/linux/cdx/edac_cdx_pcol.h
new file mode 100644
index 000000000000..749db33bb482
--- /dev/null
+++ b/include/linux/cdx/edac_cdx_pcol.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Driver for AMD network controllers and boards
+ *
+ * Copyright (C) 2021, Xilinx, Inc.
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#ifndef MC_CDX_PCOL_H
+#define MC_CDX_PCOL_H
+#include <linux/cdx/mcdi.h>
+
+#define MC_CMD_EDAC_GET_DDR_CONFIG_OUT_WORD_LENGTH_LEN 4
+/* Number of registers for the DDR controller */
+#define MC_CMD_GET_DDR_CONFIG_OFST 4
+#define MC_CMD_GET_DDR_CONFIG_LEN 4
+
+/***********************************/
+/* MC_CMD_EDAC_GET_DDR_CONFIG
+ * Provides detailed configuration for the DDR controller of the given index.
+ */
+#define MC_CMD_EDAC_GET_DDR_CONFIG 0x3
+
+/* MC_CMD_EDAC_GET_DDR_CONFIG_IN msgrequest */
+#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_OFST 0
+#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_LEN 4
+
+#endif /* MC_CDX_PCOL_H */
diff --git a/include/linux/cdx/mcdi.h b/include/linux/cdx/mcdi.h
new file mode 100644
index 000000000000..74075305cba4
--- /dev/null
+++ b/include/linux/cdx/mcdi.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2008-2013 Solarflare Communications Inc.
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#ifndef CDX_MCDI_H
+#define CDX_MCDI_H
+
+#include <linux/mutex.h>
+#include <linux/kref.h>
+#include <linux/rpmsg.h>
+
+#include "linux/cdx/bitfield.h"
+
+/**
+ * enum cdx_mcdi_mode - MCDI transaction mode
+ * @MCDI_MODE_EVENTS: wait for an mcdi response callback.
+ * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls
+ */
+enum cdx_mcdi_mode {
+ MCDI_MODE_EVENTS,
+ MCDI_MODE_FAIL,
+};
+
+#define MCDI_RPC_TIMEOUT (10 * HZ)
+#define MCDI_RPC_LONG_TIMEOU (60 * HZ)
+#define MCDI_RPC_POST_RST_TIME (10 * HZ)
+
+/**
+ * enum cdx_mcdi_cmd_state - State for an individual MCDI command
+ * @MCDI_STATE_QUEUED: Command not started and is waiting to run.
+ * @MCDI_STATE_RETRY: Command was submitted and MC rejected with no resources,
+ * as MC have too many outstanding commands. Command will be retried once
+ * another command returns.
+ * @MCDI_STATE_RUNNING: Command was accepted and is running.
+ * @MCDI_STATE_RUNNING_CANCELLED: Command is running but the issuer cancelled
+ * the command.
+ * @MCDI_STATE_FINISHED: Processing of this command has completed.
+ */
+
+enum cdx_mcdi_cmd_state {
+ MCDI_STATE_QUEUED,
+ MCDI_STATE_RETRY,
+ MCDI_STATE_RUNNING,
+ MCDI_STATE_RUNNING_CANCELLED,
+ MCDI_STATE_FINISHED,
+};
+
+/**
+ * struct cdx_mcdi - CDX MCDI Firmware interface, to interact
+ * with CDX controller.
+ * @mcdi: MCDI interface
+ * @mcdi_ops: MCDI operations
+ * @r5_rproc : R5 Remoteproc device handle
+ * @rpdev: RPMsg device
+ * @ept: RPMsg endpoint
+ * @work: Post probe work
+ */
+struct cdx_mcdi {
+ /* MCDI interface */
+ struct cdx_mcdi_data *mcdi;
+ const struct cdx_mcdi_ops *mcdi_ops;
+
+ struct rproc *r5_rproc;
+ struct rpmsg_device *rpdev;
+ struct rpmsg_endpoint *ept;
+ struct work_struct work;
+};
+
+struct cdx_mcdi_ops {
+ void (*mcdi_request)(struct cdx_mcdi *cdx,
+ const struct cdx_dword *hdr, size_t hdr_len,
+ const struct cdx_dword *sdu, size_t sdu_len);
+ unsigned int (*mcdi_rpc_timeout)(struct cdx_mcdi *cdx, unsigned int cmd);
+};
+
+typedef void cdx_mcdi_async_completer(struct cdx_mcdi *cdx,
+ unsigned long cookie, int rc,
+ struct cdx_dword *outbuf,
+ size_t outlen_actual);
+
+/**
+ * struct cdx_mcdi_cmd - An outstanding MCDI command
+ * @ref: Reference count. There will be one reference if the command is
+ * in the mcdi_iface cmd_list, another if it's on a cleanup list,
+ * and a third if it's queued in the work queue.
+ * @list: The data for this entry in mcdi->cmd_list
+ * @cleanup_list: The data for this entry in a cleanup list
+ * @work: The work item for this command, queued in mcdi->workqueue
+ * @mcdi: The mcdi_iface for this command
+ * @state: The state of this command
+ * @inlen: inbuf length
+ * @inbuf: Input buffer
+ * @quiet: Whether to silence errors
+ * @reboot_seen: Whether a reboot has been seen during this command,
+ * to prevent duplicates
+ * @seq: Sequence number
+ * @started: Jiffies this command was started at
+ * @cookie: Context for completion function
+ * @completer: Completion function
+ * @handle: Command handle
+ * @cmd: Command number
+ * @rc: Return code
+ * @outlen: Length of output buffer
+ * @outbuf: Output buffer
+ */
+struct cdx_mcdi_cmd {
+ struct kref ref;
+ struct list_head list;
+ struct list_head cleanup_list;
+ struct work_struct work;
+ struct cdx_mcdi_iface *mcdi;
+ enum cdx_mcdi_cmd_state state;
+ size_t inlen;
+ const struct cdx_dword *inbuf;
+ bool quiet;
+ bool reboot_seen;
+ u8 seq;
+ unsigned long started;
+ unsigned long cookie;
+ cdx_mcdi_async_completer *completer;
+ unsigned int handle;
+ unsigned int cmd;
+ int rc;
+ size_t outlen;
+ struct cdx_dword *outbuf;
+ /* followed by inbuf data if necessary */
+};
+
+/**
+ * struct cdx_mcdi_iface - MCDI protocol context
+ * @cdx: The associated NIC
+ * @iface_lock: Serialise access to this structure
+ * @outstanding_cleanups: Count of cleanups
+ * @cmd_list: List of outstanding and running commands
+ * @workqueue: Workqueue used for delayed processing
+ * @cmd_complete_wq: Waitqueue for command completion
+ * @db_held_by: Command the MC doorbell is in use by
+ * @seq_held_by: Command each sequence number is in use by
+ * @prev_handle: The last used command handle
+ * @mode: Poll for mcdi completion, or wait for an mcdi_event
+ * @prev_seq: The last used sequence number
+ * @new_epoch: Indicates start of day or start of MC reboot recovery
+ */
+struct cdx_mcdi_iface {
+ struct cdx_mcdi *cdx;
+ /* Serialise access */
+ struct mutex iface_lock;
+ unsigned int outstanding_cleanups;
+ struct list_head cmd_list;
+ struct workqueue_struct *workqueue;
+ wait_queue_head_t cmd_complete_wq;
+ struct cdx_mcdi_cmd *db_held_by;
+ struct cdx_mcdi_cmd *seq_held_by[16];
+ unsigned int prev_handle;
+ enum cdx_mcdi_mode mode;
+ u8 prev_seq;
+ bool new_epoch;
+};
+
+/**
+ * struct cdx_mcdi_data - extra state for NICs that implement MCDI
+ * @iface: Interface/protocol state
+ * @fn_flags: Flags for this function, as returned by %MC_CMD_DRV_ATTACH.
+ */
+struct cdx_mcdi_data {
+ struct cdx_mcdi_iface iface;
+ u32 fn_flags;
+};
+
+void cdx_mcdi_finish(struct cdx_mcdi *cdx);
+int cdx_mcdi_init(struct cdx_mcdi *cdx);
+void cdx_mcdi_process_cmd(struct cdx_mcdi *cdx, struct cdx_dword *outbuf, int len);
+int cdx_mcdi_rpc(struct cdx_mcdi *cdx, unsigned int cmd,
+ const struct cdx_dword *inbuf, size_t inlen,
+ struct cdx_dword *outbuf, size_t outlen, size_t *outlen_actual);
+
+/*
+ * We expect that 16- and 32-bit fields in MCDI requests and responses
+ * are appropriately aligned, but 64-bit fields are only
+ * 32-bit-aligned.
+ */
+#define MCDI_DECLARE_BUF(_name, _len) struct cdx_dword _name[DIV_ROUND_UP(_len, 4)] = {{0}}
+#define _MCDI_PTR(_buf, _offset) \
+ ((u8 *)(_buf) + (_offset))
+#define MCDI_PTR(_buf, _field) \
+ _MCDI_PTR(_buf, MC_CMD_ ## _field ## _OFST)
+#define _MCDI_CHECK_ALIGN(_ofst, _align) \
+ ((void)BUILD_BUG_ON_ZERO((_ofst) & ((_align) - 1)), \
+ (_ofst))
+#define _MCDI_DWORD(_buf, _field) \
+ ((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2))
+
+#define MCDI_SET_DWORD(_buf, _field, _value) \
+ CDX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), CDX_DWORD, _value)
+#define MCDI_DWORD(_buf, _field) \
+ CDX_DWORD_FIELD(*_MCDI_DWORD(_buf, _field), CDX_DWORD)
+#endif /* CDX_MCDI_H */
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
index 52a98886a455..1fd22ea6eba4 100644
--- a/include/linux/cfi.h
+++ b/include/linux/cfi.h
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <asm/cfi.h>
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
extern bool cfi_warn;
enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
@@ -52,7 +52,7 @@ static inline u32 cfi_get_func_hash(void *func)
extern u32 cfi_bpf_hash;
extern u32 cfi_bpf_subprog_hash;
-#else /* CONFIG_CFI_CLANG */
+#else /* CONFIG_CFI */
static inline int cfi_get_offset(void) { return 0; }
static inline u32 cfi_get_func_hash(void *func) { return 0; }
@@ -60,7 +60,7 @@ static inline u32 cfi_get_func_hash(void *func) { return 0; }
#define cfi_bpf_hash 0U
#define cfi_bpf_subprog_hash 0U
-#endif /* CONFIG_CFI_CLANG */
+#endif /* CONFIG_CFI */
#ifdef CONFIG_ARCH_USES_CFI_TRAPS
bool is_cfi_trap(unsigned long addr);
diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h
index 685f7181780f..a86af9bc8bdc 100644
--- a/include/linux/cfi_types.h
+++ b/include/linux/cfi_types.h
@@ -8,7 +8,7 @@
#ifdef __ASSEMBLY__
#include <linux/linkage.h>
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
/*
* Use the __kcfi_typeid_<function> type identifier symbol to
* annotate indirectly called assembly functions. The compiler emits
@@ -29,12 +29,12 @@
#define SYM_TYPED_START(name, linkage, align...) \
SYM_TYPED_ENTRY(name, linkage, align)
-#else /* CONFIG_CFI_CLANG */
+#else /* CONFIG_CFI */
#define SYM_TYPED_START(name, linkage, align...) \
SYM_START(name, linkage, align)
-#endif /* CONFIG_CFI_CLANG */
+#endif /* CONFIG_CFI */
#ifndef SYM_TYPED_FUNC_START
#define SYM_TYPED_FUNC_START(name) \
@@ -43,7 +43,7 @@
#else /* __ASSEMBLY__ */
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
#define DEFINE_CFI_TYPE(name, func) \
/* \
* Force a reference to the function so the compiler generates \
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 6b93a64115fe..93318fce31f3 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -91,6 +91,12 @@ enum {
* cgroup_threadgroup_rwsem. This makes hot path operations such as
* forks and exits into the slow path and more expensive.
*
+ * Alleviate the contention between fork, exec, exit operations and
+ * writing to cgroup.procs by taking a per threadgroup rwsem instead of
+ * the global cgroup_threadgroup_rwsem. Fork and other operations
+ * from threads in different thread groups no longer contend with
+ * writing to cgroup.procs.
+ *
* The static usage pattern of creating a cgroup, enabling controllers,
* and then seeding it with CLONE_INTO_CGROUP doesn't require write
* locking cgroup_threadgroup_rwsem and thus doesn't benefit from
@@ -140,6 +146,17 @@ enum {
__CFTYPE_ADDED = (1 << 18),
};
+enum cgroup_attach_lock_mode {
+ /* Default */
+ CGRP_ATTACH_LOCK_GLOBAL,
+
+ /* When pid=0 && threadgroup=false, see comments in cgroup_procs_write_start */
+ CGRP_ATTACH_LOCK_NONE,
+
+ /* When favordynmods is on, see comments above CGRP_ROOT_FAVOR_DYNMODS */
+ CGRP_ATTACH_LOCK_PER_THREADGROUP,
+};
+
/*
* cgroup_file is the handle for a file instance created in a cgroup which
* is used, for example, to generate file changed notifications. This can
@@ -433,6 +450,23 @@ struct cgroup_freezer_state {
* frozen, SIGSTOPped, and PTRACEd.
*/
int nr_frozen_tasks;
+
+ /* Freeze time data consistency protection */
+ seqcount_t freeze_seq;
+
+ /*
+ * Most recent time the cgroup was requested to freeze.
+ * Accesses guarded by freeze_seq counter. Writes serialized
+ * by css_set_lock.
+ */
+ u64 freeze_start_nsec;
+
+ /*
+ * Total duration the cgroup has spent freezing.
+ * Accesses guarded by freeze_seq counter. Writes serialized
+ * by css_set_lock.
+ */
+ u64 frozen_nsec;
};
struct cgroup {
@@ -746,7 +780,6 @@ struct cgroup_subsys {
int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset);
void (*attach)(struct cgroup_taskset *tset);
- void (*post_attach)(void);
int (*can_fork)(struct task_struct *task,
struct css_set *cset);
void (*cancel_fork)(struct task_struct *task, struct css_set *cset);
@@ -822,6 +855,7 @@ struct cgroup_subsys {
};
extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+extern bool cgroup_enable_per_threadgroup_rwsem;
struct cgroup_of_peak {
unsigned long value;
@@ -833,11 +867,14 @@ struct cgroup_of_peak {
* @tsk: target task
*
* Allows cgroup operations to synchronize against threadgroup changes
- * using a percpu_rw_semaphore.
+ * using a global percpu_rw_semaphore and a per threadgroup rw_semaphore when
+ * favordynmods is on. See the comment above CGRP_ROOT_FAVOR_DYNMODS definition.
*/
static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
percpu_down_read(&cgroup_threadgroup_rwsem);
+ if (cgroup_enable_per_threadgroup_rwsem)
+ down_read(&tsk->signal->cgroup_threadgroup_rwsem);
}
/**
@@ -848,6 +885,8 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
*/
static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
{
+ if (cgroup_enable_per_threadgroup_rwsem)
+ up_read(&tsk->signal->cgroup_threadgroup_rwsem);
percpu_up_read(&cgroup_threadgroup_rwsem);
}
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b18fb5fcb38e..6ed477338b16 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -27,6 +27,7 @@
#include <linux/kernel_stat.h>
#include <linux/cgroup-defs.h>
+#include <linux/cgroup_namespace.h>
struct kernel_clone_args;
@@ -354,6 +355,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
return css->flags & CSS_DYING;
}
+static inline bool css_is_online(struct cgroup_subsys_state *css)
+{
+ return css->flags & CSS_ONLINE;
+}
+
static inline bool css_is_self(struct cgroup_subsys_state *css)
{
if (css == &css->cgroup->self) {
@@ -650,6 +656,7 @@ static inline void cgroup_kthread_ready(void)
}
void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen);
+struct cgroup *__cgroup_get_from_id(u64 id);
struct cgroup *cgroup_get_from_id(u64 id);
#else /* !CONFIG_CGROUPS */
@@ -783,52 +790,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
#endif /* CONFIG_CGROUP_DATA */
-struct cgroup_namespace {
- struct ns_common ns;
- struct user_namespace *user_ns;
- struct ucounts *ucounts;
- struct css_set *root_cset;
-};
-
-extern struct cgroup_namespace init_cgroup_ns;
-
-#ifdef CONFIG_CGROUPS
-
-void free_cgroup_ns(struct cgroup_namespace *ns);
-
-struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
- struct user_namespace *user_ns,
- struct cgroup_namespace *old_ns);
-
-int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
- struct cgroup_namespace *ns);
-
-static inline void get_cgroup_ns(struct cgroup_namespace *ns)
-{
- refcount_inc(&ns->ns.count);
-}
-
-static inline void put_cgroup_ns(struct cgroup_namespace *ns)
-{
- if (refcount_dec_and_test(&ns->ns.count))
- free_cgroup_ns(ns);
-}
-
-#else /* !CONFIG_CGROUPS */
-
-static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
-static inline struct cgroup_namespace *
-copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
- struct cgroup_namespace *old_ns)
-{
- return old_ns;
-}
-
-static inline void get_cgroup_ns(struct cgroup_namespace *ns) { }
-static inline void put_cgroup_ns(struct cgroup_namespace *ns) { }
-
-#endif /* !CONFIG_CGROUPS */
-
#ifdef CONFIG_CGROUPS
void cgroup_enter_frozen(void);
diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h
new file mode 100644
index 000000000000..78a8418558a4
--- /dev/null
+++ b/include/linux/cgroup_namespace.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CGROUP_NAMESPACE_H
+#define _LINUX_CGROUP_NAMESPACE_H
+
+#include <linux/ns_common.h>
+
+struct cgroup_namespace {
+ struct ns_common ns;
+ struct user_namespace *user_ns;
+ struct ucounts *ucounts;
+ struct css_set *root_cset;
+};
+
+extern struct cgroup_namespace init_cgroup_ns;
+
+#ifdef CONFIG_CGROUPS
+
+static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct cgroup_namespace, ns);
+}
+
+void free_cgroup_ns(struct cgroup_namespace *ns);
+
+struct cgroup_namespace *copy_cgroup_ns(u64 flags,
+ struct user_namespace *user_ns,
+ struct cgroup_namespace *old_ns);
+
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+ struct cgroup_namespace *ns);
+
+static inline void get_cgroup_ns(struct cgroup_namespace *ns)
+{
+ ns_ref_inc(ns);
+}
+
+static inline void put_cgroup_ns(struct cgroup_namespace *ns)
+{
+ if (ns_ref_put(ns))
+ free_cgroup_ns(ns);
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
+static inline struct cgroup_namespace *
+copy_cgroup_ns(u64 flags, struct user_namespace *user_ns,
+ struct cgroup_namespace *old_ns)
+{
+ return old_ns;
+}
+
+static inline void get_cgroup_ns(struct cgroup_namespace *ns) { }
+static inline void put_cgroup_ns(struct cgroup_namespace *ns) { }
+
+#endif /* !CONFIG_CGROUPS */
+
+#endif /* _LINUX_CGROUP_NAMESPACE_H */
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index fa4ffe037bc7..8720a0705900 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -18,23 +18,42 @@
#define KASAN_ABI_VERSION 5
/*
+ * Clang 22 added preprocessor macros to match GCC, in hopes of eventually
+ * dropping __has_feature support for sanitizers:
+ * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c
+ * Create these macros for older versions of clang so that it is easy to clean
+ * up once the minimum supported version of LLVM for building the kernel always
+ * creates these macros.
+ *
* Note: Checking __has_feature(*_sanitizer) is only true if the feature is
* enabled. Therefore it is not required to additionally check defined(CONFIG_*)
* to avoid adding redundant attributes in other configurations.
*/
+#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__)
+#define __SANITIZE_ADDRESS__
+#endif
+#if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__)
+#define __SANITIZE_HWADDRESS__
+#endif
+#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__)
+#define __SANITIZE_THREAD__
+#endif
-#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
-/* Emulate GCC's __SANITIZE_ADDRESS__ flag */
+/*
+ * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel.
+ */
+#ifdef __SANITIZE_HWADDRESS__
#define __SANITIZE_ADDRESS__
+#endif
+
+#ifdef __SANITIZE_ADDRESS__
#define __no_sanitize_address \
__attribute__((no_sanitize("address", "hwaddress")))
#else
#define __no_sanitize_address
#endif
-#if __has_feature(thread_sanitizer)
-/* emulate gcc's __SANITIZE_THREAD__ flag */
-#define __SANITIZE_THREAD__
+#ifdef __SANITIZE_THREAD__
#define __no_sanitize_thread \
__attribute__((no_sanitize("thread")))
#else
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 6f04a1d8c720..5b45ea7dff3e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -248,7 +248,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#endif /* __KERNEL__ */
-#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_CFI) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
/*
* Force a reference to the external symbol so the compiler generates
* __kcfi_typid.
@@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off)
#define __ADDRESSABLE(sym) \
___ADDRESSABLE(sym, __section(".discard.addressable"))
-#define __ADDRESSABLE_ASM(sym) \
- .pushsection .discard.addressable,"aw"; \
- .align ARCH_SEL(8,4); \
- ARCH_SEL(.quad, .long) __stringify(sym); \
- .popsection;
-
-#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym))
-
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 16755431fc11..2f3e80bf9f35 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -330,6 +330,29 @@ struct ftrace_likely_data {
#endif
/*
+ * The assume attribute is used to indicate that a certain condition is
+ * assumed to be true. If this condition is violated at runtime, the behavior
+ * is undefined. Compilers may or may not use this indication to generate
+ * optimized code.
+ *
+ * Note that the clang documentation states that optimizers may react
+ * differently to this attribute, and this may even have a negative
+ * performance impact. Therefore this attribute should be used with care.
+ *
+ * Optional: only supported since gcc >= 13
+ * Optional: only supported since clang >= 19
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#index-assume-statement-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#id13
+ *
+ */
+#ifdef CONFIG_CC_HAS_ASSUME
+# define __assume(expr) __attribute__((__assume__(expr)))
+#else
+# define __assume(expr)
+#endif
+
+/*
* Optional: only supported since gcc >= 15
* Optional: only supported since clang >= 18
*
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b91b993f58ee..487b3bf2e1ea 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -83,6 +83,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev,
extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index edfa61d80702..62cd7b35a29c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -168,6 +168,7 @@ enum cpuhp_state {
CPUHP_AP_QCOM_TIMER_STARTING,
CPUHP_AP_TEGRA_TIMER_STARTING,
CPUHP_AP_ARMADA_TIMER_STARTING,
+ CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING,
CPUHP_AP_MIPS_GIC_TIMER_STARTING,
CPUHP_AP_ARC_TIMER_STARTING,
CPUHP_AP_REALTEK_TIMER_STARTING,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index a102a10f833f..89ae50ad2ace 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -148,7 +148,7 @@ struct cred {
extern void __put_cred(struct cred *);
extern void exit_creds(struct task_struct *);
-extern int copy_creds(struct task_struct *, unsigned long);
+extern int copy_creds(struct task_struct *, u64);
extern const struct cred *get_task_cred(struct task_struct *);
extern struct cred *cred_alloc_blank(void);
extern struct cred *prepare_creds(void);
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f13664c62ddd..9e62b2a85538 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -636,6 +636,7 @@ struct damon_operations {
* @data: Data that will be passed to @fn.
* @repeat: Repeat invocations.
* @return_code: Return code from @fn invocation.
+ * @dealloc_on_cancel: De-allocate when canceled.
*
* Control damon_call(), which requests specific kdamond to invoke a given
* function. Refer to damon_call() for more details.
@@ -645,6 +646,7 @@ struct damon_call_control {
void *data;
bool repeat;
int return_code;
+ bool dealloc_on_cancel;
/* private: internal use only */
/* informs if the kdamond finished handling of the request */
struct completion completion;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index bacda9898f2b..7e7b45b0d097 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -88,12 +88,43 @@ int dlm_new_lockspace(const char *name, const char *cluster,
int *ops_result, dlm_lockspace_t **lockspace);
/*
+ * dlm_release_lockspace() release_option values:
+ *
+ * DLM_RELEASE_NO_LOCKS returns -EBUSY if any locks (lkb's)
+ * exist in the local lockspace.
+ *
+ * DLM_RELEASE_UNUSED previous value that is no longer used.
+ *
+ * DLM_RELEASE_NORMAL releases the lockspace regardless of any
+ * locks managed in the local lockspace.
+ *
+ * DLM_RELEASE_NO_EVENT release the lockspace regardless of any
+ * locks managed in the local lockspace, and does not submit
+ * a leave event to the cluster manager, so other nodes will
+ * not be notified that the node should be removed from the
+ * list of lockspace members.
+ *
+ * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining
+ * nodes will handle the removal of the node as if the node
+ * had failed, e.g. the recover_slot() callback would be used.
+ */
+#define DLM_RELEASE_NO_LOCKS 0
+#define DLM_RELEASE_UNUSED 1
+#define DLM_RELEASE_NORMAL 2
+#define DLM_RELEASE_NO_EVENT 3
+#define DLM_RELEASE_RECOVER 4
+#define __DLM_RELEASE_MAX DLM_RELEASE_RECOVER
+
+/*
* dlm_release_lockspace
*
* Stop a lockspace.
+ *
+ * release_option: see DLM_RELEASE values above.
*/
-int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force);
+int dlm_release_lockspace(dlm_lockspace_t *lockspace,
+ unsigned int release_option);
/*
* dlm_lock
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index f48e5fb88bd5..332b80c42b6f 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
{
__free_pages(page, get_order(size));
}
+static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+}
#endif /* CONFIG_DMA_CMA*/
#ifdef CONFIG_DMA_DECLARE_COHERENT
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 7fa1eb3cc823..61d50571ad88 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -171,6 +171,9 @@ int em_dev_update_perf_domain(struct device *dev,
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
const struct em_data_callback *cb,
const cpumask_t *cpus, bool microwatts);
+int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts);
void em_dev_unregister_perf_domain(struct device *dev);
struct em_perf_table *em_table_alloc(struct em_perf_domain *pd);
void em_table_free(struct em_perf_table *table);
@@ -350,6 +353,13 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
{
return -EINVAL;
}
+static inline
+int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
+{
+ return -EINVAL;
+}
static inline void em_dev_unregister_perf_domain(struct device *dev)
{
}
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index de5bd76a400c..d7d757e72554 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -856,8 +856,8 @@ struct kernel_ethtool_ts_info {
enum hwtstamp_provider_qualifier phc_qualifier;
enum hwtstamp_source phc_source;
int phc_phyindex;
- enum hwtstamp_tx_types tx_types;
- enum hwtstamp_rx_filters rx_filters;
+ u32 tx_types;
+ u32 rx_filters;
};
/**
diff --git a/include/linux/export.h b/include/linux/export.h
index f35d03b4113b..a686fd0ba406 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -91,6 +91,6 @@
#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns)
#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns)
-#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
+#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
#endif /* _LINUX_EXPORT_H */
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index cfb0dd1ea49c..3aac58a520c7 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -123,6 +123,12 @@ enum fid_type {
FILEID_BCACHEFS_WITH_PARENT = 0xb2,
/*
+ *
+ * 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number.
+ */
+ FILEID_NSFS = 0xf1,
+
+ /*
* 64 bit unique kernfs id
*/
FILEID_KERNFS = 0xfe,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1e7fd3ee759e..f5c859b8131a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -78,6 +78,9 @@ struct ctl_table_header;
/* unused opcode to mark special atomic instruction */
#define BPF_PROBE_ATOMIC 0xe0
+/* unused opcode to mark special ldsx instruction. Same as BPF_NOSPEC */
+#define BPF_PROBE_MEM32SX 0xc0
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@ -997,12 +1000,6 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
return prog->len * sizeof(struct bpf_insn);
}
-static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
-{
- return round_up(bpf_prog_insn_size(prog) +
- sizeof(__be64) + 1, SHA1_BLOCK_SIZE);
-}
-
static inline unsigned int bpf_prog_size(unsigned int proglen)
{
return max(sizeof(struct bpf_prog),
@@ -1296,7 +1293,7 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
u32 pass, void *image)
{
- pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen,
+ pr_err("flen=%u proglen=%u pass=%u image=%p from=%s pid=%d\n", flen,
proglen, pass, image, current->comm, task_pid_nr(current));
if (image)
@@ -1784,6 +1781,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void *buf, unsigned long len, bool flush);
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
void *to, u32 len)
@@ -1818,6 +1816,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
unsigned long len, bool flush)
{
}
+
+static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
#endif /* CONFIG_NET */
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index cceb70415ed2..d38c6e538e5c 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -341,7 +341,11 @@ struct fw_address_handler {
u64 length;
fw_address_callback_t address_callback;
void *callback_data;
+
+ // Only for core functions.
struct list_head link;
+ struct kref kref;
+ struct completion done;
};
struct fw_address_region {
diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h
index d4212bc42b2c..a33b45027356 100644
--- a/include/linux/firmware/imx/sm.h
+++ b/include/linux/firmware/imx/sm.h
@@ -26,13 +26,43 @@
#define SCMI_IMX94_CTRL_SAI3_MCLK 5U /*!< WAKE SAI3 MCLK */
#define SCMI_IMX94_CTRL_SAI4_MCLK 6U /*!< WAKE SAI4 MCLK */
+#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_DRV)
int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val);
int scmi_imx_misc_ctrl_set(u32 id, u32 val);
+#else
+static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IMX_SCMI_CPU_DRV)
int scmi_imx_cpu_start(u32 cpuid, bool start);
int scmi_imx_cpu_started(u32 cpuid, bool *started);
int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot,
bool resume);
+#else
+static inline int scmi_imx_cpu_start(u32 cpuid, bool start)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int scmi_imx_cpu_started(u32 cpuid, bool *started)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start,
+ bool boot, bool resume)
+{
+ return -EOPNOTSUPP;
+}
+#endif
enum scmi_imx_lmm_op {
SCMI_IMX_LMM_BOOT,
@@ -44,7 +74,24 @@ enum scmi_imx_lmm_op {
#define SCMI_IMX_LMM_OP_FORCEFUL 0
#define SCMI_IMX_LMM_OP_GRACEFUL BIT(0)
+#if IS_ENABLED(CONFIG_IMX_SCMI_LMM_DRV)
int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags);
int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info);
int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector);
+#else
+static inline int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector)
+{
+ return -EOPNOTSUPP;
+}
+#endif
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7ab4f96d705..9e9d7c757efe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -72,9 +72,7 @@ struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
-struct fscrypt_inode_info;
struct fscrypt_operations;
-struct fsverity_info;
struct fsverity_operations;
struct fsnotify_mark_connector;
struct fsnotify_sb_info;
@@ -149,7 +147,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)(1 << 12))
-/* FMODE_* bit 13 */
+/* Supports IOCB_HAS_METADATA */
+#define FMODE_HAS_METADATA ((__force fmode_t)(1 << 13))
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)(1 << 14))
@@ -356,6 +355,7 @@ struct readahead_control;
#define IOCB_APPEND (__force int) RWF_APPEND
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE
+#define IOCB_NOSIGNAL (__force int) RWF_NOSIGNAL
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
@@ -667,6 +667,124 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_CACHED_LINK 0x0040
/*
+ * Inode state bits. Protected by inode->i_lock
+ *
+ * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
+ * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
+ *
+ * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
+ * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
+ * various stages of removing an inode.
+ *
+ * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
+ *
+ * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
+ * fdatasync() (unless I_DIRTY_DATASYNC is also set).
+ * Timestamp updates are the usual cause.
+ * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
+ * these changes separately from I_DIRTY_SYNC so that we
+ * don't have to write inode on fdatasync() when only
+ * e.g. the timestamps have changed.
+ * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
+ * I_DIRTY_TIME The inode itself has dirty timestamps, and the
+ * lazytime mount option is enabled. We keep track of this
+ * separately from I_DIRTY_SYNC in order to implement
+ * lazytime. This gets cleared if I_DIRTY_INODE
+ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
+ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
+ * in place because writeback might already be in progress
+ * and we don't want to lose the time update
+ * I_NEW Serves as both a mutex and completion notification.
+ * New inodes set I_NEW. If two processes both create
+ * the same inode, one of them will release its inode and
+ * wait for I_NEW to be released before returning.
+ * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
+ * also cause waiting on I_NEW, without I_NEW actually
+ * being set. find_inode() uses this to prevent returning
+ * nearly-dead inodes.
+ * I_WILL_FREE Must be set when calling write_inode_now() if i_count
+ * is zero. I_FREEING must be set when I_WILL_FREE is
+ * cleared.
+ * I_FREEING Set when inode is about to be freed but still has dirty
+ * pages or buffers attached or the inode itself is still
+ * dirty.
+ * I_CLEAR Added by clear_inode(). In this state the inode is
+ * clean and can be destroyed. Inode keeps I_FREEING.
+ *
+ * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
+ * prohibited for many purposes. iget() must wait for
+ * the inode to be completely released, then create it
+ * anew. Other functions will just ignore such inodes,
+ * if appropriate. I_NEW is used for waiting.
+ *
+ * I_SYNC Writeback of inode is running. The bit is set during
+ * data writeback, and cleared with a wakeup on the bit
+ * address once it is done. The bit is also used to pin
+ * the inode in memory for flusher thread.
+ *
+ * I_REFERENCED Marks the inode as recently references on the LRU list.
+ *
+ * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
+ * synchronize competing switching instances and to tell
+ * wb stat updates to grab the i_pages lock. See
+ * inode_switch_wbs_work_fn() for details.
+ *
+ * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
+ * and work dirs among overlayfs mounts.
+ *
+ * I_CREATING New object's inode in the middle of setting up.
+ *
+ * I_DONTCACHE Evict inode as soon as it is not used anymore.
+ *
+ * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists.
+ * Used to detect that mark_inode_dirty() should not move
+ * inode between dirty lists.
+ *
+ * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback.
+ *
+ * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding
+ * i_count.
+ *
+ * Q: What is the difference between I_WILL_FREE and I_FREEING?
+ *
+ * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
+ * upon. There's one free address left.
+ */
+
+enum inode_state_bits {
+ __I_NEW = 0U,
+ __I_SYNC = 1U,
+ __I_LRU_ISOLATING = 2U
+ /* reserved wait address bit 3 */
+};
+
+enum inode_state_flags_t {
+ I_NEW = (1U << __I_NEW),
+ I_SYNC = (1U << __I_SYNC),
+ I_LRU_ISOLATING = (1U << __I_LRU_ISOLATING),
+ /* reserved flag bit 3 */
+ I_DIRTY_SYNC = (1U << 4),
+ I_DIRTY_DATASYNC = (1U << 5),
+ I_DIRTY_PAGES = (1U << 6),
+ I_WILL_FREE = (1U << 7),
+ I_FREEING = (1U << 8),
+ I_CLEAR = (1U << 9),
+ I_REFERENCED = (1U << 10),
+ I_LINKABLE = (1U << 11),
+ I_DIRTY_TIME = (1U << 12),
+ I_WB_SWITCH = (1U << 13),
+ I_OVL_INUSE = (1U << 14),
+ I_CREATING = (1U << 15),
+ I_DONTCACHE = (1U << 16),
+ I_SYNC_QUEUED = (1U << 17),
+ I_PINNING_NETFS_WB = (1U << 18)
+};
+
+#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
+#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
+#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
+
+/*
* Keep mostly read-only and often accessed (especially for
* the RCU path lookup and 'stat' data) fields at the beginning
* of the 'struct inode'
@@ -724,7 +842,7 @@ struct inode {
#endif
/* Misc */
- u32 i_state;
+ enum inode_state_flags_t i_state;
/* 32-bit hole */
struct rw_semaphore i_rwsem;
@@ -780,14 +898,6 @@ struct inode {
struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
#endif
-#ifdef CONFIG_FS_ENCRYPTION
- struct fscrypt_inode_info *i_crypt_info;
-#endif
-
-#ifdef CONFIG_FS_VERITY
- struct fsverity_info *i_verity_info;
-#endif
-
void *i_private; /* fs or device private pointer */
} __randomize_layout;
@@ -2008,20 +2118,18 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
/**
* struct renamedata - contains all information required for renaming
- * @old_mnt_idmap: idmap of the old mount the inode was found from
+ * @mnt_idmap: idmap of the mount in which the rename is happening.
* @old_parent: parent of source
* @old_dentry: source
- * @new_mnt_idmap: idmap of the new mount the inode was found from
* @new_parent: parent of destination
* @new_dentry: destination
* @delegated_inode: returns an inode needing a delegation break
* @flags: rename flags
*/
struct renamedata {
- struct mnt_idmap *old_mnt_idmap;
+ struct mnt_idmap *mnt_idmap;
struct dentry *old_parent;
struct dentry *old_dentry;
- struct mnt_idmap *new_mnt_idmap;
struct dentry *new_parent;
struct dentry *new_dentry;
struct inode **delegated_inode;
@@ -2052,8 +2160,6 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
-int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
@@ -2492,117 +2598,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
};
}
-/*
- * Inode state bits. Protected by inode->i_lock
- *
- * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
- * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
- *
- * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
- * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
- * various stages of removing an inode.
- *
- * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
- *
- * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
- * fdatasync() (unless I_DIRTY_DATASYNC is also set).
- * Timestamp updates are the usual cause.
- * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
- * these changes separately from I_DIRTY_SYNC so that we
- * don't have to write inode on fdatasync() when only
- * e.g. the timestamps have changed.
- * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
- * I_DIRTY_TIME The inode itself has dirty timestamps, and the
- * lazytime mount option is enabled. We keep track of this
- * separately from I_DIRTY_SYNC in order to implement
- * lazytime. This gets cleared if I_DIRTY_INODE
- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
- * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
- * in place because writeback might already be in progress
- * and we don't want to lose the time update
- * I_NEW Serves as both a mutex and completion notification.
- * New inodes set I_NEW. If two processes both create
- * the same inode, one of them will release its inode and
- * wait for I_NEW to be released before returning.
- * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
- * also cause waiting on I_NEW, without I_NEW actually
- * being set. find_inode() uses this to prevent returning
- * nearly-dead inodes.
- * I_WILL_FREE Must be set when calling write_inode_now() if i_count
- * is zero. I_FREEING must be set when I_WILL_FREE is
- * cleared.
- * I_FREEING Set when inode is about to be freed but still has dirty
- * pages or buffers attached or the inode itself is still
- * dirty.
- * I_CLEAR Added by clear_inode(). In this state the inode is
- * clean and can be destroyed. Inode keeps I_FREEING.
- *
- * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
- * prohibited for many purposes. iget() must wait for
- * the inode to be completely released, then create it
- * anew. Other functions will just ignore such inodes,
- * if appropriate. I_NEW is used for waiting.
- *
- * I_SYNC Writeback of inode is running. The bit is set during
- * data writeback, and cleared with a wakeup on the bit
- * address once it is done. The bit is also used to pin
- * the inode in memory for flusher thread.
- *
- * I_REFERENCED Marks the inode as recently references on the LRU list.
- *
- * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
- * synchronize competing switching instances and to tell
- * wb stat updates to grab the i_pages lock. See
- * inode_switch_wbs_work_fn() for details.
- *
- * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
- * and work dirs among overlayfs mounts.
- *
- * I_CREATING New object's inode in the middle of setting up.
- *
- * I_DONTCACHE Evict inode as soon as it is not used anymore.
- *
- * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists.
- * Used to detect that mark_inode_dirty() should not move
- * inode between dirty lists.
- *
- * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback.
- *
- * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding
- * i_count.
- *
- * Q: What is the difference between I_WILL_FREE and I_FREEING?
- *
- * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
- * upon. There's one free address left.
- */
-#define __I_NEW 0
-#define I_NEW (1 << __I_NEW)
-#define __I_SYNC 1
-#define I_SYNC (1 << __I_SYNC)
-#define __I_LRU_ISOLATING 2
-#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
-
-#define I_DIRTY_SYNC (1 << 3)
-#define I_DIRTY_DATASYNC (1 << 4)
-#define I_DIRTY_PAGES (1 << 5)
-#define I_WILL_FREE (1 << 6)
-#define I_FREEING (1 << 7)
-#define I_CLEAR (1 << 8)
-#define I_REFERENCED (1 << 9)
-#define I_LINKABLE (1 << 10)
-#define I_DIRTY_TIME (1 << 11)
-#define I_WB_SWITCH (1 << 12)
-#define I_OVL_INUSE (1 << 13)
-#define I_CREATING (1 << 14)
-#define I_DONTCACHE (1 << 15)
-#define I_SYNC_QUEUED (1 << 16)
-#define I_PINNING_NETFS_WB (1 << 17)
-
-#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
-#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
-#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
-
extern void __mark_inode_dirty(struct inode *, int);
static inline void mark_inode_dirty(struct inode *inode)
{
@@ -2614,6 +2609,11 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
__mark_inode_dirty(inode, I_DIRTY_SYNC);
}
+static inline int icount_read(const struct inode *inode)
+{
+ return atomic_read(&inode->i_count);
+}
+
/*
* Returns true if the given inode itself only has dirty timestamps (its pages
* may still be dirty) and isn't currently being allocated or freed.
@@ -2713,12 +2713,6 @@ static inline bool is_mgtime(const struct inode *inode)
return inode->i_opflags & IOP_MGTIME;
}
-extern struct dentry *mount_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_nodev(struct file_system_type *fs_type,
- int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
void retire_super(struct super_block *sb);
void generic_shutdown_super(struct super_block *sb);
@@ -3281,7 +3275,7 @@ static inline bool is_dot_dotdot(const char *name, size_t len)
/**
* name_contains_dotdot - check if a file name contains ".." path components
- *
+ * @name: File path string to check
* Search for ".." surrounded by either '/' or start/end of string.
*/
static inline bool name_contains_dotdot(const char *name)
@@ -3313,8 +3307,8 @@ extern void address_space_init_once(struct address_space *mapping);
extern struct inode * igrab(struct inode *);
extern ino_t iunique(struct super_block *, ino_t);
extern int inode_needs_sync(struct inode *inode);
-extern int generic_delete_inode(struct inode *inode);
-static inline int generic_drop_inode(struct inode *inode)
+extern int inode_just_drop(struct inode *inode);
+static inline int inode_generic_drop(struct inode *inode)
{
return !inode->i_nlink || inode_unhashed(inode);
}
@@ -3393,7 +3387,6 @@ static inline struct inode *new_inode_pseudo(struct super_block *sb)
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
-extern int file_remove_privs_flags(struct file *file, unsigned int flags);
extern int file_remove_privs(struct file *);
int setattr_should_drop_sgid(struct mnt_idmap *idmap,
const struct inode *inode);
@@ -4023,4 +4016,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
+static inline bool extensible_ioctl_valid(unsigned int cmd_a,
+ unsigned int cmd_b, size_t min_size)
+{
+ if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b))
+ return false;
+ if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b))
+ return false;
+ if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b))
+ return false;
+ if (_IOC_SIZE(cmd_a) < min_size)
+ return false;
+ return true;
+}
+
#endif /* _LINUX_FS_H */
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 7773eb870039..671f031be173 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -186,10 +186,12 @@ struct fc_log {
extern __attribute__((format(printf, 4, 5)))
void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...);
-#define __logfc(fc, l, fmt, ...) logfc((fc)->log.log, NULL, \
- l, fmt, ## __VA_ARGS__)
-#define __plog(p, l, fmt, ...) logfc((p)->log, (p)->prefix, \
- l, fmt, ## __VA_ARGS__)
+#define __logfc(fc, l, fmt, ...) \
+ logfc((fc)->log.log, NULL, (l), (fmt), ## __VA_ARGS__)
+#define __plogp(p, prefix, l, fmt, ...) \
+ logfc((p)->log, (prefix), (l), (fmt), ## __VA_ARGS__)
+#define __plog(p, l, fmt, ...) __plogp(p, (p)->prefix, l, fmt, ## __VA_ARGS__)
+
/**
* infof - Store supplementary informational message
* @fc: The context in which to log the informational message
@@ -201,6 +203,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
#define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__)
#define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__)
#define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__)
+#define infofcp(fc, prefix, fmt, ...) \
+ __plogp((&(fc)->log), prefix, 'i', fmt, ## __VA_ARGS__)
/**
* warnf - Store supplementary warning message
@@ -213,6 +217,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
#define warnf(fc, fmt, ...) __logfc(fc, 'w', fmt, ## __VA_ARGS__)
#define warn_plog(p, fmt, ...) __plog(p, 'w', fmt, ## __VA_ARGS__)
#define warnfc(fc, fmt, ...) __plog((&(fc)->log), 'w', fmt, ## __VA_ARGS__)
+#define warnfcp(fc, prefix, fmt, ...) \
+ __plogp((&(fc)->log), prefix, 'w', fmt, ## __VA_ARGS__)
/**
* errorf - Store supplementary error message
@@ -225,6 +231,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
#define errorf(fc, fmt, ...) __logfc(fc, 'e', fmt, ## __VA_ARGS__)
#define error_plog(p, fmt, ...) __plog(p, 'e', fmt, ## __VA_ARGS__)
#define errorfc(fc, fmt, ...) __plog((&(fc)->log), 'e', fmt, ## __VA_ARGS__)
+#define errorfcp(fc, prefix, fmt, ...) \
+ __plogp((&(fc)->log), prefix, 'e', fmt, ## __VA_ARGS__)
/**
* invalf - Store supplementary invalid argument error message
@@ -237,5 +245,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
#define invalf(fc, fmt, ...) (errorf(fc, fmt, ## __VA_ARGS__), -EINVAL)
#define inval_plog(p, fmt, ...) (error_plog(p, fmt, ## __VA_ARGS__), -EINVAL)
#define invalfc(fc, fmt, ...) (errorfc(fc, fmt, ## __VA_ARGS__), -EINVAL)
+#define invalfcp(fc, prefix, fmt, ...) \
+ (errorfcp(fc, prefix, fmt, ## __VA_ARGS__), -EINVAL)
#endif /* _LINUX_FS_CONTEXT_H */
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 10dd161690a2..516aba5b858b 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -61,6 +61,12 @@ struct fscrypt_name {
/* Crypto operations for filesystems */
struct fscrypt_operations {
+ /*
+ * The offset of the pointer to struct fscrypt_inode_info in the
+ * filesystem-specific part of the inode, relative to the beginning of
+ * the common part of the inode (the 'struct inode').
+ */
+ ptrdiff_t inode_info_offs;
/*
* If set, then fs/crypto/ will allocate a global bounce page pool the
@@ -195,16 +201,44 @@ struct fscrypt_operations {
int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name,
struct dentry *dentry, unsigned int flags);
+/*
+ * Returns the address of the fscrypt info pointer within the
+ * filesystem-specific part of the inode. (To save memory on filesystems that
+ * don't support fscrypt, a field in 'struct inode' itself is no longer used.)
+ */
+static inline struct fscrypt_inode_info **
+fscrypt_inode_info_addr(const struct inode *inode)
+{
+ VFS_WARN_ON_ONCE(inode->i_sb->s_cop->inode_info_offs == 0);
+ return (void *)inode + inode->i_sb->s_cop->inode_info_offs;
+}
+
+/*
+ * Load the inode's fscrypt info pointer, using a raw dereference. Since this
+ * uses a raw dereference with no memory barrier, it is appropriate to use only
+ * when the caller knows the inode's key setup already happened, resulting in
+ * non-NULL fscrypt info. E.g., the file contents en/decryption functions use
+ * this, since fscrypt_file_open() set up the key.
+ */
+static inline struct fscrypt_inode_info *
+fscrypt_get_inode_info_raw(const struct inode *inode)
+{
+ struct fscrypt_inode_info *ci = *fscrypt_inode_info_addr(inode);
+
+ VFS_WARN_ON_ONCE(ci == NULL);
+ return ci;
+}
+
static inline struct fscrypt_inode_info *
fscrypt_get_inode_info(const struct inode *inode)
{
/*
* Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info().
- * I.e., another task may publish ->i_crypt_info concurrently, executing
- * a RELEASE barrier. We need to use smp_load_acquire() here to safely
+ * I.e., another task may publish the fscrypt info concurrently,
+ * executing a RELEASE barrier. Use smp_load_acquire() here to safely
* ACQUIRE the memory the other task published.
*/
- return smp_load_acquire(&inode->i_crypt_info);
+ return smp_load_acquire(fscrypt_inode_info_addr(inode));
}
/**
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 1eb7eae580be..5bc7280425a7 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -26,8 +26,16 @@
/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
+struct fsverity_info;
+
/* Verity operations for filesystems */
struct fsverity_operations {
+ /**
+ * The offset of the pointer to struct fsverity_info in the
+ * filesystem-specific part of the inode, relative to the beginning of
+ * the common part of the inode (the 'struct inode').
+ */
+ ptrdiff_t inode_info_offs;
/**
* Begin enabling verity on the given file.
@@ -124,15 +132,37 @@ struct fsverity_operations {
#ifdef CONFIG_FS_VERITY
+/*
+ * Returns the address of the verity info pointer within the filesystem-specific
+ * part of the inode. (To save memory on filesystems that don't support
+ * fsverity, a field in 'struct inode' itself is no longer used.)
+ */
+static inline struct fsverity_info **
+fsverity_info_addr(const struct inode *inode)
+{
+ VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0);
+ return (void *)inode + inode->i_sb->s_vop->inode_info_offs;
+}
+
static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
/*
- * Pairs with the cmpxchg_release() in fsverity_set_info().
- * I.e., another task may publish ->i_verity_info concurrently,
- * executing a RELEASE barrier. We need to use smp_load_acquire() here
- * to safely ACQUIRE the memory the other task published.
+ * Since this function can be called on inodes belonging to filesystems
+ * that don't support fsverity at all, and fsverity_info_addr() doesn't
+ * work on such filesystems, we have to start with an IS_VERITY() check.
+ * Checking IS_VERITY() here is also useful to minimize the overhead of
+ * fsverity_active() on non-verity files.
+ */
+ if (!IS_VERITY(inode))
+ return NULL;
+
+ /*
+ * Pairs with the cmpxchg_release() in fsverity_set_info(). I.e.,
+ * another task may publish the inode's verity info concurrently,
+ * executing a RELEASE barrier. Use smp_load_acquire() here to safely
+ * ACQUIRE the memory the other task published.
*/
- return smp_load_acquire(&inode->i_verity_info);
+ return smp_load_acquire(fsverity_info_addr(inode));
}
/* enable.c */
@@ -156,12 +186,19 @@ void __fsverity_cleanup_inode(struct inode *inode);
* fsverity_cleanup_inode() - free the inode's verity info, if present
* @inode: an inode being evicted
*
- * Filesystems must call this on inode eviction to free ->i_verity_info.
+ * Filesystems must call this on inode eviction to free the inode's verity info.
*/
static inline void fsverity_cleanup_inode(struct inode *inode)
{
- if (inode->i_verity_info)
+ /*
+ * Only IS_VERITY() inodes can have verity info, so start by checking
+ * for IS_VERITY() (which is faster than retrieving the pointer to the
+ * verity info). This minimizes overhead for non-verity inodes.
+ */
+ if (IS_VERITY(inode))
__fsverity_cleanup_inode(inode);
+ else
+ VFS_WARN_ON_ONCE(*fsverity_info_addr(inode) != NULL);
}
/* read_metadata.c */
@@ -267,12 +304,12 @@ static inline bool fsverity_verify_page(struct page *page)
* fsverity_active() - do reads from the inode need to go through fs-verity?
* @inode: inode to check
*
- * This checks whether ->i_verity_info has been set.
+ * This checks whether the inode's verity info has been set.
*
* Filesystems call this from ->readahead() to check whether the pages need to
* be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
* a race condition where the file is being read concurrently with
- * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.)
+ * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before the verity info.)
*
* Return: true if reads need to go through fs-verity, otherwise false
*/
@@ -287,7 +324,7 @@ static inline bool fsverity_active(const struct inode *inode)
* @filp: the struct file being set up
*
* When opening a verity file, deny the open if it is for writing. Otherwise,
- * set up the inode's ->i_verity_info if not already done.
+ * set up the inode's verity info if not already done.
*
* When combined with fscrypt, this must be called after fscrypt_file_open().
* Otherwise, we won't have the key set up to decrypt the verity metadata.
diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h
new file mode 100644
index 000000000000..8838ca2f3d08
--- /dev/null
+++ b/include/linux/hfs_common.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HFS/HFS+ common definitions, inline functions,
+ * and shared functionality.
+ */
+
+#ifndef _HFS_COMMON_H_
+#define _HFS_COMMON_H_
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define hfs_dbg(fmt, ...) \
+ pr_debug("pid %d:%s:%d %s(): " fmt, \
+ current->pid, __FILE__, __LINE__, __func__, ##__VA_ARGS__) \
+
+#endif /* _HFS_COMMON_H_ */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 1ef867bb8c44..2cf1bf65b225 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -154,14 +154,11 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
return ktime_to_ns(timer->node.expires);
}
-static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
-{
- return ktime_sub(timer->node.expires, timer->base->get_time());
-}
+ktime_t hrtimer_cb_get_time(const struct hrtimer *timer);
-static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
+static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
{
- return timer->base->get_time();
+ return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer));
}
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
@@ -200,8 +197,7 @@ __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
static inline ktime_t
hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
{
- return __hrtimer_expires_remaining_adjusted(timer,
- timer->base->get_time());
+ return __hrtimer_expires_remaining_adjusted(timer, hrtimer_cb_get_time(timer));
}
#ifdef CONFIG_TIMERFD
@@ -363,7 +359,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
{
- return hrtimer_forward(timer, timer->base->get_time(), interval);
+ return hrtimer_forward(timer, hrtimer_cb_get_time(timer), interval);
}
/* Precise sleep: */
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 84a5045f80f3..aa49ffa130e5 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -41,7 +41,6 @@
* @seq: seqcount around __run_hrtimer
* @running: pointer to the currently running hrtimer
* @active: red black tree root node for the active timers
- * @get_time: function to retrieve the current time of the clock
* @offset: offset of this clock to the monotonic base
*/
struct hrtimer_clock_base {
@@ -51,7 +50,6 @@ struct hrtimer_clock_base {
seqcount_raw_spinlock_t seq;
struct hrtimer *running;
struct timerqueue_head active;
- ktime_t (*get_time)(void);
ktime_t offset;
} __hrtimer_clock_base_align;
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 80a178f3d896..12f5ee43850e 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -420,9 +420,6 @@ struct io_ring_ctx {
struct list_head defer_list;
unsigned nr_drained;
- struct io_alloc_cache msg_cache;
- spinlock_t msg_lock;
-
#ifdef CONFIG_NET_RX_BUSY_POLL
struct list_head napi_list; /* track busy poll napi_id */
spinlock_t napi_lock; /* napi_list lock */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 14f7eaf1b443..079d8773790c 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -118,8 +118,8 @@ struct task_struct;
#ifdef CONFIG_BLOCK
void put_io_context(struct io_context *ioc);
void exit_io_context(struct task_struct *task);
-int __copy_io(unsigned long clone_flags, struct task_struct *tsk);
-static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
+int __copy_io(u64 clone_flags, struct task_struct *tsk);
+static inline int copy_io(u64 clone_flags, struct task_struct *tsk)
{
if (!current->io_context)
return 0;
@@ -129,7 +129,7 @@ static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
struct io_context;
static inline void put_io_context(struct io_context *ioc) { }
static inline void exit_io_context(struct task_struct *task) { }
-static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
+static inline int copy_io(u64 clone_flags, struct task_struct *tsk)
{
return 0;
}
diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h
index 4696abfd311c..3e85afe794c0 100644
--- a/include/linux/iosys-map.h
+++ b/include/linux/iosys-map.h
@@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map)
*/
static inline void iosys_map_clear(struct iosys_map *map)
{
- if (map->is_iomem) {
- map->vaddr_iomem = NULL;
- map->is_iomem = false;
- } else {
- map->vaddr = NULL;
- }
+ memset(map, 0, sizeof(*map));
}
/**
diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h
index c4aa58032faf..f9a17fbbd398 100644
--- a/include/linux/iov_iter.h
+++ b/include/linux/iov_iter.h
@@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2
do {
struct folio *folio = folioq_folio(folioq, slot);
- size_t part, remain, consumed;
+ size_t part, remain = 0, consumed;
size_t fsize;
void *base;
@@ -168,14 +168,16 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2
break;
fsize = folioq_folio_size(folioq, slot);
- base = kmap_local_folio(folio, skip);
- part = umin(len, PAGE_SIZE - skip % PAGE_SIZE);
- remain = step(base, progress, part, priv, priv2);
- kunmap_local(base);
- consumed = part - remain;
- len -= consumed;
- progress += consumed;
- skip += consumed;
+ if (skip < fsize) {
+ base = kmap_local_folio(folio, skip);
+ part = umin(len, PAGE_SIZE - skip % PAGE_SIZE);
+ remain = step(base, progress, part, priv, priv2);
+ kunmap_local(base);
+ consumed = part - remain;
+ len -= consumed;
+ progress += consumed;
+ skip += consumed;
+ }
if (skip >= fsize) {
skip = 0;
slot++;
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e8240cf2611a..12faca29bbb9 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -129,20 +129,25 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
#endif
#if defined(CONFIG_IPC_NS)
-extern struct ipc_namespace *copy_ipcs(unsigned long flags,
+static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct ipc_namespace, ns);
+}
+
+extern struct ipc_namespace *copy_ipcs(u64 flags,
struct user_namespace *user_ns, struct ipc_namespace *ns);
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
{
if (ns) {
- if (refcount_inc_not_zero(&ns->ns.count))
+ if (ns_ref_get(ns))
return ns;
}
@@ -151,7 +156,7 @@ static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns
extern void put_ipc_ns(struct ipc_namespace *ns);
#else
-static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
+static inline struct ipc_namespace *copy_ipcs(u64 flags,
struct user_namespace *user_ns, struct ipc_namespace *ns)
{
if (flags & CLONE_NEWIPC)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1d6b606a81ef..c67e76fbcc07 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data,
extern int irq_chip_get_parent_state(struct irq_data *data,
enum irqchip_irq_state which,
bool *state);
+extern void irq_chip_shutdown_parent(struct irq_data *data);
+extern unsigned int irq_chip_startup_parent(struct irq_data *data);
extern void irq_chip_enable_parent(struct irq_data *data);
extern void irq_chip_disable_parent(struct irq_data *data);
extern void irq_chip_ack_parent(struct irq_data *data);
@@ -976,10 +978,6 @@ static inline void irq_free_desc(unsigned int irq)
irq_free_descs(irq, 1);
}
-#ifdef CONFIG_GENERIC_IRQ_LEGACY
-void irq_init_desc(unsigned int irq);
-#endif
-
/**
* struct irq_chip_regs - register offsets for struct irq_gci
* @enable: Enable register offset to reg_base
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 91b20788273d..0d1927da8055 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -61,7 +61,7 @@
extern void register_refined_jiffies(long clock_tick_rate);
-/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
+/* TICK_USEC is the time between ticks in usec */
#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 890011071f2b..fe5ce9215821 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -562,7 +562,7 @@ static inline void kasan_init_hw_tags(void) { }
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
-int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask);
void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long free_region_start,
unsigned long free_region_end,
@@ -574,7 +574,7 @@ static inline void kasan_populate_early_vm_area_shadow(void *start,
unsigned long size)
{ }
static inline int kasan_populate_vmalloc(unsigned long start,
- unsigned long size)
+ unsigned long size, gfp_t gfp_mask)
{
return 0;
}
@@ -610,7 +610,7 @@ static __always_inline void kasan_poison_vmalloc(const void *start,
static inline void kasan_populate_early_vm_area_shadow(void *start,
unsigned long size) { }
static inline int kasan_populate_vmalloc(unsigned long start,
- unsigned long size)
+ unsigned long size, gfp_t gfp_mask)
{
return 0;
}
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 75a2fb8b16c3..0143358874b0 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id)
/*
* The softirq flavor of kcov_remote_*() functions is introduced as a temporary
- * workaround for KCOV's lack of nested remote coverage sections support.
- *
- * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337.
- *
- * kcov_remote_start_usb_softirq():
- *
- * 1. Only collects coverage when called in the softirq context. This allows
- * avoiding nested remote coverage collection sections in the task context.
- * For example, USB/IP calls usb_hcd_giveback_urb() in the task context
- * within an existing remote coverage collection section. Thus, KCOV should
- * not attempt to start collecting coverage within the coverage collection
- * section in __usb_hcd_giveback_urb() in this case.
- *
- * 2. Disables interrupts for the duration of the coverage collection section.
- * This allows avoiding nested remote coverage collection sections in the
- * softirq context (a softirq might occur during the execution of a work in
- * the BH workqueue, which runs with in_serving_softirq() > 0).
- * For example, usb_giveback_urb_bh() runs in the BH workqueue with
- * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in
- * the middle of its remote coverage collection section, and the interrupt
- * handler might invoke __usb_hcd_giveback_urb() again.
+ * work around for kcov's lack of nested remote coverage sections support in
+ * task context. Adding support for nested sections is tracked in:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=210337
*/
-static inline unsigned long kcov_remote_start_usb_softirq(u64 id)
+static inline void kcov_remote_start_usb_softirq(u64 id)
{
- unsigned long flags = 0;
-
- if (in_serving_softirq()) {
- local_irq_save(flags);
+ if (in_serving_softirq() && !in_hardirq())
kcov_remote_start_usb(id);
- }
-
- return flags;
}
-static inline void kcov_remote_stop_softirq(unsigned long flags)
+static inline void kcov_remote_stop_softirq(void)
{
- if (in_serving_softirq()) {
+ if (in_serving_softirq() && !in_hardirq())
kcov_remote_stop();
- local_irq_restore(flags);
- }
}
#ifdef CONFIG_64BIT
@@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void)
}
static inline void kcov_remote_start_common(u64 id) {}
static inline void kcov_remote_start_usb(u64 id) {}
-static inline unsigned long kcov_remote_start_usb_softirq(u64 id)
-{
- return 0;
-}
-static inline void kcov_remote_stop_softirq(unsigned long flags) {}
+static inline void kcov_remote_start_usb_softirq(u64 id) {}
+static inline void kcov_remote_stop_softirq(void) {}
#endif /* CONFIG_KCOV */
#endif /* _LINUX_KCOV_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 1b10a5d84b68..39fe3e6cd282 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -460,7 +460,8 @@ bool kexec_load_permitted(int kexec_image_type);
/* List of defined/legal kexec file flags */
#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
- KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG)
+ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \
+ KEXEC_FILE_NO_CMA)
/* flag to track if kexec reboot is in progress */
extern bool kexec_in_progress;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index fd11fffdd3c3..adbe234a6f6c 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -211,7 +211,7 @@ LSM_HOOK(int, 0, file_open, struct file *file)
LSM_HOOK(int, 0, file_post_open, struct file *file, int mask)
LSM_HOOK(int, 0, file_truncate, struct file *file)
LSM_HOOK(int, 0, task_alloc, struct task_struct *task,
- unsigned long clone_flags)
+ u64 clone_flags)
LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task)
LSM_HOOK(int, 0, cred_alloc_blank, struct cred *cred, gfp_t gfp)
LSM_HOOK(void, LSM_RET_VOID, cred_free, struct cred *cred)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 090d1d3e19fe..79ec5a2bdcca 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -116,6 +116,9 @@ struct lsm_blob_sizes {
int lbs_xattr_count; /* number of xattr slots in new_xattrs array */
int lbs_tun_dev;
int lbs_bdev;
+ int lbs_bpf_map;
+ int lbs_bpf_prog;
+ int lbs_bpf_token;
};
/*
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b96746376e17..fcda8481de9a 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn;
* via a driver, and never indicated in the firmware-provided memory map as
* system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
* kernel resource tree.
- * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are
- * not initialized (only for reserved regions).
+ * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not
+ * fully initialized. Users of this flag are responsible to properly initialize
+ * struct pages of this region
* @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use,
* either explictitly with memblock_reserve_kern() or via memblock
* allocation APIs. All memblock allocations set this flag.
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index acadd41e0b5c..9009e27b5f44 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl)
void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
int folio_migrate_mapping(struct address_space *mapping,
struct folio *newfolio, struct folio *folio, int extra_count);
+int set_movable_ops(const struct movable_operations *ops, enum pagetype type);
#else
@@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
{
return -ENOSYS;
}
+static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type)
+{
+ return -ENOSYS;
+}
#endif /* CONFIG_MIGRATION */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8c5fbfb85749..10fe492e1fed 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -663,6 +663,7 @@ struct mlx5e_resources {
bool tisn_valid;
} hw_objs;
struct net_device *uplink_netdev;
+ netdevice_tracker tracker;
struct mutex uplink_netdev_lock;
struct mlx5_crypto_dek_priv *dek_priv;
};
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 86055d55836d..6ac76a0c3827 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -308,6 +308,8 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
struct mlx5_fc *mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size);
void mlx5_fc_local_destroy(struct mlx5_fc *counter);
+void mlx5_fc_local_get(struct mlx5_fc *counter);
+void mlx5_fc_local_put(struct mlx5_fc *counter);
u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
void mlx5_fc_query_cached(struct mlx5_fc *counter,
u64 *bytes, u64 *packets, u64 *lastuse);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 08bc2442db93..7f625c35128b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -631,6 +631,11 @@ static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc)
{
return atomic_read(&ptdesc->pt_share_count);
}
+
+static inline bool ptdesc_pmd_is_shared(struct ptdesc *ptdesc)
+{
+ return !!ptdesc_pmd_pts_count(ptdesc);
+}
#else
static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
{
@@ -1102,6 +1107,11 @@ struct mm_struct {
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
+ /* the ABI-related flags from the ELF header. Used for core dump */
+ unsigned long saved_e_flags;
+#endif
+
struct percpu_counter rss_stat[NR_MM_COUNTERS];
struct linux_binfmt *binfmt;
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 70b366b64816..0acd1089d149 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -11,7 +11,9 @@ struct fs_struct;
struct user_namespace;
struct ns_common;
-extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
+extern struct mnt_namespace init_mnt_ns;
+
+extern struct mnt_namespace *copy_mnt_ns(u64, struct mnt_namespace *,
struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T))
diff --git a/include/linux/msi.h b/include/linux/msi.h
index e5e86a8529fb..d415dd15a0a9 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -431,8 +431,6 @@ struct msi_domain_info;
* function.
* @domain_free_irqs: Optional function to override the default free
* function.
- * @msi_post_free: Optional function which is invoked after freeing
- * all interrupts.
* @msi_translate: Optional translate callback to support the odd wire to
* MSI bridges, e.g. MBIGEN
*
@@ -473,8 +471,6 @@ struct msi_domain_ops {
struct device *dev, int nvec);
void (*domain_free_irqs)(struct irq_domain *domain,
struct device *dev);
- void (*msi_post_free)(struct irq_domain *domain,
- struct device *dev);
int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec,
irq_hw_number_t *hwirq, unsigned int *type);
};
@@ -568,6 +564,8 @@ enum {
MSI_FLAG_PARENT_PM_DEV = (1 << 8),
/* Support for parent mask/unmask */
MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9),
+ /* Support for parent startup/shutdown */
+ MSI_FLAG_PCI_MSI_STARTUP_PARENT = (1 << 10),
/* Mask for the generic functionality */
MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 5d085428e471..a7800ef04e76 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -57,13 +57,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct dentry *base,
unsigned int flags);
extern int kern_path(const char *, unsigned, struct path *);
-
-extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
-extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
-extern void done_path_create(struct path *, struct dentry *);
-extern struct dentry *kern_path_locked(const char *, struct path *);
-extern struct dentry *kern_path_locked_negative(const char *, struct path *);
-extern struct dentry *user_path_locked_at(int , const char __user *, struct path *);
+struct dentry *kern_path_parent(const char *name, struct path *parent);
+
+extern struct dentry *start_creating_path(int, const char *, struct path *, unsigned int);
+extern struct dentry *start_creating_user_path(int, const char __user *, struct path *, unsigned int);
+extern void end_creating_path(struct path *, struct dentry *);
+extern struct dentry *start_removing_path(const char *, struct path *);
+extern struct dentry *start_removing_user_path_at(int , const char __user *, struct path *);
+static inline void end_removing_path(struct path *path , struct dentry *dentry)
+{
+ end_creating_path(path, dentry);
+}
int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
struct path *parent, struct qstr *last, int *type,
const struct path *root);
@@ -80,6 +84,9 @@ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
struct qstr *name,
struct dentry *base);
+struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
+ struct qstr *name,
+ struct dentry *base);
extern int follow_down_one(struct path *);
extern int follow_down(struct path *path, unsigned int flags);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5e5de4b0a433..f3a3b761abfb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2071,6 +2071,8 @@ enum netdev_reg_state {
* @max_pacing_offload_horizon: max EDT offload horizon in nsec.
* @napi_config: An array of napi_config structures containing per-NAPI
* settings.
+ * @num_napi_configs: number of allocated NAPI config structs,
+ * always >= max(num_rx_queues, num_tx_queues).
* @gro_flush_timeout: timeout for GRO layer in NAPI
* @napi_defer_hard_irqs: If not zero, provides a counter that would
* allow to avoid NIC hard IRQ, on busy queues.
@@ -2482,8 +2484,9 @@ struct net_device {
u64 max_pacing_offload_horizon;
struct napi_config *napi_config;
- unsigned long gro_flush_timeout;
+ u32 num_napi_configs;
u32 napi_defer_hard_irqs;
+ unsigned long gro_flush_timeout;
/**
* @up: copy of @state's IFF_UP, but safe to read with just @lock.
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 185bd8196503..98c96d649bf9 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -150,6 +150,7 @@ struct netfs_io_stream {
bool active; /* T if stream is active */
bool need_retry; /* T if this stream needs retrying */
bool failed; /* T if this stream failed */
+ bool transferred_valid; /* T is ->transferred is valid */
};
/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 169b4ae30ff4..9aed39abc94b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head,
extern int nfs_page_group_lock(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
extern int nfs_page_set_headlock(struct nfs_page *req);
extern void nfs_page_clear_headlock(struct nfs_page *req);
extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 7d22ea50b098..f5b68b8abb54 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -3,14 +3,151 @@
#define _LINUX_NS_COMMON_H
#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/sched.h>
struct proc_ns_operations;
+struct cgroup_namespace;
+struct ipc_namespace;
+struct mnt_namespace;
+struct net;
+struct pid_namespace;
+struct time_namespace;
+struct user_namespace;
+struct uts_namespace;
+
+extern struct cgroup_namespace init_cgroup_ns;
+extern struct ipc_namespace init_ipc_ns;
+extern struct mnt_namespace init_mnt_ns;
+extern struct net init_net;
+extern struct pid_namespace init_pid_ns;
+extern struct time_namespace init_time_ns;
+extern struct user_namespace init_user_ns;
+extern struct uts_namespace init_uts_ns;
+
+extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations utsns_operations;
+extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
+extern const struct proc_ns_operations cgroupns_operations;
+extern const struct proc_ns_operations timens_operations;
+extern const struct proc_ns_operations timens_for_children_operations;
+
struct ns_common {
+ u32 ns_type;
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
- refcount_t count;
+ refcount_t __ns_ref; /* do not use directly */
+ union {
+ struct {
+ u64 ns_id;
+ struct rb_node ns_tree_node;
+ struct list_head ns_list_node;
+ };
+ struct rcu_head ns_rcu;
+ };
};
+int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
+void __ns_common_free(struct ns_common *ns);
+
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns)->ns, \
+ const struct cgroup_namespace *: &(__ns)->ns, \
+ struct ipc_namespace *: &(__ns)->ns, \
+ const struct ipc_namespace *: &(__ns)->ns, \
+ struct mnt_namespace *: &(__ns)->ns, \
+ const struct mnt_namespace *: &(__ns)->ns, \
+ struct net *: &(__ns)->ns, \
+ const struct net *: &(__ns)->ns, \
+ struct pid_namespace *: &(__ns)->ns, \
+ const struct pid_namespace *: &(__ns)->ns, \
+ struct time_namespace *: &(__ns)->ns, \
+ const struct time_namespace *: &(__ns)->ns, \
+ struct user_namespace *: &(__ns)->ns, \
+ const struct user_namespace *: &(__ns)->ns, \
+ struct uts_namespace *: &(__ns)->ns, \
+ const struct uts_namespace *: &(__ns)->ns)
+
+#define ns_init_inum(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
+ struct ipc_namespace *: IPC_NS_INIT_INO, \
+ struct mnt_namespace *: MNT_NS_INIT_INO, \
+ struct net *: NET_NS_INIT_INO, \
+ struct pid_namespace *: PID_NS_INIT_INO, \
+ struct time_namespace *: TIME_NS_INIT_INO, \
+ struct user_namespace *: USER_NS_INIT_INO, \
+ struct uts_namespace *: UTS_NS_INIT_INO)
+
+#define ns_init_ns(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &init_cgroup_ns, \
+ struct ipc_namespace *: &init_ipc_ns, \
+ struct mnt_namespace *: &init_mnt_ns, \
+ struct net *: &init_net, \
+ struct pid_namespace *: &init_pid_ns, \
+ struct time_namespace *: &init_time_ns, \
+ struct user_namespace *: &init_user_ns, \
+ struct uts_namespace *: &init_uts_ns)
+
+#define to_ns_operations(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
+ struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
+ struct mnt_namespace *: &mntns_operations, \
+ struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
+ struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
+ struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
+ struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
+ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
+
+#define ns_common_type(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CLONE_NEWCGROUP, \
+ struct ipc_namespace *: CLONE_NEWIPC, \
+ struct mnt_namespace *: CLONE_NEWNS, \
+ struct net *: CLONE_NEWNET, \
+ struct pid_namespace *: CLONE_NEWPID, \
+ struct time_namespace *: CLONE_NEWTIME, \
+ struct user_namespace *: CLONE_NEWUSER, \
+ struct uts_namespace *: CLONE_NEWUTS)
+
+#define ns_common_init(__ns) \
+ __ns_common_init(to_ns_common(__ns), \
+ ns_common_type(__ns), \
+ to_ns_operations(__ns), \
+ (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0))
+
+#define ns_common_init_inum(__ns, __inum) \
+ __ns_common_init(to_ns_common(__ns), \
+ ns_common_type(__ns), \
+ to_ns_operations(__ns), \
+ __inum)
+
+#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
+
+static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
+{
+ return refcount_dec_and_test(&ns->__ns_ref);
+}
+
+static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
+{
+ return refcount_inc_not_zero(&ns->__ns_ref);
+}
+
+#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
+#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
+#define ns_ref_put_and_lock(__ns, __lock) \
+ refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
+
#endif
diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h
new file mode 100644
index 000000000000..e5a5fa83d36b
--- /dev/null
+++ b/include/linux/nsfs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
+
+#ifndef _LINUX_NSFS_H
+#define _LINUX_NSFS_H
+
+#include <linux/ns_common.h>
+#include <linux/cred.h>
+#include <linux/pid_namespace.h>
+
+struct path;
+struct task_struct;
+struct proc_ns_operations;
+
+int ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops);
+typedef struct ns_common *ns_get_path_helper_t(void *);
+int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
+ void *private_data);
+
+bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
+
+int ns_get_name(char *buf, size_t size, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops);
+void nsfs_init(void);
+
+#define __current_namespace_from_type(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: current->nsproxy->cgroup_ns, \
+ struct ipc_namespace *: current->nsproxy->ipc_ns, \
+ struct net *: current->nsproxy->net_ns, \
+ struct pid_namespace *: task_active_pid_ns(current), \
+ struct mnt_namespace *: current->nsproxy->mnt_ns, \
+ struct time_namespace *: current->nsproxy->time_ns, \
+ struct user_namespace *: current_user_ns(), \
+ struct uts_namespace *: current->nsproxy->uts_ns)
+
+#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns)
+
+#endif /* _LINUX_NSFS_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index dab6a1734a22..bd118a187dec 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,17 +42,6 @@ struct nsproxy {
};
extern struct nsproxy init_nsproxy;
-#define to_ns_common(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &(__ns->ns), \
- struct ipc_namespace *: &(__ns->ns), \
- struct net *: &(__ns->ns), \
- struct pid_namespace *: &(__ns->ns), \
- struct mnt_namespace *: &(__ns->ns), \
- struct time_namespace *: &(__ns->ns), \
- struct user_namespace *: &(__ns->ns), \
- struct uts_namespace *: &(__ns->ns))
-
/*
* A structure to encompass all bits needed to install
* a partial or complete new set of namespaces.
@@ -103,7 +92,7 @@ static inline struct cred *nsset_cred(struct nsset *set)
*
*/
-int copy_namespaces(unsigned long flags, struct task_struct *tsk);
+int copy_namespaces(u64 flags, struct task_struct *tsk);
void exit_task_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
int exec_task_namespaces(void);
diff --git a/include/linux/nstree.h b/include/linux/nstree.h
new file mode 100644
index 000000000000..8b8636690473
--- /dev/null
+++ b/include/linux/nstree.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NSTREE_H
+#define _LINUX_NSTREE_H
+
+#include <linux/ns_common.h>
+#include <linux/nsproxy.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/rculist.h>
+#include <linux/cookie.h>
+
+extern struct ns_tree cgroup_ns_tree;
+extern struct ns_tree ipc_ns_tree;
+extern struct ns_tree mnt_ns_tree;
+extern struct ns_tree net_ns_tree;
+extern struct ns_tree pid_ns_tree;
+extern struct ns_tree time_ns_tree;
+extern struct ns_tree user_ns_tree;
+extern struct ns_tree uts_ns_tree;
+
+#define to_ns_tree(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(cgroup_ns_tree), \
+ struct ipc_namespace *: &(ipc_ns_tree), \
+ struct net *: &(net_ns_tree), \
+ struct pid_namespace *: &(pid_ns_tree), \
+ struct mnt_namespace *: &(mnt_ns_tree), \
+ struct time_namespace *: &(time_ns_tree), \
+ struct user_namespace *: &(user_ns_tree), \
+ struct uts_namespace *: &(uts_ns_tree))
+
+u64 ns_tree_gen_id(struct ns_common *ns);
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree);
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree);
+struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
+struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
+ struct ns_tree *ns_tree,
+ bool previous);
+
+static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
+{
+ ns_tree_gen_id(ns);
+ __ns_tree_add_raw(ns, ns_tree);
+}
+
+/**
+ * ns_tree_add_raw - Add a namespace to a namespace
+ * @ns: Namespace to add
+ *
+ * This function adds a namespace to the appropriate namespace tree
+ * without assigning a id.
+ */
+#define ns_tree_add_raw(__ns) __ns_tree_add_raw(to_ns_common(__ns), to_ns_tree(__ns))
+
+/**
+ * ns_tree_add - Add a namespace to a namespace tree
+ * @ns: Namespace to add
+ *
+ * This function assigns a new id to the namespace and adds it to the
+ * appropriate namespace tree and list.
+ */
+#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns))
+
+/**
+ * ns_tree_remove - Remove a namespace from a namespace tree
+ * @ns: Namespace to remove
+ *
+ * This function removes a namespace from the appropriate namespace
+ * tree and list.
+ */
+#define ns_tree_remove(__ns) __ns_tree_remove(to_ns_common(__ns), to_ns_tree(__ns))
+
+#define ns_tree_adjoined_rcu(__ns, __previous) \
+ __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
+
+#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node))
+
+#endif /* _LINUX_NSTREE_H */
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 682472c15495..88e18615dd72 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -134,6 +134,9 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
int walk_kernel_page_table_range(unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
pgd_t *pgd, void *private);
+int walk_kernel_page_table_range_lockless(unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ pgd_t *pgd, void *private);
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ec9d96025683..fd1d91017b99 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -859,7 +859,7 @@ struct perf_event {
/* mmap bits */
struct mutex mmap_mutex;
- atomic_t mmap_count;
+ refcount_t mmap_count;
struct perf_buffer *rb;
struct list_head rb_entry;
@@ -1719,7 +1719,7 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
extern struct perf_callchain_entry *
-get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark);
extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
diff --git a/include/linux/pgalloc.h b/include/linux/pgalloc.h
new file mode 100644
index 000000000000..9174fa59bbc5
--- /dev/null
+++ b/include/linux/pgalloc.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLOC_H
+#define _LINUX_PGALLOC_H
+
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+
+/*
+ * {pgd,p4d}_populate_kernel() are defined as macros to allow
+ * compile-time optimization based on the configured page table levels.
+ * Without this, linking may fail because callers (e.g., KASAN) may rely
+ * on calls to these functions being optimized away when passing symbols
+ * that exist only for certain page table levels.
+ */
+#define pgd_populate_kernel(addr, pgd, p4d) \
+ do { \
+ pgd_populate(&init_mm, pgd, p4d); \
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) \
+ arch_sync_kernel_mappings(addr, addr); \
+ } while (0)
+
+#define p4d_populate_kernel(addr, p4d, pud) \
+ do { \
+ p4d_populate(&init_mm, p4d, pud); \
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED) \
+ arch_sync_kernel_mappings(addr, addr); \
+ } while (0)
+
+#endif /* _LINUX_PGALLOC_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..25a7257052ff 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -232,9 +232,9 @@ static inline int pmd_dirty(pmd_t pmd)
* and the mode cannot be used in interrupt context.
*/
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode() do {} while (0)
-#define arch_leave_lazy_mmu_mode() do {} while (0)
-#define arch_flush_lazy_mmu_mode() do {} while (0)
+static inline void arch_enter_lazy_mmu_mode(void) {}
+static inline void arch_leave_lazy_mmu_mode(void) {}
+static inline void arch_flush_lazy_mmu_mode(void) {}
#endif
#ifndef pte_batch_hint
@@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned
}
#endif
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
#endif /* CONFIG_MMU */
/*
@@ -1938,10 +1954,11 @@ static inline bool arch_has_pfn_modify_check(void)
/*
* Page Table Modification bits for pgtbl_mod_mask.
*
- * These are used by the p?d_alloc_track*() set of functions an in the generic
- * vmalloc/ioremap code to track at which page-table levels entries have been
- * modified. Based on that the code can better decide when vmalloc and ioremap
- * mapping changes need to be synchronized to other page-tables in the system.
+ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
+ * functions in the generic vmalloc, ioremap and page table update code
+ * to track at which page-table levels entries have been modified.
+ * Based on that the code can better decide when page table changes need
+ * to be synchronized to other page-tables in the system.
*/
#define __PGTBL_PGD_MODIFIED 0
#define __PGTBL_P4D_MODIFIED 1
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4c2b8b6e7187..bb45787d8684 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf)
return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX);
}
+static inline unsigned int phy_interface_weight(const unsigned long *intf)
+{
+ return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX);
+}
+
static inline void phy_interface_and(unsigned long *dst, const unsigned long *a,
const unsigned long *b)
{
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 7c67a5811199..445517a72ad0 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -54,10 +54,15 @@ extern struct pid_namespace init_pid_ns;
#define PIDNS_ADDING (1U << 31)
#ifdef CONFIG_PID_NS
+static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct pid_namespace, ns);
+}
+
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
if (ns != &init_pid_ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
@@ -78,12 +83,15 @@ static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
}
#endif
-extern struct pid_namespace *copy_pid_ns(unsigned long flags,
+extern struct pid_namespace *copy_pid_ns(u64 flags,
struct user_namespace *user_ns, struct pid_namespace *ns);
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
extern void put_pid_ns(struct pid_namespace *ns);
+extern bool pidns_is_ancestor(struct pid_namespace *child,
+ struct pid_namespace *ancestor);
+
#else /* !CONFIG_PID_NS */
#include <linux/err.h>
@@ -97,7 +105,7 @@ static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
return 0;
}
-static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
+static inline struct pid_namespace *copy_pid_ns(u64 flags,
struct user_namespace *user_ns, struct pid_namespace *ns)
{
if (flags & CLONE_NEWPID)
@@ -118,6 +126,12 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
{
return 0;
}
+
+static inline bool pidns_is_ancestor(struct pid_namespace *child,
+ struct pid_namespace *ancestor)
+{
+ return false;
+}
#endif /* CONFIG_PID_NS */
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h
index 95d852aef130..726d89143842 100644
--- a/include/linux/platform_data/dmtimer-omap.h
+++ b/include/linux/platform_data/dmtimer-omap.h
@@ -36,9 +36,13 @@ struct omap_dm_timer_ops {
int (*set_pwm)(struct omap_dm_timer *timer, int def_on,
int toggle, int trigger, int autoreload);
int (*get_pwm_status)(struct omap_dm_timer *timer);
+ int (*set_cap)(struct omap_dm_timer *timer,
+ int autoreload, bool config_period);
+ int (*get_cap_status)(struct omap_dm_timer *timer);
int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler);
unsigned int (*read_counter)(struct omap_dm_timer *timer);
+ unsigned int (*read_cap)(struct omap_dm_timer *timer, bool is_period);
int (*write_counter)(struct omap_dm_timer *timer,
unsigned int value);
unsigned int (*read_status)(struct omap_dm_timer *timer);
diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index 78276a11c48d..1571e9157fa5 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -27,6 +27,7 @@
#define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c
#define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d
#define INT3472_GPIO_TYPE_HANDSHAKE 0x12
+#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13
#define INT3472_PDEV_MAX_NAME_LEN 23
#define INT3472_MAX_SENSOR_GPIOS 3
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index c84edf217819..f67a2cb7d781 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -115,6 +115,12 @@ struct dev_pm_domain_list {
* genpd provider specific way, likely through a
* parent device node. This flag makes genpd to
* skip its internal support for this.
+ *
+ * GENPD_FLAG_NO_STAY_ON: For genpd OF providers a powered-on PM domain at
+ * initialization is prevented from being
+ * powered-off until the ->sync_state() callback is
+ * invoked. This flag informs genpd to allow a
+ * power-off without waiting for ->sync_state().
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
@@ -126,6 +132,7 @@ struct dev_pm_domain_list {
#define GENPD_FLAG_OPP_TABLE_FW (1U << 7)
#define GENPD_FLAG_DEV_NAME_FW (1U << 8)
#define GENPD_FLAG_NO_SYNC_STATE (1U << 9)
+#define GENPD_FLAG_NO_STAY_ON (1U << 10)
enum gpd_status {
GENPD_STATE_ON = 0, /* PM domain is on */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 1fad1c8a4c76..102202185d7a 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -372,7 +372,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
/*
* Migrate-Disable and why it is undesired.
*
- * When a preempted task becomes elegible to run under the ideal model (IOW it
+ * When a preempted task becomes eligible to run under the ideal model (IOW it
* becomes one of the M highest priority tasks), it might still have to wait
* for the preemptee's migrate_disable() section to complete. Thereby suffering
* a reduction in bandwidth in the exact duration of the migrate_disable()
@@ -387,7 +387,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
* - a lower priority tasks; which under preempt_disable() could've instantly
* migrated away when another CPU becomes available, is now constrained
* by the ability to push the higher priority task away, which might itself be
- * in a migrate_disable() section, reducing it's available bandwidth.
+ * in a migrate_disable() section, reducing its available bandwidth.
*
* IOW it trades latency / moves the interference term, but it stays in the
* system, and as long as it remains unbounded, the system is not fully
@@ -399,7 +399,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
* PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
* number of primitives into becoming preemptible, they would also allow
* migration. This turns out to break a bunch of per-cpu usage. To this end,
- * all these primitives employ migirate_disable() to restore this implicit
+ * all these primitives employ migrate_disable() to restore this implicit
* assumption.
*
* This is a 'temporary' work-around at best. The correct solution is getting
@@ -407,7 +407,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
* per-cpu locking or short preempt-disable regions.
*
* The end goal must be to get rid of migrate_disable(), alternatively we need
- * a schedulability theory that does not depend on abritrary migration.
+ * a schedulability theory that does not depend on arbitrary migration.
*
*
* Notes on the implementation.
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
* work-conserving schedulers.
*
*/
-extern void migrate_disable(void);
-extern void migrate_enable(void);
/**
* preempt_disable_nested - Disable preemption inside a normally preempt disabled section
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
-DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
#ifdef CONFIG_PREEMPT_DYNAMIC
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 4b20375f3783..e81b8e596e4f 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -5,7 +5,7 @@
#ifndef _LINUX_PROC_NS_H
#define _LINUX_PROC_NS_H
-#include <linux/ns_common.h>
+#include <linux/nsfs.h>
#include <uapi/linux/nsfs.h>
struct pid_namespace;
@@ -17,7 +17,6 @@ struct inode;
struct proc_ns_operations {
const char *name;
const char *real_ns_name;
- int type;
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
int (*install)(struct nsset *nsset, struct ns_common *ns);
@@ -66,25 +65,6 @@ static inline void proc_free_inum(unsigned int inum) {}
#endif /* CONFIG_PROC_FS */
-static inline int ns_alloc_inum(struct ns_common *ns)
-{
- WRITE_ONCE(ns->stashed, NULL);
- return proc_alloc_inum(&ns->inum);
-}
-
-#define ns_free_inum(ns) proc_free_inum((ns)->inum)
-
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
-extern int ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops);
-typedef struct ns_common *ns_get_path_helper_t(void *);
-extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
- void *private_data);
-
-extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
-
-extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
- const struct proc_ns_operations *ns_ops);
-extern void nsfs_init(void);
#endif /* _LINUX_PROC_NS_H */
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 1504fb012c05..540abf7de048 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -7,6 +7,8 @@
enum psp_platform_access_msg {
PSP_CMD_NONE = 0x0,
+ PSP_SFS_GET_FW_VERSIONS,
+ PSP_SFS_UPDATE,
PSP_CMD_HSTI_QUERY = 0x14,
PSP_I2C_REQ_BUS_CMD = 0x64,
PSP_DYNAMIC_BOOST_GET_NONCE,
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 120536f4c6eb..f67f96711f0d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -24,7 +24,7 @@
#include <linux/compiler.h>
#include <linux/atomic.h>
#include <linux/irqflags.h>
-#include <linux/preempt.h>
+#include <linux/sched.h>
#include <linux/bottom_half.h>
#include <linux/lockdep.h>
#include <linux/cleanup.h>
@@ -962,6 +962,20 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
preempt_enable_notrace();
}
+static __always_inline void rcu_read_lock_dont_migrate(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU))
+ migrate_disable();
+ rcu_read_lock();
+}
+
+static inline void rcu_read_unlock_migrate(void)
+{
+ rcu_read_unlock();
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU))
+ migrate_enable();
+}
+
/**
* RCU_INIT_POINTER() - initialize an RCU protected pointer
* @p: The pointer to be initialized.
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 6fb4894b8cfd..a7d92718b653 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -157,27 +157,42 @@ struct rdt_ctrl_domain {
};
/**
+ * struct mbm_cntr_cfg - Assignable counter configuration.
+ * @evtid: MBM event to which the counter is assigned. Only valid
+ * if @rdtgroup is not NULL.
+ * @rdtgrp: resctrl group assigned to the counter. NULL if the
+ * counter is free.
+ */
+struct mbm_cntr_cfg {
+ enum resctrl_event_id evtid;
+ struct rdtgroup *rdtgrp;
+};
+
+/**
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
* @hdr: common header for different domain types
* @ci_id: cache info id for this domain
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
- * @mbm_total: saved state for MBM total bandwidth
- * @mbm_local: saved state for MBM local bandwidth
+ * @mbm_states: Per-event pointer to the MBM event's saved state.
+ * An MBM event's state is an array of struct mbm_state
+ * indexed by RMID on x86 or combined CLOSID, RMID on Arm.
* @mbm_over: worker to periodically read MBM h/w counters
* @cqm_limbo: worker to periodically read CQM h/w counters
* @mbm_work_cpu: worker CPU for MBM h/w counters
* @cqm_work_cpu: worker CPU for CQM h/w counters
+ * @cntr_cfg: array of assignable counters' configuration (indexed
+ * by counter ID)
*/
struct rdt_mon_domain {
struct rdt_domain_hdr hdr;
unsigned int ci_id;
unsigned long *rmid_busy_llc;
- struct mbm_state *mbm_total;
- struct mbm_state *mbm_local;
+ struct mbm_state *mbm_states[QOS_NUM_L3_MBM_EVENTS];
struct delayed_work mbm_over;
struct delayed_work cqm_limbo;
int mbm_work_cpu;
int cqm_work_cpu;
+ struct mbm_cntr_cfg *cntr_cfg;
};
/**
@@ -256,39 +271,52 @@ enum resctrl_schema_fmt {
};
/**
+ * struct resctrl_mon - Monitoring related data of a resctrl resource.
+ * @num_rmid: Number of RMIDs available.
+ * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth
+ * monitoring events can be configured.
+ * @num_mbm_cntrs: Number of assignable counters.
+ * @mbm_cntr_assignable:Is system capable of supporting counter assignment?
+ * @mbm_assign_on_mkdir:True if counters should automatically be assigned to MBM
+ * events of monitor groups created via mkdir.
+ */
+struct resctrl_mon {
+ int num_rmid;
+ unsigned int mbm_cfg_mask;
+ int num_mbm_cntrs;
+ bool mbm_cntr_assignable;
+ bool mbm_assign_on_mkdir;
+};
+
+/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
- * @num_rmid: Number of RMIDs available
* @ctrl_scope: Scope of this resource for control functions
* @mon_scope: Scope of this resource for monitor functions
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
+ * @mon: Monitoring related data.
* @ctrl_domains: RCU list of all control domains for this resource
* @mon_domains: RCU list of all monitor domains for this resource
* @name: Name to use in "schemata" file.
* @schema_fmt: Which format string and parser is used for this schema.
- * @evt_list: List of monitoring events
- * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
- * monitoring events can be configured.
* @cdp_capable: Is the CDP feature available on this resource
*/
struct rdt_resource {
int rid;
bool alloc_capable;
bool mon_capable;
- int num_rmid;
enum resctrl_scope ctrl_scope;
enum resctrl_scope mon_scope;
struct resctrl_cache cache;
struct resctrl_membw membw;
+ struct resctrl_mon mon;
struct list_head ctrl_domains;
struct list_head mon_domains;
char *name;
enum resctrl_schema_fmt schema_fmt;
- struct list_head evt_list;
- unsigned int mbm_cfg_mask;
bool cdp_capable;
};
@@ -372,8 +400,29 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
u32 resctrl_arch_system_num_rmid_idx(void);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
+void resctrl_enable_mon_event(enum resctrl_event_id eventid);
+
+bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
+
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
+static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid)
+{
+ return (eventid >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+ eventid <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id eventid);
+
+/* Iterate over all memory bandwidth events */
+#define for_each_mbm_event_id(eventid) \
+ for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \
+ eventid <= QOS_L3_MBM_LOCAL_EVENT_ID; eventid++)
+
+/* Iterate over memory bandwidth arrays in domain structures */
+#define for_each_mbm_idx(idx) \
+ for (idx = 0; idx < QOS_NUM_L3_MBM_EVENTS; idx++)
+
/**
* resctrl_arch_mon_event_config_write() - Write the config for an event.
* @config_info: struct resctrl_mon_config_info describing the resource, domain
@@ -416,6 +465,26 @@ static inline u32 resctrl_get_config_index(u32 closid,
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l);
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+/**
+ * resctrl_arch_mbm_cntr_assign_enabled() - Check if MBM counter assignment
+ * mode is enabled.
+ * @r: Pointer to the resource structure.
+ *
+ * Return:
+ * true if the assignment mode is enabled, false otherwise.
+ */
+bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r);
+
+/**
+ * resctrl_arch_mbm_cntr_assign_set() - Configure the MBM counter assignment mode.
+ * @r: Pointer to the resource structure.
+ * @enable: Set to true to enable, false to disable the assignment mode.
+ *
+ * Return:
+ * 0 on success, < 0 on error.
+ */
+int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable);
+
/*
* Update the ctrl_val and apply this config right now.
* Must be called on one of the domain's CPUs.
@@ -528,6 +597,63 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *
*/
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
+/**
+ * resctrl_arch_config_cntr() - Configure the counter with its new RMID
+ * and event details.
+ * @r: Resource structure.
+ * @d: The domain in which counter with ID @cntr_id should be configured.
+ * @evtid: Monitoring event type (e.g., QOS_L3_MBM_TOTAL_EVENT_ID
+ * or QOS_L3_MBM_LOCAL_EVENT_ID).
+ * @rmid: RMID.
+ * @closid: CLOSID.
+ * @cntr_id: Counter ID to configure.
+ * @assign: True to assign the counter or update an existing assignment,
+ * false to unassign the counter.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ enum resctrl_event_id evtid, u32 rmid, u32 closid,
+ u32 cntr_id, bool assign);
+
+/**
+ * resctrl_arch_cntr_read() - Read the event data corresponding to the counter ID
+ * assigned to the RMID, event pair for this resource
+ * and domain.
+ * @r: Resource that the counter should be read from.
+ * @d: Domain that the counter should be read from.
+ * @closid: CLOSID that matches the RMID.
+ * @rmid: The RMID to which @cntr_id is assigned.
+ * @cntr_id: The counter to read.
+ * @eventid: The MBM event to which @cntr_id is assigned.
+ * @val: Result of the counter read in bytes.
+ *
+ * Called on a CPU that belongs to domain @d when "mbm_event" mode is enabled.
+ * Called from a non-migrateable process context via smp_call_on_cpu() unless all
+ * CPUs are nohz_full, in which case it is called via IPI (smp_call_function_any()).
+ *
+ * Return:
+ * 0 on success, or -EIO, -EINVAL etc on error.
+ */
+int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid, u64 *val);
+
+/**
+ * resctrl_arch_reset_cntr() - Reset any private state associated with counter ID.
+ * @r: The domain's resource.
+ * @d: The counter ID's domain.
+ * @closid: CLOSID that matches the RMID.
+ * @rmid: The RMID to which @cntr_id is assigned.
+ * @cntr_id: The counter to reset.
+ * @eventid: The MBM event to which @cntr_id is assigned.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid);
+
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index a25fb9c4070d..acfe07860b34 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -34,11 +34,18 @@
/* Max event bits supported */
#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
-/*
- * Event IDs, the values match those used to program IA32_QM_EVTSEL before
- * reading IA32_QM_CTR on RDT systems.
- */
+/* Number of memory transactions that an MBM event can be configured with */
+#define NUM_MBM_TRANSACTIONS 7
+
+/* Event IDs */
enum resctrl_event_id {
+ /* Must match value of first event below */
+ QOS_FIRST_EVENT = 0x01,
+
+ /*
+ * These values match those used to program IA32_QM_EVTSEL before
+ * reading IA32_QM_CTR on RDT systems.
+ */
QOS_L3_OCCUP_EVENT_ID = 0x01,
QOS_L3_MBM_TOTAL_EVENT_ID = 0x02,
QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
@@ -47,4 +54,7 @@ enum resctrl_event_id {
QOS_NUM_EVENTS,
};
+#define QOS_NUM_L3_MBM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1)
+#define MBM_STATE_IDX(evt) ((evt) - QOS_L3_MBM_TOTAL_EVENT_ID)
+
#endif /* __LINUX_RESCTRL_TYPES_H */
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index bc8af3eb5598..69553e7c14c1 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
#include <linux/preempt.h>
#include <linux/sched.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig,
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- preempt_disable();
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD)
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
rseq_handle_notify_resume(ksig, regs);
}
@@ -65,7 +70,7 @@ static inline void rseq_migrate(struct task_struct *t)
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
*/
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
if (clone_flags & CLONE_VM) {
t->rseq = NULL;
@@ -107,7 +112,7 @@ static inline void rseq_preempt(struct task_struct *t)
static inline void rseq_migrate(struct task_struct *t)
{
}
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
}
static inline void rseq_execve(struct task_struct *t)
diff --git a/include/linux/rv.h b/include/linux/rv.h
index 14410a42faef..9520aab34bcb 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -7,16 +7,14 @@
#ifndef _LINUX_RV_H
#define _LINUX_RV_H
-#include <linux/types.h>
-#include <linux/list.h>
-
#define MAX_DA_NAME_LEN 32
#define MAX_DA_RETRY_RACING_EVENTS 3
#ifdef CONFIG_RV
+#include <linux/array_size.h>
#include <linux/bitops.h>
+#include <linux/list.h>
#include <linux/types.h>
-#include <linux/array_size.h>
/*
* Deterministic automaton per-object variables.
diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h
index 309ca72f2dfb..adcc43042c90 100644
--- a/include/linux/rw_hint.h
+++ b/include/linux/rw_hint.h
@@ -14,6 +14,7 @@ enum rw_hint {
WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
+ WRITE_LIFE_HINT_NR,
} __packed;
/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b272382673d..cbb7340c5866 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -49,6 +49,9 @@
#include <linux/tracepoint-defs.h>
#include <linux/unwind_deferred_types.h>
#include <asm/kmap_size.h>
+#ifndef COMPILE_OFFSETS
+#include <generated/rq-offsets.h>
+#endif
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -706,7 +709,6 @@ struct sched_dl_entity {
unsigned int dl_defer : 1;
unsigned int dl_defer_armed : 1;
unsigned int dl_defer_running : 1;
- unsigned int dl_server_idle : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -733,7 +735,6 @@ struct sched_dl_entity {
* runnable task.
*/
struct rq *rq;
- dl_server_has_tasks_f server_has_tasks;
dl_server_pick_f server_pick_task;
#ifdef CONFIG_RT_MUTEXES
@@ -883,6 +884,11 @@ struct task_struct {
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
+#ifdef CONFIG_CFS_BANDWIDTH
+ struct callback_head sched_throttle_work;
+ struct list_head throttle_node;
+ bool throttled;
+#endif
#endif
@@ -2152,6 +2158,8 @@ static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
{
+ struct mutex *blocked_on = READ_ONCE(p->blocked_on);
+
WARN_ON_ONCE(!m);
/* The task should only be setting itself as blocked */
WARN_ON_ONCE(p != current);
@@ -2162,8 +2170,8 @@ static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
* with a different mutex. Note, setting it to the same
* lock repeatedly is ok.
*/
- WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
- p->blocked_on = m;
+ WARN_ON_ONCE(blocked_on && blocked_on != m);
+ WRITE_ONCE(p->blocked_on, m);
}
static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
@@ -2174,16 +2182,19 @@ static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
{
- WARN_ON_ONCE(!m);
- /* Currently we serialize blocked_on under the mutex::wait_lock */
- lockdep_assert_held_once(&m->wait_lock);
- /*
- * There may be cases where we re-clear already cleared
- * blocked_on relationships, but make sure we are not
- * clearing the relationship with a different lock.
- */
- WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
- p->blocked_on = NULL;
+ if (m) {
+ struct mutex *blocked_on = READ_ONCE(p->blocked_on);
+
+ /* Currently we serialize blocked_on under the mutex::wait_lock */
+ lockdep_assert_held_once(&m->wait_lock);
+ /*
+ * There may be cases where we re-clear already cleared
+ * blocked_on relationships, but make sure we are not
+ * clearing the relationship with a different lock.
+ */
+ WARN_ON_ONCE(blocked_on && blocked_on != m);
+ }
+ WRITE_ONCE(p->blocked_on, NULL);
}
static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
@@ -2307,4 +2318,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
#define alloc_tag_restore(_tag, _old) do {} while (0)
#endif
+#ifndef MODULE
+#ifndef COMPILE_OFFSETS
+
+extern void ___migrate_enable(void);
+
+struct rq;
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+
+/*
+ * The "struct rq" is not available here, so we can't access the
+ * "runqueues" with this_cpu_ptr(), as the compilation will fail in
+ * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
+ * typeof((ptr) + 0)
+ *
+ * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
+ */
+#ifdef CONFIG_SMP
+#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
+#else
+#define this_rq_raw() PERCPU_PTR(&runqueues)
+#endif
+#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))
+
+static inline void __migrate_enable(void)
+{
+ struct task_struct *p = current;
+
+#ifdef CONFIG_DEBUG_PREEMPT
+ /*
+ * Check both overflow from migrate_disable() and superfluous
+ * migrate_enable().
+ */
+ if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+ return;
+#endif
+
+ if (p->migration_disabled > 1) {
+ p->migration_disabled--;
+ return;
+ }
+
+ /*
+ * Ensure stop_task runs either before or after this, and that
+ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
+ */
+ guard(preempt)();
+ if (unlikely(p->cpus_ptr != &p->cpus_mask))
+ ___migrate_enable();
+ /*
+ * Mustn't clear migration_disabled() until cpus_ptr points back at the
+ * regular cpus_mask, otherwise things that race (eg.
+ * select_fallback_rq) get confused.
+ */
+ barrier();
+ p->migration_disabled = 0;
+ this_rq_pinned()--;
+}
+
+static inline void __migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+ /*
+ *Warn about overflow half-way through the range.
+ */
+ WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
+ p->migration_disabled++;
+ return;
+ }
+
+ guard(preempt)();
+ this_rq_pinned()++;
+ p->migration_disabled = 1;
+}
+#else /* !COMPILE_OFFSETS */
+static inline void __migrate_disable(void) { }
+static inline void __migrate_enable(void) { }
+#endif /* !COMPILE_OFFSETS */
+
+/*
+ * So that it is possible to not export the runqueues variable, define and
+ * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
+ * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will
+ * be defined in kernel/sched/core.c.
+ */
+#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
+static inline void migrate_disable(void)
+{
+ __migrate_disable();
+}
+
+static inline void migrate_enable(void)
+{
+ __migrate_enable();
+}
+#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
+
+#else /* MODULE */
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+#endif /* MODULE */
+
+DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
+
#endif
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 7047101dbf58..d82b7a9b0658 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -108,7 +108,11 @@ enum scx_kf_mask {
SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */
/* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */
SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */
- /* ops.dequeue (in REST) may be nested inside DISPATCH */
+ /*
+ * ops.dispatch() may release rq lock temporarily and thus ENQUEUE and
+ * SELECT_CPU may be nested inside. ops.dequeue (in REST) may also be
+ * nested inside DISPATCH.
+ */
SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */
SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */
SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 1ef1edbaaf79..7d6449982822 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -226,6 +226,10 @@ struct signal_struct {
struct tty_audit_buf *tty_audit_buf;
#endif
+#ifdef CONFIG_CGROUPS
+ struct rw_semaphore cgroup_threadgroup_rwsem;
+#endif
+
/*
* Thread is the potential origin of an oom condition; kill first on
* oom
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ea41795a352b..34d6a0e108c3 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -63,7 +63,7 @@ extern int lockdep_tasklist_lock_is_held(void);
extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
-extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
+extern int sched_fork(u64 clone_flags, struct task_struct *p);
extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
extern void sched_cancel_fork(struct task_struct *p);
extern void sched_post_fork(struct task_struct *p);
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 5263746b63e8..bbcfdf12aa6e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -30,33 +30,24 @@ struct sd_flag_debug {
};
extern const struct sd_flag_debug sd_flag_debug[];
+struct sched_domain_topology_level;
+
#ifdef CONFIG_SCHED_SMT
-static inline int cpu_smt_flags(void)
-{
- return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
-}
+extern int cpu_smt_flags(void);
+extern const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu);
#endif
#ifdef CONFIG_SCHED_CLUSTER
-static inline int cpu_cluster_flags(void)
-{
- return SD_CLUSTER | SD_SHARE_LLC;
-}
+extern int cpu_cluster_flags(void);
+extern const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu);
#endif
#ifdef CONFIG_SCHED_MC
-static inline int cpu_core_flags(void)
-{
- return SD_SHARE_LLC;
-}
+extern int cpu_core_flags(void);
+extern const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu);
#endif
-#ifdef CONFIG_NUMA
-static inline int cpu_numa_flags(void)
-{
- return SD_NUMA;
-}
-#endif
+extern const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu);
extern int arch_asym_cpu_priority(int cpu);
@@ -172,7 +163,7 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu);
bool cpus_share_cache(int this_cpu, int that_cpu);
bool cpus_share_resources(int this_cpu, int that_cpu);
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu);
typedef int (*sched_domain_flags_f)(void);
struct sd_data {
diff --git a/include/linux/security.h b/include/linux/security.h
index 521bcb5b9717..bd33f194c94a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -489,7 +489,7 @@ int security_file_receive(struct file *file);
int security_file_open(struct file *file);
int security_file_post_open(struct file *file, int mask);
int security_file_truncate(struct file *file);
-int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
+int security_task_alloc(struct task_struct *task, u64 clone_flags);
void security_task_free(struct task_struct *task);
int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
void security_cred_free(struct cred *cred);
@@ -567,7 +567,8 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
int security_ismaclabel(const char *name);
int security_secid_to_secctx(u32 secid, struct lsm_context *cp);
-int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp);
+int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp,
+ int lsmid);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
void security_release_secctx(struct lsm_context *cp);
void security_inode_invalidate_secctx(struct inode *inode);
@@ -1215,7 +1216,7 @@ static inline int security_file_truncate(struct file *file)
}
static inline int security_task_alloc(struct task_struct *task,
- unsigned long clone_flags)
+ u64 clone_flags)
{
return 0;
}
@@ -1551,7 +1552,8 @@ static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp)
}
static inline int security_lsmprop_to_secctx(struct lsm_prop *prop,
- struct lsm_context *cp)
+ struct lsm_context *cp,
+ int lsmid)
{
return -EOPNOTSUPP;
}
diff --git a/include/linux/sem.h b/include/linux/sem.h
index c4deefe42aeb..275269ce2ec8 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -9,12 +9,12 @@ struct task_struct;
#ifdef CONFIG_SYSVIPC
-extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
+extern int copy_semundo(u64 clone_flags, struct task_struct *tsk);
extern void exit_sem(struct task_struct *tsk);
#else
-static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
+static inline int copy_semundo(u64 clone_flags, struct task_struct *tsk)
{
return 0;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 14b923ddb6df..fa633657e4c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len, u32 *crcp);
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from, int len);
+int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset,
+ struct iov_iter *from, int len);
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index dab49e2ec8c0..80b6bfb944f0 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -94,7 +94,8 @@ enum {
__DECLARE_FLEX_ARRAY(TYPE, NAME)
/**
- * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members.
+ * __TRAILING_OVERLAP() - Overlap a flexible-array member with trailing
+ * members.
*
* Creates a union between a flexible-array member (FAM) in a struct and a set
* of additional members that would otherwise follow it.
@@ -102,15 +103,30 @@ enum {
* @TYPE: Flexible structure type name, including "struct" keyword.
* @NAME: Name for a variable to define.
* @FAM: The flexible-array member within @TYPE
+ * @ATTRS: Any struct attributes (usually empty)
* @MEMBERS: Trailing overlapping members.
*/
-#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \
+#define __TRAILING_OVERLAP(TYPE, NAME, FAM, ATTRS, MEMBERS) \
union { \
TYPE NAME; \
struct { \
- unsigned char __offset_to_##FAM[offsetof(TYPE, FAM)]; \
+ unsigned char __offset_to_FAM[offsetof(TYPE, FAM)]; \
MEMBERS \
- }; \
+ } ATTRS; \
}
+/**
+ * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members.
+ *
+ * Creates a union between a flexible-array member (FAM) in a struct and a set
+ * of additional members that would otherwise follow it.
+ *
+ * @TYPE: Flexible structure type name, including "struct" keyword.
+ * @NAME: Name for a variable to define.
+ * @FAM: The flexible-array member within @TYPE
+ * @MEMBERS: Trailing overlapping members.
+ */
+#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \
+ __TRAILING_OVERLAP(TYPE, NAME, FAM, /* no attrs */, MEMBERS)
+
#endif
diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h
index f3ba4f52ff26..6c4077be7742 100644
--- a/include/linux/string_choices.h
+++ b/include/linux/string_choices.h
@@ -17,6 +17,12 @@
#include <linux/types.h>
+static inline const char *str_assert_deassert(bool v)
+{
+ return v ? "assert" : "deassert";
+}
+#define str_deassert_assert(v) str_assert_deassert(!(v))
+
static inline const char *str_enable_disable(bool v)
{
return v ? "enable" : "disable";
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2fe6ed2cc3fd..7012a0f758d8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -385,6 +385,16 @@ void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
void mark_page_accessed(struct page *);
void folio_mark_accessed(struct folio *);
+static inline bool folio_may_be_lru_cached(struct folio *folio)
+{
+ /*
+ * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting.
+ * Holding small numbers of low-order mTHP folios in per-CPU LRU cache
+ * will be sensible, but nobody has implemented and tested that yet.
+ */
+ return !folio_test_large(folio);
+}
+
extern atomic_t lru_disable_count;
static inline bool lru_cache_disabled(void)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 77f45e5d4413..66c06fcdfe19 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1005,6 +1005,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
asmlinkage long sys_uretprobe(void);
+asmlinkage long sys_uprobe(void);
+
/* pciconfig: alpha, arm, arm64, ia64, sparc */
asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
unsigned long off, unsigned long len,
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index bb2c52f4fc94..c514d0e5a45c 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -33,17 +33,22 @@ struct time_namespace {
extern struct time_namespace init_time_ns;
#ifdef CONFIG_TIME_NS
+static inline struct time_namespace *to_time_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct time_namespace, ns);
+}
+void __init time_ns_init(void);
extern int vdso_join_timens(struct task_struct *task,
struct time_namespace *ns);
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
-struct time_namespace *copy_time_ns(unsigned long flags,
+struct time_namespace *copy_time_ns(u64 flags,
struct user_namespace *user_ns,
struct time_namespace *old_ns);
void free_time_ns(struct time_namespace *ns);
@@ -52,7 +57,7 @@ struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static inline void put_time_ns(struct time_namespace *ns)
{
- if (refcount_dec_and_test(&ns->ns.count))
+ if (ns_ref_put(ns))
free_time_ns(ns);
}
@@ -108,6 +113,10 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
}
#else
+static inline void __init time_ns_init(void)
+{
+}
+
static inline int vdso_join_timens(struct task_struct *task,
struct time_namespace *ns)
{
@@ -129,7 +138,7 @@ static inline void put_time_ns(struct time_namespace *ns)
}
static inline
-struct time_namespace *copy_time_ns(unsigned long flags,
+struct time_namespace *copy_time_ns(u64 flags,
struct user_namespace *user_ns,
struct time_namespace *old_ns)
{
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index c27aac67cb3f..b8ae89ea28ab 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -76,6 +76,7 @@ struct tk_read_base {
* @cs_was_changed_seq: The sequence number of clocksource change events
* @clock_valid: Indicator for valid clock
* @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @monotonic_to_aux: CLOCK_MONOTONIC to CLOCK_AUX offset
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
* interval.
@@ -117,6 +118,9 @@ struct tk_read_base {
* @offs_aux is used by the auxiliary timekeepers which do not utilize any
* of the regular timekeeper offset fields.
*
+ * @monotonic_to_aux is a timespec64 representation of @offs_aux to
+ * accelerate the VDSO update for CLOCK_AUX.
+ *
* The cacheline ordering of the structure is optimized for in kernel usage of
* the ktime_get() and ktime_get_ts64() family of time accessors. Struct
* timekeeper is prepended in the core timekeeping code with a sequence count,
@@ -159,7 +163,10 @@ struct timekeeper {
u8 cs_was_changed_seq;
u8 clock_valid;
- struct timespec64 monotonic_to_boot;
+ union {
+ struct timespec64 monotonic_to_boot;
+ struct timespec64 monotonic_to_aux;
+ };
u64 cycle_interval;
u64 xtime_interval;
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 57ed3035cc30..c52b862dad45 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -51,9 +51,15 @@ struct tnum tnum_xor(struct tnum a, struct tnum b);
/* Multiply two tnums, return @a * @b */
struct tnum tnum_mul(struct tnum a, struct tnum b);
+/* Return true if the known bits of both tnums have the same value */
+bool tnum_overlap(struct tnum a, struct tnum b);
+
/* Return a tnum representing numbers satisfying both @a and @b */
struct tnum tnum_intersect(struct tnum a, struct tnum b);
+/* Returns a tnum representing numbers satisfying either @a or @b */
+struct tnum tnum_union(struct tnum t1, struct tnum t2);
+
/* Return @a with all but the lowest @size bytes cleared */
struct tnum tnum_cast(struct tnum a, u8 size);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 33b7fda97d39..6575af39fd10 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -260,7 +260,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
#endif
-static inline const struct cpumask *cpu_cpu_mask(int cpu)
+static inline const struct cpumask *cpu_node_mask(int cpu)
{
return cpumask_of_node(cpu_to_node(cpu));
}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 516217c39094..ee3d36eda45d 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -17,6 +17,7 @@
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
+#include <linux/mutex.h>
struct uprobe;
struct vm_area_struct;
@@ -185,8 +186,14 @@ struct xol_area;
struct uprobes_state {
struct xol_area *xol_area;
+#ifdef CONFIG_X86_64
+ struct hlist_head head_tramps;
+#endif
};
+typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr,
+ uprobe_opcode_t *insn, int nbytes, void *data);
+
extern void __init uprobes_init(void);
extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr);
extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr);
@@ -194,7 +201,11 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn);
extern bool is_trap_insn(uprobe_opcode_t *insn);
extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
-extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t);
+extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t,
+ bool is_register);
+extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr,
+ uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr,
+ void *data);
extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
@@ -205,7 +216,7 @@ extern void uprobe_start_dup_mmap(void);
extern void uprobe_end_dup_mmap(void);
extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t);
-extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
+extern void uprobe_copy_process(struct task_struct *t, u64 flags);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs);
@@ -224,8 +235,13 @@ extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len);
extern void uprobe_handle_trampoline(struct pt_regs *regs);
-extern void *arch_uprobe_trampoline(unsigned long *psize);
+extern void *arch_uretprobe_trampoline(unsigned long *psize);
extern unsigned long uprobe_get_trampoline_vaddr(void);
+extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len);
+extern void arch_uprobe_clear_state(struct mm_struct *mm);
+extern void arch_uprobe_init_state(struct mm_struct *mm);
+extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr);
+extern void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
@@ -281,7 +297,7 @@ static inline bool uprobe_deny_signal(void)
static inline void uprobe_free_utask(struct task_struct *t)
{
}
-static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
+static inline void uprobe_copy_process(struct task_struct *t, u64 flags)
{
}
static inline void uprobe_clear_state(struct mm_struct *mm)
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 8afa8c3a0973..57d1ff006090 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -33,7 +33,7 @@ extern void user_event_mm_dup(struct task_struct *t,
extern void user_event_mm_remove(struct task_struct *t);
static inline void user_events_fork(struct task_struct *t,
- unsigned long clone_flags)
+ u64 clone_flags)
{
struct user_event_mm *old_mm;
@@ -68,7 +68,7 @@ static inline void user_events_exit(struct task_struct *t)
}
#else
static inline void user_events_fork(struct task_struct *t,
- unsigned long clone_flags)
+ u64 clone_flags)
{
}
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index a0bb6d012137..9a9aebbf96b9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -168,10 +168,15 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
#ifdef CONFIG_USER_NS
+static inline struct user_namespace *to_user_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct user_namespace, ns);
+}
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
@@ -181,7 +186,7 @@ extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
- if (ns && refcount_dec_and_test(&ns->ns.count))
+ if (ns && ns_ref_put(ns))
__put_user_ns(ns);
}
diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h
new file mode 100644
index 000000000000..60f37fec0f4b
--- /dev/null
+++ b/include/linux/uts_namespace.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UTS_NAMESPACE_H
+#define _LINUX_UTS_NAMESPACE_H
+
+#include <linux/ns_common.h>
+#include <uapi/linux/utsname.h>
+
+struct user_namespace;
+extern struct user_namespace init_user_ns;
+
+struct uts_namespace {
+ struct new_utsname name;
+ struct user_namespace *user_ns;
+ struct ucounts *ucounts;
+ struct ns_common ns;
+} __randomize_layout;
+
+extern struct uts_namespace init_uts_ns;
+
+#ifdef CONFIG_UTS_NS
+static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct uts_namespace, ns);
+}
+
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+ ns_ref_inc(ns);
+}
+
+extern struct uts_namespace *copy_utsname(u64 flags,
+ struct user_namespace *user_ns, struct uts_namespace *old_ns);
+extern void free_uts_ns(struct uts_namespace *ns);
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+ if (ns_ref_put(ns))
+ free_uts_ns(ns);
+}
+
+void uts_ns_init(void);
+#else
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+}
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+
+static inline struct uts_namespace *copy_utsname(u64 flags,
+ struct user_namespace *user_ns, struct uts_namespace *old_ns)
+{
+ if (flags & CLONE_NEWUTS)
+ return ERR_PTR(-EINVAL);
+
+ return old_ns;
+}
+
+static inline void uts_ns_init(void)
+{
+}
+#endif
+
+#endif /* _LINUX_UTS_NAMESPACE_H */
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index bf7613ba412b..547bd4439706 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -7,7 +7,7 @@
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
-#include <uapi/linux/utsname.h>
+#include <linux/uts_namespace.h>
enum uts_proc {
UTS_PROC_ARCH,
@@ -18,57 +18,6 @@ enum uts_proc {
UTS_PROC_DOMAINNAME,
};
-struct user_namespace;
-extern struct user_namespace init_user_ns;
-
-struct uts_namespace {
- struct new_utsname name;
- struct user_namespace *user_ns;
- struct ucounts *ucounts;
- struct ns_common ns;
-} __randomize_layout;
-extern struct uts_namespace init_uts_ns;
-
-#ifdef CONFIG_UTS_NS
-static inline void get_uts_ns(struct uts_namespace *ns)
-{
- refcount_inc(&ns->ns.count);
-}
-
-extern struct uts_namespace *copy_utsname(unsigned long flags,
- struct user_namespace *user_ns, struct uts_namespace *old_ns);
-extern void free_uts_ns(struct uts_namespace *ns);
-
-static inline void put_uts_ns(struct uts_namespace *ns)
-{
- if (refcount_dec_and_test(&ns->ns.count))
- free_uts_ns(ns);
-}
-
-void uts_ns_init(void);
-#else
-static inline void get_uts_ns(struct uts_namespace *ns)
-{
-}
-
-static inline void put_uts_ns(struct uts_namespace *ns)
-{
-}
-
-static inline struct uts_namespace *copy_utsname(unsigned long flags,
- struct user_namespace *user_ns, struct uts_namespace *old_ns)
-{
- if (flags & CLONE_NEWUTS)
- return ERR_PTR(-EINVAL);
-
- return old_ns;
-}
-
-static inline void uts_ns_init(void)
-{
-}
-#endif
-
#ifdef CONFIG_PROC_SYSCTL
extern void uts_proc_notify(enum uts_proc proc);
#else
diff --git a/include/linux/verification.h b/include/linux/verification.h
index 4f3022d081c3..dec7f2beabfd 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -36,6 +36,7 @@ enum key_being_used_for {
VERIFYING_KEY_SIGNATURE,
VERIFYING_KEY_SELF_SIGNATURE,
VERIFYING_UNSPECIFIED_SIGNATURE,
+ VERIFYING_BPF_SIGNATURE,
NR__KEY_BEING_USED_FOR
};
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 918cf25cd3c6..7427b79d6f3d 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -193,14 +193,15 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
}
static inline void virtio_get_features(struct virtio_device *vdev,
- u64 *features)
+ u64 *features_out)
{
if (vdev->config->get_extended_features) {
- vdev->config->get_extended_features(vdev, features);
+ vdev->config->get_extended_features(vdev, features_out);
return;
}
- virtio_features_from_u64(features, vdev->config->get_features(vdev));
+ virtio_features_from_u64(features_out,
+ vdev->config->get_features(vdev));
}
/**
@@ -326,13 +327,11 @@ int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
static inline
bool virtio_get_shm_region(struct virtio_device *vdev,
- struct virtio_shm_region *region, u8 id)
+ struct virtio_shm_region *region_out, u8 id)
{
- if (!region->len)
- return false;
if (!vdev->config->get_shm_region)
return false;
- return vdev->config->get_shm_region(vdev, region, id);
+ return vdev->config->get_shm_region(vdev, region_out, id);
}
static inline bool virtio_is_little_endian(struct virtio_device *vdev)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index fdc9aeb74a44..2759dac6be44 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -220,22 +220,6 @@ int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
struct page **pages, unsigned int page_shift);
/*
- * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
- */
-#ifndef ARCH_PAGE_TABLE_SYNC_MASK
-#define ARCH_PAGE_TABLE_SYNC_MASK 0
-#endif
-
-/*
- * There is no default implementation for arch_sync_kernel_mappings(). It is
- * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
- * is 0.
- */
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
-
-/*
* Lowlevel-APIs (not for driver use!)
*/
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 45d5dd470ff6..dabc351cc127 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -410,7 +410,7 @@ enum wq_flags {
__WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
/* BH wq only allows the following flags */
- __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI,
+ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU,
};
enum wq_consts {
@@ -434,10 +434,10 @@ enum wq_consts {
* short queue flush time. Don't queue works which can run for too
* long.
*
- * system_highpri_wq is similar to system_wq but for work items which
+ * system_highpri_wq is similar to system_percpu_wq but for work items which
* require WQ_HIGHPRI.
*
- * system_long_wq is similar to system_wq but may host long running
+ * system_long_wq is similar to system_percpu_wq but may host long running
* works. Queue flushing might take relatively long.
*
* system_dfl_wq is unbound workqueue. Workers are not bound to
@@ -445,13 +445,13 @@ enum wq_consts {
* executed immediately as long as max_active limit is not reached and
* resources are available.
*
- * system_freezable_wq is equivalent to system_wq except that it's
+ * system_freezable_wq is equivalent to system_percpu_wq except that it's
* freezable.
*
* *_power_efficient_wq are inclined towards saving power and converted
* into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
* they are same as their non-power-efficient counterparts - e.g.
- * system_power_efficient_wq is identical to system_wq if
+ * system_power_efficient_wq is identical to system_percpu_wq if
* 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
*
* system_bh[_highpri]_wq are convenience interface to softirq. BH work items
@@ -502,7 +502,7 @@ void workqueue_softirq_dead(unsigned int cpu);
* min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means
* that the sum of per-node max_active's may be larger than @max_active.
*
- * For detailed information on %WQ_* flags, please refer to
+ * For detailed information on %WQ_\* flags, please refer to
* Documentation/core-api/workqueue.rst.
*
* RETURNS:
@@ -570,7 +570,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
#define create_workqueue(name) \
- alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
+ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name))
#define create_freezable_workqueue(name) \
alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
WQ_MEM_RECLAIM, 1, (name))
@@ -708,7 +708,7 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq,
*/
static inline bool schedule_work_on(int cpu, struct work_struct *work)
{
- return queue_work_on(cpu, system_wq, work);
+ return queue_work_on(cpu, system_percpu_wq, work);
}
/**
@@ -727,7 +727,7 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
*/
static inline bool schedule_work(struct work_struct *work)
{
- return queue_work(system_wq, work);
+ return queue_work(system_percpu_wq, work);
}
/**
@@ -770,21 +770,21 @@ extern void __warn_flushing_systemwide_wq(void)
#define flush_scheduled_work() \
({ \
__warn_flushing_systemwide_wq(); \
- __flush_workqueue(system_wq); \
+ __flush_workqueue(system_percpu_wq); \
})
#define flush_workqueue(wq) \
({ \
struct workqueue_struct *_wq = (wq); \
\
- if ((__builtin_constant_p(_wq == system_wq) && \
- _wq == system_wq) || \
+ if ((__builtin_constant_p(_wq == system_percpu_wq) && \
+ _wq == system_percpu_wq) || \
(__builtin_constant_p(_wq == system_highpri_wq) && \
_wq == system_highpri_wq) || \
(__builtin_constant_p(_wq == system_long_wq) && \
_wq == system_long_wq) || \
- (__builtin_constant_p(_wq == system_unbound_wq) && \
- _wq == system_unbound_wq) || \
+ (__builtin_constant_p(_wq == system_dfl_wq) && \
+ _wq == system_dfl_wq) || \
(__builtin_constant_p(_wq == system_freezable_wq) && \
_wq == system_freezable_wq) || \
(__builtin_constant_p(_wq == system_power_efficient_wq) && \
@@ -807,7 +807,7 @@ extern void __warn_flushing_systemwide_wq(void)
static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
unsigned long delay)
{
- return queue_delayed_work_on(cpu, system_wq, dwork, delay);
+ return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay);
}
/**
@@ -821,7 +821,7 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
static inline bool schedule_delayed_work(struct delayed_work *dwork,
unsigned long delay)
{
- return queue_delayed_work(system_wq, dwork, delay);
+ return queue_delayed_work(system_percpu_wq, dwork, delay);
}
#ifndef CONFIG_SMP
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a2848d731a46..15a4bc4ab819 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -265,6 +265,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
}
+void inode_switch_wbs_work_fn(struct work_struct *work);
+
#else /* CONFIG_CGROUP_WRITEBACK */
static inline void inode_attach_wb(struct inode *inode, struct folio *folio)