From 22945688acd4d0ec2620b0670a53110401ed9c59 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 25 Nov 2019 08:36:30 +0530 Subject: KVM: PPC: Book3S HV: Support reset of secure guest Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF. This ioctl will be issued by QEMU during reset and includes the the following steps: - Release all device pages of the secure guest. - Ask UV to terminate the guest via UV_SVM_TERMINATE ucall - Unpin the VPA pages so that they can be migrated back to secure side when guest becomes secure again. This is required because pinned pages can't be migrated. - Reinit the partition scoped page tables After these steps, guest is ready to issue UV_ESM call once again to switch to secure mode. Signed-off-by: Bharata B Rao Signed-off-by: Sukadev Bhattiprolu [Implementation of uv_svm_terminate() and its call from guest shutdown path] Signed-off-by: Ram Pai [Unpinning of VPA pages] Signed-off-by: Paul Mackerras --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6f17c8e2dba..f0a16b4adbbd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1348,6 +1348,7 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) +#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From da8c96906990f1108cb626ee7865e69267a3263b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Dec 2019 18:51:26 -0700 Subject: io_uring: mark us with IORING_FEAT_SUBMIT_STABLE If this flag is set, applications can be certain that any data for async offload has been consumed when the kernel has consumed the SQE. Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- include/uapi/linux/io_uring.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/fs/io_uring.c b/fs/io_uring.c index 5fcd89c507ec..c47a08afcee5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5077,7 +5077,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret < 0) goto err; - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP; + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE; trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4637ed1d9949..eabccb46edd1 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -157,6 +157,7 @@ struct io_uring_params { */ #define IORING_FEAT_SINGLE_MMAP (1U << 0) #define IORING_FEAT_NODROP (1U << 1) +#define IORING_FEAT_SUBMIT_STABLE (1U << 2) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 25b2f1b77a92b4d850d40eca50d446dd25c09934 Mon Sep 17 00:00:00 2001 From: Mathew King Date: Wed, 4 Dec 2019 17:27:47 -0800 Subject: Input: add privacy screen toggle keycode Add keycode for toggling electronic privacy screen to the keycodes definition. Some new laptops have a privacy screen which can be toggled with a key on the keyboard. Signed-off-by: Mathew King Link: https://lore.kernel.org/r/20191017163208.235518-1-mathewk@chromium.org Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 85387c76c24f..05d8b4f4f82f 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -649,6 +649,8 @@ */ #define KEY_DATA 0x277 #define KEY_ONSCREEN_KEYBOARD 0x278 +/* Electronic privacy screen control */ +#define KEY_PRIVACY_SCREEN_TOGGLE 0x279 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 -- cgit v1.2.3 From 1a18374fc370da5a335b061fe94ebf677b07bd1d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:50:23 -0800 Subject: linux/scc.h: make uapi linux/scc.h self-contained Userspace cannot compile CC usr/include/linux/scc.h.s In file included from :32:0: usr/include/linux/scc.h:20:20: error: `SIOCDEVPRIVATE' undeclared here (not in a function) SIOCSCCRESERVED = SIOCDEVPRIVATE, ^~~~~~~~~~~~~~ Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191108055809.26969-1-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/scc.h | 1 + usr/include/Makefile | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/scc.h b/include/uapi/linux/scc.h index c5bc7f747755..947edb17ce9d 100644 --- a/include/uapi/linux/scc.h +++ b/include/uapi/linux/scc.h @@ -4,6 +4,7 @@ #ifndef _UAPI_SCC_H #define _UAPI_SCC_H +#include /* selection of hardware types */ diff --git a/usr/include/Makefile b/usr/include/Makefile index 24543a30b9f0..5acd9bb6d714 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -40,7 +40,6 @@ header-test- += linux/omapfb.h header-test- += linux/patchkey.h header-test- += linux/phonet.h header-test- += linux/reiserfs_xattr.h -header-test- += linux/scc.h header-test- += linux/sctp.h header-test- += linux/signal.h header-test- += linux/sysctl.h -- cgit v1.2.3 From eec028c9386ed1a692aa01a85b55952202b41619 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:43 -0800 Subject: kcov: remote coverage support Patch series " kcov: collect coverage from usb and vhost", v3. This patchset extends kcov to allow collecting coverage from backgound kernel threads. This extension requires custom annotations for each of the places where coverage collection is desired. This patchset implements this for hub events in the USB subsystem and for vhost workers. See the first patch description for details about the kcov extension. The other two patches apply this kcov extension to USB and vhost. Examples of other subsystems that might potentially benefit from this when custom annotations are added (the list is based on process_one_work() callers for bugs recently reported by syzbot): 1. fs: writeback wb_workfn() worker, 2. net: addrconf_dad_work()/addrconf_verify_work() workers, 3. net: neigh_periodic_work() worker, 4. net/p9: p9_write_work()/p9_read_work() workers, 5. block: blk_mq_run_work_fn() worker. These patches have been used to enable coverage-guided USB fuzzing with syzkaller for the last few years, see the details here: https://github.com/google/syzkaller/blob/master/docs/linux/external_fuzzing_usb.md This patchset has been pushed to the public Linux kernel Gerrit instance: https://linux-review.googlesource.com/c/linux/kernel/git/torvalds/linux/+/1524 This patch (of 3): Add background thread coverage collection ability to kcov. With KCOV_ENABLE coverage is collected only for syscalls that are issued from the current process. With KCOV_REMOTE_ENABLE it's possible to collect coverage for arbitrary parts of the kernel code, provided that those parts are annotated with kcov_remote_start()/kcov_remote_stop(). This allows to collect coverage from two types of kernel background threads: the global ones, that are spawned during kernel boot in a limited number of instances (e.g. one USB hub_event() worker thread is spawned per USB HCD); and the local ones, that are spawned when a user interacts with some kernel interface (e.g. vhost workers). To enable collecting coverage from a global background thread, a unique global handle must be assigned and passed to the corresponding kcov_remote_start() call. Then a userspace process can pass a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the kcov_remote_arg struct. This will attach the used kcov device to the code sections, that are referenced by those handles. Since there might be many local background threads spawned from different userspace processes, we can't use a single global handle per annotation. Instead, the userspace process passes a non-zero handle through the common_handle field of the kcov_remote_arg struct. This common handle gets saved to the kcov_handle field in the current task_struct and needs to be passed to the newly spawned threads via custom annotations. Those threads should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). Internally kcov stores handles as u64 integers. The top byte of a handle is used to denote the id of a subsystem that this handle belongs to, and the lower 4 bytes are used to denote the id of a thread instance within that subsystem. A reserved value 0 is used as a subsystem id for common handles as they don't belong to a particular subsystem. The bytes 4-7 are currently reserved and must be zero. In the future the number of bytes used for the subsystem or handle ids might be increased. When a particular userspace process collects coverage by via a common handle, kcov will collect coverage for each code section that is annotated to use the common handle obtained as kcov_handle from the current task_struct. However non common handles allow to collect coverage selectively from different subsystems. Link: http://lkml.kernel.org/r/e90e315426a384207edbec1d6aa89e43008e4caf.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Arnd Bergmann Cc: Steven Rostedt Cc: David Windsor Cc: Elena Reshetova Cc: Anders Roxell Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 129 +++++++++ include/linux/kcov.h | 23 ++ include/linux/sched.h | 8 + include/uapi/linux/kcov.h | 28 ++ kernel/kcov.c | 547 ++++++++++++++++++++++++++++++++++++--- 5 files changed, 700 insertions(+), 35 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 42b612677799..36890b026e77 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted:: Coverage collection ------------------- + The following program demonstrates coverage collection from within a test program using kcov: @@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end). Comparison operands collection ------------------------------ + Comparison operands collection is similar to coverage collection: .. code-block:: c @@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection: Note that the kcov modes (coverage collection or comparison operands) are mutually exclusive. + +Remote coverage collection +-------------------------- + +With KCOV_ENABLE coverage is collected only for syscalls that are issued +from the current process. With KCOV_REMOTE_ENABLE it's possible to collect +coverage for arbitrary parts of the kernel code, provided that those parts +are annotated with kcov_remote_start()/kcov_remote_stop(). + +This allows to collect coverage from two types of kernel background +threads: the global ones, that are spawned during kernel boot in a limited +number of instances (e.g. one USB hub_event() worker thread is spawned per +USB HCD); and the local ones, that are spawned when a user interacts with +some kernel interface (e.g. vhost workers). + +To enable collecting coverage from a global background thread, a unique +global handle must be assigned and passed to the corresponding +kcov_remote_start() call. Then a userspace process can pass a list of such +handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the +kcov_remote_arg struct. This will attach the used kcov device to the code +sections, that are referenced by those handles. + +Since there might be many local background threads spawned from different +userspace processes, we can't use a single global handle per annotation. +Instead, the userspace process passes a non-zero handle through the +common_handle field of the kcov_remote_arg struct. This common handle gets +saved to the kcov_handle field in the current task_struct and needs to be +passed to the newly spawned threads via custom annotations. Those threads +should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). + +Internally kcov stores handles as u64 integers. The top byte of a handle +is used to denote the id of a subsystem that this handle belongs to, and +the lower 4 bytes are used to denote the id of a thread instance within +that subsystem. A reserved value 0 is used as a subsystem id for common +handles as they don't belong to a particular subsystem. The bytes 4-7 are +currently reserved and must be zero. In the future the number of bytes +used for the subsystem or handle ids might be increased. + +When a particular userspace proccess collects coverage by via a common +handle, kcov will collect coverage for each code section that is annotated +to use the common handle obtained as kcov_handle from the current +task_struct. However non common handles allow to collect coverage +selectively from different subsystems. + +.. code-block:: c + + struct kcov_remote_arg { + unsigned trace_mode; + unsigned area_size; + unsigned num_handles; + uint64_t common_handle; + uint64_t handles[0]; + }; + + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) + #define KCOV_DISABLE _IO('c', 101) + #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) + + #define COVER_SIZE (64 << 10) + + #define KCOV_TRACE_PC 0 + + #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) + #define KCOV_SUBSYSTEM_USB (0x01ull << 56) + + #define KCOV_SUBSYSTEM_MASK (0xffull << 56) + #define KCOV_INSTANCE_MASK (0xffffffffull) + + static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) + { + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; + } + + #define KCOV_COMMON_ID 0x42 + #define KCOV_USB_BUS_NUM 1 + + int main(int argc, char **argv) + { + int fd; + unsigned long *cover, n, i; + struct kcov_remote_arg *arg; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + + /* Enable coverage collection via common handle and from USB bus #1. */ + arg = calloc(1, sizeof(*arg) + sizeof(uint64_t)); + if (!arg) + perror("calloc"), exit(1); + arg->trace_mode = KCOV_TRACE_PC; + arg->area_size = COVER_SIZE; + arg->num_handles = 1; + arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, + KCOV_COMMON_ID); + arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, + KCOV_USB_BUS_NUM); + if (ioctl(fd, KCOV_REMOTE_ENABLE, arg)) + perror("ioctl"), free(arg), exit(1); + free(arg); + + /* + * Here the user needs to trigger execution of a kernel code section + * that is either annotated with the common handle, or to trigger some + * activity on USB bus #1. + */ + sleep(2); + + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) + printf("0x%lx\n", cover[i + 1]); + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b76a1807028d..a10e84707d82 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -37,12 +37,35 @@ do { \ (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ } while (0) +/* See Documentation/dev-tools/kcov.rst for usage details. */ +void kcov_remote_start(u64 handle); +void kcov_remote_stop(void); +u64 kcov_common_handle(void); + +static inline void kcov_remote_start_common(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); +} + +static inline void kcov_remote_start_usb(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); +} + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} static inline void kcov_prepare_switch(struct task_struct *t) {} static inline void kcov_finish_switch(struct task_struct *t) {} +static inline void kcov_remote_start(u64 handle) {} +static inline void kcov_remote_stop(void) {} +static inline u64 kcov_common_handle(void) +{ + return 0; +} +static inline void kcov_remote_start_common(u64 id) {} +static inline void kcov_remote_start_usb(u64 id) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cd97d9dd021..467d26046416 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,8 @@ struct task_struct { #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV + /* See kernel/kcov.c for more details. */ + /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; @@ -1221,6 +1223,12 @@ struct task_struct { /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; + + /* KCOV common handle for remote coverage collection: */ + u64 kcov_handle; + + /* KCOV sequence number: */ + int kcov_sequence; #endif #ifdef CONFIG_MEMCG diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 9529867717a8..409d3ad1e6e2 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -4,9 +4,24 @@ #include +/* + * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst + * and the comment before kcov_remote_start() for usage details. + */ +struct kcov_remote_arg { + unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ + unsigned int area_size; /* Length of coverage buffer in words */ + unsigned int num_handles; /* Size of handles array */ + __u64 common_handle; + __u64 handles[0]; +}; + +#define KCOV_REMOTE_MAX_HANDLES 0x100 + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +#define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) enum { /* @@ -32,4 +47,17 @@ enum { #define KCOV_CMP_SIZE(n) ((n) << 1) #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) +#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) +#define KCOV_SUBSYSTEM_USB (0x01ull << 56) + +#define KCOV_SUBSYSTEM_MASK (0xffull << 56) +#define KCOV_INSTANCE_MASK (0xffffffffull) + +static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) +{ + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; +} + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/kernel/kcov.c b/kernel/kcov.c index 2ee38727844a..f50354202dbe 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -21,8 +22,11 @@ #include #include #include +#include #include +#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__) + /* Number of 64-bit words written per one comparison: */ #define KCOV_WORDS_PER_CMP 4 @@ -44,19 +48,100 @@ struct kcov { * Reference counter. We keep one for: * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) + * - each code section for remote coverage collection */ refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; - /* Size of arena (in long's for KCOV_MODE_TRACE). */ - unsigned size; + /* Size of arena (in long's). */ + unsigned int size; /* Coverage buffer shared with user space. */ void *area; /* Task for which we collect coverage, or NULL. */ struct task_struct *t; + /* Collecting coverage from remote (background) threads. */ + bool remote; + /* Size of remote area (in long's). */ + unsigned int remote_size; + /* + * Sequence is incremented each time kcov is reenabled, used by + * kcov_remote_stop(), see the comment there. + */ + int sequence; }; +struct kcov_remote_area { + struct list_head list; + unsigned int size; +}; + +struct kcov_remote { + u64 handle; + struct kcov *kcov; + struct hlist_node hnode; +}; + +static DEFINE_SPINLOCK(kcov_remote_lock); +static DEFINE_HASHTABLE(kcov_remote_map, 4); +static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote *kcov_remote_find(u64 handle) +{ + struct kcov_remote *remote; + + hash_for_each_possible(kcov_remote_map, remote, hnode, handle) { + if (remote->handle == handle) + return remote; + } + return NULL; +} + +static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle) +{ + struct kcov_remote *remote; + + if (kcov_remote_find(handle)) + return ERR_PTR(-EEXIST); + remote = kmalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) + return ERR_PTR(-ENOMEM); + remote->handle = handle; + remote->kcov = kcov; + hash_add(kcov_remote_map, &remote->hnode, handle); + return remote; +} + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote_area *kcov_remote_area_get(unsigned int size) +{ + struct kcov_remote_area *area; + struct list_head *pos; + + kcov_debug("size = %u\n", size); + list_for_each(pos, &kcov_remote_areas) { + area = list_entry(pos, struct kcov_remote_area, list); + if (area->size == size) { + list_del(&area->list); + kcov_debug("rv = %px\n", area); + return area; + } + } + kcov_debug("rv = NULL\n"); + return NULL; +} + +/* Must be called with kcov_remote_lock locked. */ +static void kcov_remote_area_put(struct kcov_remote_area *area, + unsigned int size) +{ + kcov_debug("area = %px, size = %u\n", area, size); + INIT_LIST_HEAD(&area->list); + area->size = size; + list_add(&area->list, &kcov_remote_areas); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * in_interrupt() returns false (e.g. preempt_schedule_irq()). * READ_ONCE()/barrier() effectively provides load-acquire wrt * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). + * kcov_start(). */ barrier(); return mode == needed_mode; @@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) EXPORT_SYMBOL(__sanitizer_cov_trace_switch); #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ +static void kcov_start(struct task_struct *t, unsigned int size, + void *area, enum kcov_mode mode, int sequence) +{ + kcov_debug("t = %px, size = %u, area = %px\n", t, size, area); + /* Cache in task struct for performance. */ + t->kcov_size = size; + t->kcov_area = area; + /* See comment in check_kcov_mode(). */ + barrier(); + WRITE_ONCE(t->kcov_mode, mode); + t->kcov_sequence = sequence; +} + +static void kcov_stop(struct task_struct *t) +{ + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); + t->kcov_size = 0; + t->kcov_area = NULL; +} + +static void kcov_task_reset(struct task_struct *t) +{ + kcov_stop(t); + t->kcov = NULL; + t->kcov_sequence = 0; + t->kcov_handle = 0; +} + +void kcov_task_init(struct task_struct *t) +{ + kcov_task_reset(t); + t->kcov_handle = current->kcov_handle; +} + +static void kcov_reset(struct kcov *kcov) +{ + kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; + kcov->remote = false; + kcov->remote_size = 0; + kcov->sequence++; +} + +static void kcov_remote_reset(struct kcov *kcov) +{ + int bkt; + struct kcov_remote *remote; + struct hlist_node *tmp; + + spin_lock(&kcov_remote_lock); + hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) { + if (remote->kcov != kcov) + continue; + kcov_debug("removing handle %llx\n", remote->handle); + hash_del(&remote->hnode); + kfree(remote); + } + /* Do reset before unlock to prevent races with kcov_remote_start(). */ + kcov_reset(kcov); + spin_unlock(&kcov_remote_lock); +} + +static void kcov_disable(struct task_struct *t, struct kcov *kcov) +{ + kcov_task_reset(t); + if (kcov->remote) + kcov_remote_reset(kcov); + else + kcov_reset(kcov); +} + static void kcov_get(struct kcov *kcov) { refcount_inc(&kcov->refcount); @@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov) static void kcov_put(struct kcov *kcov) { if (refcount_dec_and_test(&kcov->refcount)) { + kcov_remote_reset(kcov); vfree(kcov->area); kfree(kcov); } } -void kcov_task_init(struct task_struct *t) -{ - WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); - barrier(); - t->kcov_size = 0; - t->kcov_area = NULL; - t->kcov = NULL; -} - void kcov_task_exit(struct task_struct *t) { struct kcov *kcov; @@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t) kcov = t->kcov; if (kcov == NULL) return; + spin_lock(&kcov->lock); + kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t); + /* + * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t, + * which comes down to: + * WARN_ON(!kcov->remote && kcov->t != t); + * + * For KCOV_REMOTE_ENABLE devices, the exiting task is either: + * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * In this case we should print a warning right away, since a task + * shouldn't be exiting when it's in a kcov coverage collection + * section. Here t points to the task that is collecting remote + * coverage, and t->kcov->t points to the thread that created the + * kcov device. Which means that to detect this case we need to + * check that t != t->kcov->t, and this gives us the following: + * WARN_ON(kcov->remote && kcov->t != t); + * + * 2. The task that created kcov exiting without calling KCOV_DISABLE, + * and then again we can make sure that t->kcov->t == t: + * WARN_ON(kcov->remote && kcov->t != t); + * + * By combining all three checks into one we get: + */ if (WARN_ON(kcov->t != t)) { spin_unlock(&kcov->lock); return; } /* Just to not leave dangling references behind. */ - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; + kcov->sequence = 1; refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +static int kcov_get_mode(unsigned long arg) +{ + if (arg == KCOV_TRACE_PC) + return KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + return KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; +} + /* * Fault in a lazily-faulted vmalloc area before it can be used by * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the @@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov) READ_ONCE(area[offset]); } +static inline bool kcov_check_handle(u64 handle, bool common_valid, + bool uncommon_valid, bool zero_valid) +{ + if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK)) + return false; + switch (handle & KCOV_SUBSYSTEM_MASK) { + case KCOV_SUBSYSTEM_COMMON: + return (handle & KCOV_INSTANCE_MASK) ? + common_valid : zero_valid; + case KCOV_SUBSYSTEM_USB: + return uncommon_valid; + default: + return false; + } + return false; +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; unsigned long size, unused; + int mode, i; + struct kcov_remote_arg *remote_arg; + struct kcov_remote *remote; switch (cmd) { case KCOV_INIT_TRACE: + kcov_debug("KCOV_INIT_TRACE\n"); /* * Enable kcov in trace mode and setup buffer size. * Must happen before anything else. @@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: + kcov_debug("KCOV_ENABLE\n"); /* * Enable coverage for the current task. * At this point user must have been enabled trace mode, @@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; - if (arg == KCOV_TRACE_PC) - kcov->mode = KCOV_MODE_TRACE_PC; - else if (arg == KCOV_TRACE_CMP) -#ifdef CONFIG_KCOV_ENABLE_COMPARISONS - kcov->mode = KCOV_MODE_TRACE_CMP; -#else - return -ENOTSUPP; -#endif - else - return -EINVAL; + mode = kcov_get_mode(arg); + if (mode < 0) + return mode; kcov_fault_in_area(kcov); - /* Cache in task struct for performance. */ - t->kcov_size = kcov->size; - t->kcov_area = kcov->area; - /* See comment in check_kcov_mode(). */ - barrier(); - WRITE_ONCE(t->kcov_mode, kcov->mode); + kcov->mode = mode; + kcov_start(t, kcov->size, kcov->area, kcov->mode, + kcov->sequence); t->kcov = kcov; kcov->t = t; - /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ kcov_get(kcov); return 0; case KCOV_DISABLE: + kcov_debug("KCOV_DISABLE\n"); /* Disable coverage for the current task. */ unused = arg; if (unused != 0 || current->kcov != kcov) @@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (WARN_ON(kcov->t != t)) return -EINVAL; - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); kcov_put(kcov); return 0; + case KCOV_REMOTE_ENABLE: + kcov_debug("KCOV_REMOTE_ENABLE\n"); + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) + return -EINVAL; + t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; + remote_arg = (struct kcov_remote_arg *)arg; + mode = kcov_get_mode(remote_arg->trace_mode); + if (mode < 0) + return mode; + if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long)) + return -EINVAL; + kcov->mode = mode; + t->kcov = kcov; + kcov->t = t; + kcov->remote = true; + kcov->remote_size = remote_arg->area_size; + spin_lock(&kcov_remote_lock); + for (i = 0; i < remote_arg->num_handles; i++) { + kcov_debug("handle %llx\n", remote_arg->handles[i]); + if (!kcov_check_handle(remote_arg->handles[i], + false, true, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, remote_arg->handles[i]); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + } + if (remote_arg->common_handle) { + kcov_debug("common handle %llx\n", + remote_arg->common_handle); + if (!kcov_check_handle(remote_arg->common_handle, + true, false, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, + remote_arg->common_handle); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + t->kcov_handle = remote_arg->common_handle; + } + spin_unlock(&kcov_remote_lock); + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; default: return -ENOTTY; } @@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kcov *kcov; int res; + struct kcov_remote_arg *remote_arg = NULL; + unsigned int remote_num_handles; + unsigned long remote_arg_size; + + if (cmd == KCOV_REMOTE_ENABLE) { + if (get_user(remote_num_handles, (unsigned __user *)(arg + + offsetof(struct kcov_remote_arg, num_handles)))) + return -EFAULT; + if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES) + return -EINVAL; + remote_arg_size = struct_size(remote_arg, handles, + remote_num_handles); + remote_arg = memdup_user((void __user *)arg, remote_arg_size); + if (IS_ERR(remote_arg)) + return PTR_ERR(remote_arg); + if (remote_arg->num_handles != remote_num_handles) { + kfree(remote_arg); + return -EINVAL; + } + arg = (unsigned long)remote_arg; + } kcov = filep->private_data; spin_lock(&kcov->lock); res = kcov_ioctl_locked(kcov, cmd, arg); spin_unlock(&kcov->lock); + + kfree(remote_arg); + return res; } @@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = { .release = kcov_close, }; +/* + * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section + * of code in a kernel background thread to allow kcov to be used to collect + * coverage from that part of code. + * + * The handle argument of kcov_remote_start() identifies a code section that is + * used for coverage collection. A userspace process passes this handle to + * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting + * coverage for the code section identified by this handle. + * + * The usage of these annotations in the kernel code is different depending on + * the type of the kernel thread whose code is being annotated. + * + * For global kernel threads that are spawned in a limited number of instances + * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each + * instance must be assigned a unique 4-byte instance id. The instance id is + * then combined with a 1-byte subsystem id to get a handle via + * kcov_remote_handle(subsystem_id, instance_id). + * + * For local kernel threads that are spawned from system calls handler when a + * user interacts with some kernel interface (e.g. vhost workers), a handle is + * passed from a userspace process as the common_handle field of the + * kcov_remote_arg struct (note, that the user must generate a handle by using + * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an + * arbitrary 4-byte non-zero number as the instance id). This common handle + * then gets saved into the task_struct of the process that issued the + * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn + * kernel threads, the common handle must be retrived via kcov_common_handle() + * and passed to the spawned threads via custom annotations. Those kernel + * threads must in turn be annotated with kcov_remote_start(common_handle) and + * kcov_remote_stop(). All of the threads that are spawned by the same process + * obtain the same handle, hence the name "common". + * + * See Documentation/dev-tools/kcov.rst for more details. + * + * Internally, this function looks up the kcov device associated with the + * provided handle, allocates an area for coverage collection, and saves the + * pointers to kcov and area into the current task_struct to allow coverage to + * be collected via __sanitizer_cov_trace_pc() + * In turns kcov_remote_stop() clears those pointers from task_struct to stop + * collecting coverage and copies all collected coverage into the kcov area. + */ +void kcov_remote_start(u64 handle) +{ + struct kcov_remote *remote; + void *area; + struct task_struct *t; + unsigned int size; + enum kcov_mode mode; + int sequence; + + if (WARN_ON(!kcov_check_handle(handle, true, true, true))) + return; + if (WARN_ON(!in_task())) + return; + t = current; + /* + * Check that kcov_remote_start is not called twice + * nor called by user tasks (with enabled kcov). + */ + if (WARN_ON(t->kcov)) + return; + + kcov_debug("handle = %llx\n", handle); + + spin_lock(&kcov_remote_lock); + remote = kcov_remote_find(handle); + if (!remote) { + kcov_debug("no remote found"); + spin_unlock(&kcov_remote_lock); + return; + } + /* Put in kcov_remote_stop(). */ + kcov_get(remote->kcov); + t->kcov = remote->kcov; + /* + * Read kcov fields before unlock to prevent races with + * KCOV_DISABLE / kcov_remote_reset(). + */ + size = remote->kcov->remote_size; + mode = remote->kcov->mode; + sequence = remote->kcov->sequence; + area = kcov_remote_area_get(size); + spin_unlock(&kcov_remote_lock); + + if (!area) { + area = vmalloc(size * sizeof(unsigned long)); + if (!area) { + t->kcov = NULL; + kcov_put(remote->kcov); + return; + } + } + /* Reset coverage size. */ + *(u64 *)area = 0; + + kcov_debug("area = %px, size = %u", area, size); + + kcov_start(t, size, area, mode, sequence); + +} +EXPORT_SYMBOL(kcov_remote_start); + +static void kcov_move_area(enum kcov_mode mode, void *dst_area, + unsigned int dst_area_size, void *src_area) +{ + u64 word_size = sizeof(unsigned long); + u64 count_size, entry_size_log; + u64 dst_len, src_len; + void *dst_entries, *src_entries; + u64 dst_occupied, dst_free, bytes_to_move, entries_moved; + + kcov_debug("%px %u <= %px %lu\n", + dst_area, dst_area_size, src_area, *(unsigned long *)src_area); + + switch (mode) { + case KCOV_MODE_TRACE_PC: + dst_len = READ_ONCE(*(unsigned long *)dst_area); + src_len = *(unsigned long *)src_area; + count_size = sizeof(unsigned long); + entry_size_log = __ilog2_u64(sizeof(unsigned long)); + break; + case KCOV_MODE_TRACE_CMP: + dst_len = READ_ONCE(*(u64 *)dst_area); + src_len = *(u64 *)src_area; + count_size = sizeof(u64); + BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP)); + entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP); + break; + default: + WARN_ON(1); + return; + } + + /* As arm can't divide u64 integers use log of entry size. */ + if (dst_len > ((dst_area_size * word_size - count_size) >> + entry_size_log)) + return; + dst_occupied = count_size + (dst_len << entry_size_log); + dst_free = dst_area_size * word_size - dst_occupied; + bytes_to_move = min(dst_free, src_len << entry_size_log); + dst_entries = dst_area + dst_occupied; + src_entries = src_area + count_size; + memcpy(dst_entries, src_entries, bytes_to_move); + entries_moved = bytes_to_move >> entry_size_log; + + switch (mode) { + case KCOV_MODE_TRACE_PC: + WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved); + break; + case KCOV_MODE_TRACE_CMP: + WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved); + break; + default: + break; + } +} + +/* See the comment before kcov_remote_start() for usage details. */ +void kcov_remote_stop(void) +{ + struct task_struct *t = current; + struct kcov *kcov = t->kcov; + void *area = t->kcov_area; + unsigned int size = t->kcov_size; + int sequence = t->kcov_sequence; + + if (!kcov) { + kcov_debug("no kcov found\n"); + return; + } + + kcov_stop(t); + t->kcov = NULL; + + spin_lock(&kcov->lock); + /* + * KCOV_DISABLE could have been called between kcov_remote_start() + * and kcov_remote_stop(), hence the check. + */ + kcov_debug("move if: %d == %d && %d\n", + sequence, kcov->sequence, (int)kcov->remote); + if (sequence == kcov->sequence && kcov->remote) + kcov_move_area(kcov->mode, kcov->area, kcov->size, area); + spin_unlock(&kcov->lock); + + spin_lock(&kcov_remote_lock); + kcov_remote_area_put(area, size); + spin_unlock(&kcov_remote_lock); + + kcov_put(kcov); +} +EXPORT_SYMBOL(kcov_remote_stop); + +/* See the comment before kcov_remote_start() for usage details. */ +u64 kcov_common_handle(void) +{ + return current->kcov_handle; +} +EXPORT_SYMBOL(kcov_common_handle); + static int __init kcov_init(void) { /* -- cgit v1.2.3 From 5b009673594d569674a9e0e60109f6a1723075b0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:52:57 -0800 Subject: arch: ipcbuf.h: make uapi asm/ipcbuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/ipcbuf.h.s In file included from usr/include/asm/ipcbuf.h:1:0, from :32: usr/include/asm-generic/ipcbuf.h:21:2: error: unknown type name `__kernel_key_t' __kernel_key_t key; ^~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:22:2: error: unknown type name `__kernel_uid32_t' __kernel_uid32_t uid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:23:2: error: unknown type name `__kernel_gid32_t' __kernel_gid32_t gid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:24:2: error: unknown type name `__kernel_uid32_t' __kernel_uid32_t cuid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:25:2: error: unknown type name `__kernel_gid32_t' __kernel_gid32_t cgid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:26:2: error: unknown type name `__kernel_mode_t' __kernel_mode_t mode; ^~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:28:35: error: `__kernel_mode_t' undeclared here (not in a function) unsigned char __pad1[4 - sizeof(__kernel_mode_t)]; ^~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:31:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused1; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:32:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused2; ^~~~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-1-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/uapi/asm/ipcbuf.h | 2 ++ arch/sparc/include/uapi/asm/ipcbuf.h | 2 ++ arch/xtensa/include/uapi/asm/ipcbuf.h | 2 ++ include/uapi/asm-generic/ipcbuf.h | 2 ++ usr/include/Makefile | 1 - 5 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h index 5b1c4f47c656..1030cd186899 100644 --- a/arch/s390/include/uapi/asm/ipcbuf.h +++ b/arch/s390/include/uapi/asm/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __S390_IPCBUF_H__ #define __S390_IPCBUF_H__ +#include + /* * The user_ipc_perm structure for S/390 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..5b933a598a33 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __SPARC_IPCBUF_H #define __SPARC_IPCBUF_H +#include + /* * The ipc64_perm structure for sparc/sparc64 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/xtensa/include/uapi/asm/ipcbuf.h b/arch/xtensa/include/uapi/asm/ipcbuf.h index a57afa0b606f..3bd0642f6660 100644 --- a/arch/xtensa/include/uapi/asm/ipcbuf.h +++ b/arch/xtensa/include/uapi/asm/ipcbuf.h @@ -12,6 +12,8 @@ #ifndef _XTENSA_IPCBUF_H #define _XTENSA_IPCBUF_H +#include + /* * Pad space is left for: * - 32-bit mode_t and seq diff --git a/include/uapi/asm-generic/ipcbuf.h b/include/uapi/asm-generic/ipcbuf.h index 7d80dbd336fb..41a01b494fc7 100644 --- a/include/uapi/asm-generic/ipcbuf.h +++ b/include/uapi/asm-generic/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __ASM_GENERIC_IPCBUF_H #define __ASM_GENERIC_IPCBUF_H +#include + /* * The generic ipc64_perm structure: * Note extra padding because this structure is passed back and forth diff --git a/usr/include/Makefile b/usr/include/Makefile index 5acd9bb6d714..00cc763ec488 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/ipcbuf.h header-test- += asm/msgbuf.h header-test- += asm/sembuf.h header-test- += asm/shmbuf.h -- cgit v1.2.3 From 9ef0e004181956e158fb7ceb9b43810a193f80cd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:53:00 -0800 Subject: arch: msgbuf.h: make uapi asm/msgbuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/msgbuf.h.s In file included from usr/include/asm/msgbuf.h:6:0, from :32: usr/include/asm-generic/msgbuf.h:25:20: error: field `msg_perm' has incomplete type struct ipc64_perm msg_perm; ^~~~~~~~ usr/include/asm-generic/msgbuf.h:27:2: error: unknown type name `__kernel_time_t' __kernel_time_t msg_stime; /* last msgsnd time */ ^~~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:28:2: error: unknown type name `__kernel_time_t' __kernel_time_t msg_rtime; /* last msgrcv time */ ^~~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:29:2: error: unknown type name `__kernel_time_t' __kernel_time_t msg_ctime; /* last change time */ ^~~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:41:2: error: unknown type name `__kernel_pid_t' __kernel_pid_t msg_lspid; /* pid of last msgsnd */ ^~~~~~~~~~~~~~ usr/include/asm-generic/msgbuf.h:42:2: error: unknown type name `__kernel_pid_t' __kernel_pid_t msg_lrpid; /* last receive pid */ ^~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-2-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/uapi/asm/msgbuf.h | 1 + arch/parisc/include/uapi/asm/msgbuf.h | 1 + arch/powerpc/include/uapi/asm/msgbuf.h | 2 ++ arch/sparc/include/uapi/asm/msgbuf.h | 2 ++ arch/x86/include/uapi/asm/msgbuf.h | 3 +++ arch/xtensa/include/uapi/asm/msgbuf.h | 2 ++ include/uapi/asm-generic/msgbuf.h | 2 ++ usr/include/Makefile | 1 - 8 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h index 9e0c2e230274..128af72f2dfe 100644 --- a/arch/mips/include/uapi/asm/msgbuf.h +++ b/arch/mips/include/uapi/asm/msgbuf.h @@ -2,6 +2,7 @@ #ifndef _ASM_MSGBUF_H #define _ASM_MSGBUF_H +#include /* * The msqid64_ds structure for the MIPS architecture. diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h index 3b877335da38..3b4de5b668c3 100644 --- a/arch/parisc/include/uapi/asm/msgbuf.h +++ b/arch/parisc/include/uapi/asm/msgbuf.h @@ -3,6 +3,7 @@ #define _PARISC_MSGBUF_H #include +#include /* * The msqid64_ds structure for parisc architecture, copied from sparc. diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h index 969bd83e4d3d..7919b2ba41b5 100644 --- a/arch/powerpc/include/uapi/asm/msgbuf.h +++ b/arch/powerpc/include/uapi/asm/msgbuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_MSGBUF_H #define _ASM_POWERPC_MSGBUF_H +#include + /* * The msqid64_ds structure for the PowerPC architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h index eeeb91933280..0954552da188 100644 --- a/arch/sparc/include/uapi/asm/msgbuf.h +++ b/arch/sparc/include/uapi/asm/msgbuf.h @@ -2,6 +2,8 @@ #ifndef _SPARC_MSGBUF_H #define _SPARC_MSGBUF_H +#include + /* * The msqid64_ds structure for sparc64 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h index 7c5bb43ed8af..b3d0664fadc9 100644 --- a/arch/x86/include/uapi/asm/msgbuf.h +++ b/arch/x86/include/uapi/asm/msgbuf.h @@ -5,6 +5,9 @@ #if !defined(__x86_64__) || !defined(__ILP32__) #include #else + +#include + /* * The msqid64_ds structure for x86 architecture with x32 ABI. * diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h index d6915e9f071c..1ed2c85b693a 100644 --- a/arch/xtensa/include/uapi/asm/msgbuf.h +++ b/arch/xtensa/include/uapi/asm/msgbuf.h @@ -17,6 +17,8 @@ #ifndef _XTENSA_MSGBUF_H #define _XTENSA_MSGBUF_H +#include + struct msqid64_ds { struct ipc64_perm msg_perm; #ifdef __XTENSA_EB__ diff --git a/include/uapi/asm-generic/msgbuf.h b/include/uapi/asm-generic/msgbuf.h index af95aa89012e..6504d7b741ce 100644 --- a/include/uapi/asm-generic/msgbuf.h +++ b/include/uapi/asm-generic/msgbuf.h @@ -3,6 +3,8 @@ #define __ASM_GENERIC_MSGBUF_H #include +#include + /* * generic msqid64_ds structure. * diff --git a/usr/include/Makefile b/usr/include/Makefile index 00cc763ec488..6bd22f83b0df 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/msgbuf.h header-test- += asm/sembuf.h header-test- += asm/shmbuf.h header-test- += asm/signal.h -- cgit v1.2.3 From 0fb9dc28679a627f84974165c8011e0630529ece Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:53:03 -0800 Subject: arch: sembuf.h: make uapi asm/sembuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/sembuf.h.s In file included from :32:0: usr/include/asm/sembuf.h:17:20: error: field `sem_perm' has incomplete type struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ ^~~~~~~~ usr/include/asm/sembuf.h:24:2: error: unknown type name `__kernel_time_t' __kernel_time_t sem_otime; /* last semop time */ ^~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:25:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused1; ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:26:2: error: unknown type name `__kernel_time_t' __kernel_time_t sem_ctime; /* last change time */ ^~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:27:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused2; ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:29:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t sem_nsems; /* no. of semaphores in array */ ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:30:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused3; ^~~~~~~~~~~~~~~~ usr/include/asm/sembuf.h:31:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused4; ^~~~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-3-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/uapi/asm/sembuf.h | 2 ++ arch/parisc/include/uapi/asm/sembuf.h | 1 + arch/powerpc/include/uapi/asm/sembuf.h | 2 ++ arch/sparc/include/uapi/asm/sembuf.h | 2 ++ arch/x86/include/uapi/asm/sembuf.h | 2 ++ arch/xtensa/include/uapi/asm/sembuf.h | 1 + include/uapi/asm-generic/sembuf.h | 1 + usr/include/Makefile | 1 - 8 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h index 43e1b4a2f68a..ba7fe0c89e7d 100644 --- a/arch/mips/include/uapi/asm/sembuf.h +++ b/arch/mips/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_SEMBUF_H #define _ASM_SEMBUF_H +#include + /* * The semid64_ds structure for the MIPS architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h index 8241cf126018..e2ca4301c7fb 100644 --- a/arch/parisc/include/uapi/asm/sembuf.h +++ b/arch/parisc/include/uapi/asm/sembuf.h @@ -3,6 +3,7 @@ #define _PARISC_SEMBUF_H #include +#include /* * The semid64_ds structure for parisc architecture. diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h index 008ae77c6746..85e96ccb5f0f 100644 --- a/arch/powerpc/include/uapi/asm/sembuf.h +++ b/arch/powerpc/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_SEMBUF_H #define _ASM_POWERPC_SEMBUF_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h index cbcbaa4e7128..10621d066d79 100644 --- a/arch/sparc/include/uapi/asm/sembuf.h +++ b/arch/sparc/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _SPARC_SEMBUF_H #define _SPARC_SEMBUF_H +#include + /* * The semid64_ds structure for sparc architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index 93030e97269a..71205b02a191 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SEMBUF_H #define _ASM_X86_SEMBUF_H +#include + /* * The semid64_ds structure for x86 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h index 09f348d643f1..3b9cdd406dfe 100644 --- a/arch/xtensa/include/uapi/asm/sembuf.h +++ b/arch/xtensa/include/uapi/asm/sembuf.h @@ -22,6 +22,7 @@ #define _XTENSA_SEMBUF_H #include +#include struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ diff --git a/include/uapi/asm-generic/sembuf.h b/include/uapi/asm-generic/sembuf.h index 137606018c6a..0e709bd3d730 100644 --- a/include/uapi/asm-generic/sembuf.h +++ b/include/uapi/asm-generic/sembuf.h @@ -3,6 +3,7 @@ #define __ASM_GENERIC_SEMBUF_H #include +#include /* * The semid64_ds structure for x86 architecture. diff --git a/usr/include/Makefile b/usr/include/Makefile index 6bd22f83b0df..4a753a48767b 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/sembuf.h header-test- += asm/shmbuf.h header-test- += asm/signal.h header-test- += asm/ucontext.h -- cgit v1.2.3 From 164166558aacea01b99c8c8ffb710d930405ba69 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 5 Dec 2019 13:35:11 +0100 Subject: netfilter: uapi: Avoid undefined left-shift in xt_sctp.h With 'bytes(__u32)' being 32, a left-shift of 31 may happen which is undefined for the signed 32-bit value 1. Avoid this by declaring 1 as unsigned. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h index 4bc6d1a08781..b4d804a9fccb 100644 --- a/include/uapi/linux/netfilter/xt_sctp.h +++ b/include/uapi/linux/netfilter/xt_sctp.h @@ -41,19 +41,19 @@ struct xt_sctp_info { #define SCTP_CHUNKMAP_SET(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] |= \ - 1 << (type % bytes(__u32)); \ + 1u << (type % bytes(__u32)); \ } while (0) #define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] &= \ - ~(1 << (type % bytes(__u32))); \ + ~(1u << (type % bytes(__u32))); \ } while (0) #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ ({ \ ((chunkmap)[type / bytes (__u32)] & \ - (1 << (type % bytes (__u32)))) ? 1: 0; \ + (1u << (type % bytes (__u32)))) ? 1: 0; \ }) #define SCTP_CHUNKMAP_RESET(chunkmap) \ -- cgit v1.2.3 From 4e88d6e7793f2f445f43bd608828541d7f43b608 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 7 Dec 2019 20:59:47 -0700 Subject: io_uring: allow unbreakable links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some commands will invariably end in a failure in the sense that the completion result will be less than zero. One such example is timeouts that don't have a completion count set, they will always complete with -ETIME unless cancelled. For linked commands, we sever links and fail the rest of the chain if the result is less than zero. Since we have commands where we know that will happen, add IOSQE_IO_HARDLINK as a stronger link that doesn't sever regardless of the completion result. Note that the link will still sever if we fail submitting the parent request, hard links are only resilient in the presence of completion results for requests that did submit correctly. Cc: stable@vger.kernel.org # v5.4 Reviewed-by: Pavel Begunkov Reported-by: 李通洲 Signed-off-by: Jens Axboe --- fs/io_uring.c | 84 +++++++++++++++++++++++-------------------- include/uapi/linux/io_uring.h | 1 + 2 files changed, 47 insertions(+), 38 deletions(-) (limited to 'include/uapi') diff --git a/fs/io_uring.c b/fs/io_uring.c index 405be10da73d..040e6c11ff37 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -377,6 +377,7 @@ struct io_kiocb { #define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */ #define REQ_F_INFLIGHT 16384 /* on inflight list */ #define REQ_F_COMP_LOCKED 32768 /* completion under lock */ +#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */ u64 user_data; u32 result; u32 sequence; @@ -1292,6 +1293,12 @@ static void kiocb_end_write(struct io_kiocb *req) file_end_write(req->file); } +static inline void req_set_fail_links(struct io_kiocb *req) +{ + if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK) + req->flags |= REQ_F_FAIL_LINK; +} + static void io_complete_rw_common(struct kiocb *kiocb, long res) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); @@ -1299,8 +1306,8 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res) if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); - if ((req->flags & REQ_F_LINK) && res != req->result) - req->flags |= REQ_F_FAIL_LINK; + if (res != req->result) + req_set_fail_links(req); io_cqring_add_event(req, res); } @@ -1330,8 +1337,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); - if ((req->flags & REQ_F_LINK) && res != req->result) - req->flags |= REQ_F_FAIL_LINK; + if (res != req->result) + req_set_fail_links(req); req->result = res; if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED; @@ -1956,8 +1963,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req_find_next(req, nxt); return 0; @@ -2003,8 +2010,8 @@ static int io_sync_file_range(struct io_kiocb *req, ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req_find_next(req, nxt); return 0; @@ -2079,8 +2086,8 @@ static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, out: io_cqring_add_event(req, ret); - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_put_req_find_next(req, nxt); return 0; #else @@ -2161,8 +2168,8 @@ static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, out: io_cqring_add_event(req, ret); - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_put_req_find_next(req, nxt); return 0; #else @@ -2196,8 +2203,8 @@ static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe, } if (ret == -ERESTARTSYS) ret = -EINTR; - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req_find_next(req, nxt); return 0; @@ -2263,8 +2270,8 @@ static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (ret == -ERESTARTSYS) ret = -EINTR; out: - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req_find_next(req, nxt); return 0; @@ -2340,8 +2347,8 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) spin_unlock_irq(&ctx->completion_lock); io_cqring_add_event(req, ret); - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_put_req(req); return 0; } @@ -2399,8 +2406,8 @@ static void io_poll_complete_work(struct io_wq_work **workptr) io_cqring_ev_posted(ctx); - if (ret < 0 && req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_put_req_find_next(req, &nxt); if (nxt) *workptr = &nxt->work; @@ -2582,8 +2589,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); - if (req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + req_set_fail_links(req); io_put_req(req); return HRTIMER_NORESTART; } @@ -2608,8 +2614,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) if (ret == -1) return -EALREADY; - if (req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + req_set_fail_links(req); io_cqring_fill_event(req, -ECANCELED); io_put_req(req); return 0; @@ -2640,8 +2645,8 @@ static int io_timeout_remove(struct io_kiocb *req, io_commit_cqring(ctx); spin_unlock_irq(&ctx->completion_lock); io_cqring_ev_posted(ctx); - if (ret < 0 && req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_put_req(req); return 0; } @@ -2822,8 +2827,8 @@ done: spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); - if (ret < 0 && (req->flags & REQ_F_LINK)) - req->flags |= REQ_F_FAIL_LINK; + if (ret < 0) + req_set_fail_links(req); io_put_req_find_next(req, nxt); } @@ -3044,8 +3049,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr) io_put_req(req); if (ret) { - if (req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req(req); } @@ -3179,8 +3183,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) spin_unlock_irqrestore(&ctx->completion_lock, flags); if (prev) { - if (prev->flags & REQ_F_LINK) - prev->flags |= REQ_F_FAIL_LINK; + req_set_fail_links(prev); io_async_find_and_cancel(ctx, req, prev->user_data, NULL, -ETIME); io_put_req(prev); @@ -3273,8 +3276,7 @@ err: /* and drop final reference, if we failed */ if (ret) { io_cqring_add_event(req, ret); - if (req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + req_set_fail_links(req); io_put_req(req); } } @@ -3293,8 +3295,7 @@ static void io_queue_sqe(struct io_kiocb *req) if (ret) { if (ret != -EIOCBQUEUED) { io_cqring_add_event(req, ret); - if (req->flags & REQ_F_LINK) - req->flags |= REQ_F_FAIL_LINK; + req_set_fail_links(req); io_double_put_req(req); } } else @@ -3311,7 +3312,8 @@ static inline void io_queue_link_head(struct io_kiocb *req) } -#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ + IOSQE_IO_HARDLINK) static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state, struct io_kiocb **link) @@ -3349,6 +3351,9 @@ err_req: if (req->sqe->flags & IOSQE_IO_DRAIN) (*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN; + if (req->sqe->flags & IOSQE_IO_HARDLINK) + req->flags |= REQ_F_HARDLINK; + io = kmalloc(sizeof(*io), GFP_KERNEL); if (!io) { ret = -EAGAIN; @@ -3358,13 +3363,16 @@ err_req: ret = io_req_defer_prep(req, io); if (ret) { kfree(io); + /* fail even hard links since we don't submit */ prev->flags |= REQ_F_FAIL_LINK; goto err_req; } trace_io_uring_link(ctx, req, prev); list_add_tail(&req->link_list, &prev->link_list); - } else if (req->sqe->flags & IOSQE_IO_LINK) { + } else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { req->flags |= REQ_F_LINK; + if (req->sqe->flags & IOSQE_IO_HARDLINK) + req->flags |= REQ_F_HARDLINK; INIT_LIST_HEAD(&req->link_list); *link = req; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index eabccb46edd1..ea231366f5fd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -48,6 +48,7 @@ struct io_uring_sqe { #define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ #define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */ #define IOSQE_IO_LINK (1U << 2) /* links next sqe */ +#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */ /* * io_uring_setup() flags -- cgit v1.2.3 From 9e3aa61ae3e01ce1ce6361a41ef725e1f4d1d2bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Dec 2019 15:55:43 -0700 Subject: io_uring: ensure we return -EINVAL on unknown opcode If we submit an unknown opcode and have fd == -1, io_op_needs_file() will return true as we default to needing a file. Then when we go and assign the file, we find the 'fd' invalid and return -EBADF. We really should be returning -EINVAL for that case, as we normally do for unsupported opcodes. Change io_op_needs_file() to have the following return values: 0 - does not need a file 1 - does need a file < 0 - error value and use this to pass back the right value for this invalid case. Signed-off-by: Jens Axboe --- fs/io_uring.c | 21 ++++++++++++++------- include/uapi/linux/io_uring.h | 39 ++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include/uapi') diff --git a/fs/io_uring.c b/fs/io_uring.c index 42de210be631..9b1833fedc5c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3062,7 +3062,12 @@ static void io_wq_submit_work(struct io_wq_work **workptr) } } -static bool io_op_needs_file(const struct io_uring_sqe *sqe) +static bool io_req_op_valid(int op) +{ + return op >= IORING_OP_NOP && op < IORING_OP_LAST; +} + +static int io_op_needs_file(const struct io_uring_sqe *sqe) { int op = READ_ONCE(sqe->opcode); @@ -3073,9 +3078,11 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe) case IORING_OP_TIMEOUT_REMOVE: case IORING_OP_ASYNC_CANCEL: case IORING_OP_LINK_TIMEOUT: - return false; + return 0; default: - return true; + if (io_req_op_valid(op)) + return 1; + return -EINVAL; } } @@ -3092,7 +3099,7 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; unsigned flags; - int fd; + int fd, ret; flags = READ_ONCE(req->sqe->flags); fd = READ_ONCE(req->sqe->fd); @@ -3100,8 +3107,9 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req) if (flags & IOSQE_IO_DRAIN) req->flags |= REQ_F_IO_DRAIN; - if (!io_op_needs_file(req->sqe)) - return 0; + ret = io_op_needs_file(req->sqe); + if (ret <= 0) + return ret; if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->file_table || @@ -3312,7 +3320,6 @@ static inline void io_queue_link_head(struct io_kiocb *req) io_queue_sqe(req); } - #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ IOSQE_IO_HARDLINK) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ea231366f5fd..a3300e1b9a01 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -58,23 +58,28 @@ struct io_uring_sqe { #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ -#define IORING_OP_NOP 0 -#define IORING_OP_READV 1 -#define IORING_OP_WRITEV 2 -#define IORING_OP_FSYNC 3 -#define IORING_OP_READ_FIXED 4 -#define IORING_OP_WRITE_FIXED 5 -#define IORING_OP_POLL_ADD 6 -#define IORING_OP_POLL_REMOVE 7 -#define IORING_OP_SYNC_FILE_RANGE 8 -#define IORING_OP_SENDMSG 9 -#define IORING_OP_RECVMSG 10 -#define IORING_OP_TIMEOUT 11 -#define IORING_OP_TIMEOUT_REMOVE 12 -#define IORING_OP_ACCEPT 13 -#define IORING_OP_ASYNC_CANCEL 14 -#define IORING_OP_LINK_TIMEOUT 15 -#define IORING_OP_CONNECT 16 +enum { + IORING_OP_NOP, + IORING_OP_READV, + IORING_OP_WRITEV, + IORING_OP_FSYNC, + IORING_OP_READ_FIXED, + IORING_OP_WRITE_FIXED, + IORING_OP_POLL_ADD, + IORING_OP_POLL_REMOVE, + IORING_OP_SYNC_FILE_RANGE, + IORING_OP_SENDMSG, + IORING_OP_RECVMSG, + IORING_OP_TIMEOUT, + IORING_OP_TIMEOUT_REMOVE, + IORING_OP_ACCEPT, + IORING_OP_ASYNC_CANCEL, + IORING_OP_LINK_TIMEOUT, + IORING_OP_CONNECT, + + /* this goes last, obviously */ + IORING_OP_LAST, +}; /* * sqe->fsync_flags -- cgit v1.2.3 From 911bde0fe5ccd7e55760be9d6dcc67a8850fcc12 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 12 Dec 2019 12:14:37 +0100 Subject: mac80211: Turn AQL into an NL80211_EXT_FEATURE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of just having an airtime flag in debugfs, turn AQL into a proper NL80211_EXT_FEATURE, so drivers can turn it on when they are ready, and so we also expose the presence of the feature to userspace. This also has the effect of flipping the default, so drivers have to opt in to using AQL instead of getting it by default with TXQs. To keep functionality the same as pre-patch, we set this feature for ath10k (which is where it is needed the most). While we're at it, split out the debugfs interface so AQL gets its own per-station debugfs file instead of using the 'airtime' file. [Johannes:] This effectively disables AQL for iwlwifi, where it fixes a number of issues: * TSO in iwlwifi is causing underflows and associated warnings in AQL * HE (802.11ax) rates aren't reported properly so at HE rates, AQL could never have a valid estimate (it'd use 6 Mbps instead of up to 2400!) Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20191212111437.224294-1-toke@redhat.com Fixes: 3ace10f5b5ad ("mac80211: Implement Airtime-based Queue Limit (AQL)") Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 1 + include/uapi/linux/nl80211.h | 5 +++ net/mac80211/debugfs_sta.c | 76 ++++++++++++++++++++++++++--------- net/mac80211/main.c | 4 +- net/mac80211/sta_info.c | 3 ++ net/mac80211/sta_info.h | 1 - net/mac80211/tx.c | 4 +- 7 files changed, 70 insertions(+), 24 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 83cc8778ca1e..978f0037ed52 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -8958,6 +8958,7 @@ int ath10k_mac_register(struct ath10k *ar) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS); wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL); + wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AQL); if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map) || test_bit(WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS, ar->wmi.svc_map)) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 341e0e8cae46..5eab191607f8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5517,6 +5517,10 @@ enum nl80211_feature_flags { * with VLAN tagged frames and separate VLAN-specific netdevs added using * vconfig similarly to the Ethernet case. * + * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL) + * feature, which prevents bufferbloat by using the expected transmission + * time to limit the amount of data buffered in the hardware. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5563,6 +5567,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_STA_TX_PWR, NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, + NL80211_EXT_FEATURE_AQL, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b3c9001d1f43..c80b1e163ea4 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -201,8 +201,6 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; u64 rx_airtime = 0, tx_airtime = 0; s64 deficit[IEEE80211_NUM_ACS]; - u32 q_depth[IEEE80211_NUM_ACS]; - u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS]; ssize_t rv; int ac; @@ -214,6 +212,56 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, rx_airtime += sta->airtime[ac].rx_airtime; tx_airtime += sta->airtime[ac].tx_airtime; deficit[ac] = sta->airtime[ac].deficit; + spin_unlock_bh(&local->active_txq_lock[ac]); + } + + p += scnprintf(p, bufsz + buf - p, + "RX: %llu us\nTX: %llu us\nWeight: %u\n" + "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n", + rx_airtime, tx_airtime, sta->airtime_weight, + deficit[0], deficit[1], deficit[2], deficit[3]); + + rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return rv; +} + +static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->sdata->local; + int ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->active_txq_lock[ac]); + sta->airtime[ac].rx_airtime = 0; + sta->airtime[ac].tx_airtime = 0; + sta->airtime[ac].deficit = sta->airtime_weight; + spin_unlock_bh(&local->active_txq_lock[ac]); + } + + return count; +} +STA_OPS_RW(airtime); + +static ssize_t sta_aql_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->sdata->local; + size_t bufsz = 400; + char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; + u32 q_depth[IEEE80211_NUM_ACS]; + u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS]; + ssize_t rv; + int ac; + + if (!buf) + return -ENOMEM; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->active_txq_lock[ac]); q_limit_l[ac] = sta->airtime[ac].aql_limit_low; q_limit_h[ac] = sta->airtime[ac].aql_limit_high; spin_unlock_bh(&local->active_txq_lock[ac]); @@ -221,12 +269,8 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, } p += scnprintf(p, bufsz + buf - p, - "RX: %llu us\nTX: %llu us\nWeight: %u\n" - "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n" "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n" "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n", - rx_airtime, tx_airtime, sta->airtime_weight, - deficit[0], deficit[1], deficit[2], deficit[3], q_depth[0], q_depth[1], q_depth[2], q_depth[3], q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1], q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]), @@ -236,11 +280,10 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, return rv; } -static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, +static ssize_t sta_aql_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - struct ieee80211_local *local = sta->sdata->local; u32 ac, q_limit_l, q_limit_h; char _buf[100] = {}, *buf = _buf; @@ -251,7 +294,7 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, return -EFAULT; buf[sizeof(_buf) - 1] = '\0'; - if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h) + if (sscanf(buf, "limit %u %u %u", &ac, &q_limit_l, &q_limit_h) != 3) return -EINVAL; @@ -261,17 +304,10 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, sta->airtime[ac].aql_limit_low = q_limit_l; sta->airtime[ac].aql_limit_high = q_limit_h; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - spin_lock_bh(&local->active_txq_lock[ac]); - sta->airtime[ac].rx_airtime = 0; - sta->airtime[ac].tx_airtime = 0; - sta->airtime[ac].deficit = sta->airtime_weight; - spin_unlock_bh(&local->active_txq_lock[ac]); - } - return count; } -STA_OPS_RW(airtime); +STA_OPS_RW(aql); + static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -996,6 +1032,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) DEBUGFS_ADD(airtime); + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AQL)) + DEBUGFS_ADD(aql); + debugfs_create_xul("driver_buffered_tids", 0400, sta->debugfs_dir, &sta->driver_buffered_tids); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6cca0853f183..4c2b5ba3ac09 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -672,9 +672,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H; } - local->airtime_flags = AIRTIME_USE_TX | - AIRTIME_USE_RX | - AIRTIME_USE_AQL; + local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX; local->aql_threshold = IEEE80211_AQL_THRESHOLD; atomic_set(&local->aql_total_pending_airtime, 0); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8eafd81e97b4..0f5f40678885 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1916,6 +1916,9 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, { int tx_pending; + if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) + return; + if (!tx_completed) { if (sta) atomic_add(tx_airtime, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ad5d8a4ae56d..c00e28585f9d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -127,7 +127,6 @@ enum ieee80211_agg_stop_reason { /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */ #define AIRTIME_USE_TX BIT(0) #define AIRTIME_USE_RX BIT(1) -#define AIRTIME_USE_AQL BIT(2) struct airtime_info { u64 rx_airtime; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 38b58a00db46..a8a7306a1f56 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3677,7 +3677,7 @@ begin: IEEE80211_SKB_CB(skb)->control.vif = vif; - if (local->airtime_flags & AIRTIME_USE_AQL) { + if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { u32 airtime; airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta, @@ -3799,7 +3799,7 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct sta_info *sta; struct ieee80211_local *local = hw_to_local(hw); - if (!(local->airtime_flags & AIRTIME_USE_AQL)) + if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) return true; if (!txq->sta) -- cgit v1.2.3