From 492e05b0654126bd6a04473028ac4c8cfc22ccec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Sep 2017 12:41:57 -0300 Subject: tools: Update asm-generic/mman-common.h copy from the kernel To get the defines introduced in the commit aafd4562dfee ("mm: arch: consolidate mmap hugetlb size encodings"), that doesn't brings anything interesting for tools/, but also the ones from d2cd9ede6e19 ("mm,fork: introduce MADV_WIPEONFORK"), which does, and ends up triggering an auto-update to the tools/perf/trace/beauty/generated/madvise_behavior_array.c file, supporting the newly introduced 'behavior' values. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman-common.h' differs from latest version at 'include/uapi/asm-generic/mman-common.h' Testing it: # cat madvise.c #include #include #ifndef MADV_WIPEONFORK #define MADV_WIPEONFORK 18 #endif #ifndef MADV_KEEPONFORK #define MADV_KEEPONFORK 19 #endif int main(void) { void *ptr = mmap(NULL, 4096, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); madvise(ptr, 4096, MADV_WIPEONFORK); madvise(ptr, 4096, MADV_KEEPONFORK); return 0; } [root@jouet c]# perf trace -e mmap,madvise ./madvise 0.000 ( 0.013 ms): madvise/11732 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS ) = 0x7fba6e015000 0.047 ( 0.004 ms): madvise/11732 mmap(len: 160164, prot: READ, flags: PRIVATE, fd: 3 ) = 0x7fba6dfed000 0.084 ( 0.009 ms): madvise/11732 mmap(len: 4000096, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3 ) = 0x7fba6da20000 0.109 ( 0.006 ms): madvise/11732 mmap(addr: 0x7fba6dde7000, len: 24576, prot: READ|WRITE, flags: PRIVATE|DENYWRITE|FIXED, fd: 3, off: 1863680) = 0x7fba6dde7000 0.125 ( 0.004 ms): madvise/11732 mmap(addr: 0x7fba6dded000, len: 14688, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS|FIXED) = 0x7fba6dded000 0.150 ( 0.006 ms): madvise/11732 mmap(len: 12288, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS ) = 0x7fba6dfea000 0.288 ( 0.003 ms): madvise/11732 mmap(len: 4096, 
flags: PRIVATE|ANONYMOUS ) = 0x7fba6e014000 0.292 ( 0.002 ms): madvise/11732 madvise(start: 0x7fba6e014000, len_in: 4096, behavior: MADV_WIPEONFORK) = 0 0.295 ( 0.001 ms): madvise/11732 madvise(start: 0x7fba6e014000, len_in: 4096, behavior: MADV_KEEPONFORK) = 0 # uname -a Linux jouet 4.13.0+ #2 SMP Mon Sep 18 17:22:46 -03 2017 x86_64 x86_64 x86_64 GNU/Linux # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Mike Kravetz Cc: Namhyung Kim Cc: Rik van Riel Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yev9rexu02cl7cjeozzmrl9t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/mman-common.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 8c27db0c5c08..203268f9231e 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -58,20 +58,12 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. 
- */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ -- cgit v1.2.3 From 6ae8eefc6c8fe050f057781b70a83262eb0a61ee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Sep 2017 12:12:17 -0300 Subject: tools include: Do not use poison with C++ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LIST_POISON[12] are used to initialize list_head and hlist_node pointers, and do void pointer arithmetic, which C++ doesn't like, so, to avoid drifting from the kernel by introducing some HLIST_POISON to do away with void pointer math, just make those poisoned pointers be NULL when building it with a C++ compiler. Noticed with: $ make LLVM_CONFIG=/usr/bin/llvm-config-3.9 LIBCLANGLLVM=1 CXX util/c++/clang.o CXX util/c++/clang-test.o In file included from /home/lizj/linux/tools/include/linux/list.h:5:0, from /home/lizj/linux/tools/perf/util/namespaces.h:13, from /home/lizj/linux/tools/perf/util/util.h:15, from /home/lizj/linux/tools/perf/util/util-cxx.h:20, from util/c++/clang-c.h:5, from util/c++/clang-test.cpp:2: /home/lizj/linux/tools/include/linux/list.h: In function ‘void list_del(list_head*)’: /home/lizj/linux/tools/include/linux/poison.h:14:31: error: pointer of type ‘void *’ used in arithmetic [-Werror=pointer-arith] # define POISON_POINTER_DELTA 0 ^ /home/lizj/linux/tools/include/linux/poison.h:22:41: note: in expansion of macro ‘POISON_POINTER_DELTA’ #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) ^ /home/lizj/linux/tools/include/linux/list.h:107:16: note: in expansion of macro ‘LIST_POISON1’ entry->next = LIST_POISON1; ^ In file included from /home/lizj/linux/tools/perf/util/namespaces.h:13:0, from /home/lizj/linux/tools/perf/util/util.h:15, from /home/lizj/linux/tools/perf/util/util-cxx.h:20, from util/c++/clang-c.h:5, from util/c++/clang-test.cpp:2: 
/home/lizj/linux/tools/include/linux/list.h:107:14: error: invalid conversion from ‘void*’ to ‘list_head*’ [-fpermissive] Reported-by: Li Zhijian Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Philip Li Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-m5ei2o0mjshucbr28baf5lqz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/poison.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 51334edec506..f306a7642509 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -14,6 +14,10 @@ # define POISON_POINTER_DELTA 0 #endif +#ifdef __cplusplus +#define LIST_POISON1 NULL +#define LIST_POISON2 NULL +#else /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses @@ -21,6 +25,7 @@ */ #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) #define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) +#endif /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From ac29991ba137cc0e3b0f647fb41e79300230f15c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:52 +0200 Subject: bpf: update bpf.h uapi header for tools Looks like a couple of updates missed to get carried into tools/include/uapi/, so copy the bpf.h header as usual to pull in latest updates. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- tools/include/uapi/linux/bpf.h | 45 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 461811e57140..e43491ac4823 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -143,12 +143,6 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -enum bpf_sockmap_flags { - BPF_SOCKMAP_UNSPEC, - BPF_SOCKMAP_STRPARSER, - __MAX_BPF_SOCKMAP_FLAG -}; - /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup @@ -368,9 +362,20 @@ union bpf_attr { * int bpf_redirect(ifindex, flags) * redirect to another netdev * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) + * redirect to endpoint in map + * @map: pointer to dev map + * @key: index in map to lookup + * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -577,6 +582,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ 
FN(unspec), \ @@ -632,7 +643,8 @@ union bpf_attr { FN(skb_adjust_room), \ FN(redirect_map), \ FN(sk_redirect_map), \ - FN(sock_map_update), + FN(sock_map_update), \ + FN(xdp_adjust_meta), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -710,7 +722,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -718,6 +730,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -753,20 +768,23 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other - * return codes are reserved for future use. Unknown return codes will result - * in packet drop. + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). */ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook @@ -775,6 +793,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { -- cgit v1.2.3 From 88cda1c9da02c8aa31e1d5dcf22e8a35cc8c19f2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:54 -0700 Subject: bpf: libbpf: Provide basic API support to specify BPF obj name This patch extends the libbpf to provide API support to allow specifying BPF object name. 
In tools/lib/bpf/libbpf, the C symbol of the function and the map is used as the name. Regarding the section name, all maps are under the same section named "maps". Hence, the section name is not a good choice for a map's name. To be consistent with maps, bpf_prog follows suit and uses its function symbol as the prog's name. This patch adds logic to collect the functions' symbols in libbpf. There is existing code to collect the maps' symbols, so no change is needed there. The bpf_load_program_name() and bpf_create_map_name() are added to take the name argument. For the other bpf_create_map_xxx() variants, a name argument is directly added to them. In samples/bpf, bpf_load.c in particular, the symbol is also used as the map's name and the map symbols have already been collected in the existing code. For bpf_prog, bpf_load.c does not collect the function symbol name. We can consider collecting them later if there is a need to continue supporting bpf_load.c. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- samples/bpf/bpf_load.c | 2 + samples/bpf/map_perf_test_user.c | 1 + tools/include/uapi/linux/bpf.h | 10 +++ tools/lib/bpf/bpf.c | 57 +++++++++++---- tools/lib/bpf/bpf.h | 23 ++++-- tools/lib/bpf/libbpf.c | 109 +++++++++++++++++++++------- tools/testing/selftests/bpf/test_verifier.c | 2 +- 7 files changed, 157 insertions(+), 47 deletions(-) (limited to 'tools/include') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 6aa50098dfb8..18b1c8dd0391 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -221,6 +221,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, inner_map_fd, maps[i].def.max_entries, @@ -228,6 +229,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, numa_node); } else { map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, maps[i].def.value_size, maps[i].def.max_entries, diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index a0310fc70057..519d9af4b04a 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -137,6 +137,7 @@ static void do_test_lru(enum test_type test, int cpu) inner_lru_map_fds[cpu] = bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + test_map_names[INNER_LRU_HASH_PREALLOC], sizeof(uint32_t), sizeof(long), inner_lru_hash_size, 0, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e43491ac4823..6d2137b4cf38 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -175,6 +175,8 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -188,6 +190,7 @@ union bpf_attr { 
__u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ + __u8 map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -210,6 +213,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + __u8 prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -812,6 +816,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -821,6 +830,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1d6907d379c9..daf624e4c720 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -46,6 +46,8 @@ # endif #endif +#define min(x, y) ((x) < (y) ? (x) : (y)) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -57,10 +59,11 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node) +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) { + __u32 name_len = name ? 
strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -70,6 +73,8 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, attr.value_size = value_size; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -81,14 +86,23 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, key_size, value_size, + return bpf_create_map_node(map_type, NULL, key_size, value_size, max_entries, map_flags, -1); } -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) +{ + return bpf_create_map_node(map_type, name, key_size, value_size, + max_entries, map_flags, -1); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { + __u32 name_len = name ? 
strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -99,6 +113,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -107,19 +123,24 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags) +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd, - max_entries, map_flags, -1); + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); } -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz) +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) { int fd; union bpf_attr attr; + __u32 name_len = name ? 
strlen(name) : 0; bzero(&attr, sizeof(attr)); attr.prog_type = type; @@ -130,6 +151,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_size = 0; attr.log_level = 0; attr.kern_version = kern_version; + memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (fd >= 0 || !log_buf || !log_buf_sz) @@ -143,6 +165,15 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + return bpf_load_program_name(type, NULL, insns, insns_cnt, license, + kern_version, log_buf, log_buf_sz); +} + int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, int strict_alignment, const char *license, __u32 kern_version, diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index b8ea5843c39e..118d00535a0d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,19 +24,28 @@ #include #include -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node); +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node); +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags); int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); -int 
bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags); +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz); int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 35f6dfcdc565..4f402dcdf372 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -171,6 +171,7 @@ int libbpf_strerror(int err, char *buf, size_t size) struct bpf_program { /* Index in elf obj file, for relocation use. */ int idx; + char *name; char *section_name; struct bpf_insn *insns; size_t insns_cnt; @@ -283,6 +284,7 @@ static void bpf_program__exit(struct bpf_program *prog) prog->clear_priv = NULL; bpf_program__unload(prog); + zfree(&prog->name); zfree(&prog->section_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -293,26 +295,27 @@ static void bpf_program__exit(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, char *name, int idx, - struct bpf_program *prog) +bpf_program__init(void *data, size_t size, char *section_name, int idx, + struct bpf_program *prog) { if (size < sizeof(struct bpf_insn)) { - pr_warning("corrupted section '%s'\n", name); + pr_warning("corrupted section '%s'\n", section_name); return -EINVAL; } bzero(prog, sizeof(*prog)); - prog->section_name = strdup(name); + prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog %s\n", - name); + pr_warning("failed to alloc name for prog under section %s\n", 
+ section_name); goto errout; } prog->insns = malloc(size); if (!prog->insns) { - pr_warning("failed to alloc insns for %s\n", name); + pr_warning("failed to alloc insns for prog under section %s\n", + section_name); goto errout; } prog->insns_cnt = size / sizeof(struct bpf_insn); @@ -331,12 +334,12 @@ errout: static int bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *name, int idx) + char *section_name, int idx) { struct bpf_program prog, *progs; int nr_progs, err; - err = bpf_program__init(data, size, name, idx, &prog); + err = bpf_program__init(data, size, section_name, idx, &prog); if (err) return err; @@ -350,8 +353,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, * is still valid, so don't need special treat for * bpf_close_object(). */ - pr_warning("failed to alloc a new program '%s'\n", - name); + pr_warning("failed to alloc a new program under section '%s'\n", + section_name); bpf_program__exit(&prog); return -ENOMEM; } @@ -364,6 +367,54 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, return 0; } +static int +bpf_object__init_prog_names(struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + struct bpf_program *prog; + size_t pi, si; + + for (pi = 0; pi < obj->nr_programs; pi++) { + char *name = NULL; + + prog = &obj->programs[pi]; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; + si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (sym.st_shndx != prog->idx) + continue; + + name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + if (!name) { + pr_warning("failed to get sym name string for prog %s\n", + prog->section_name); + return -LIBBPF_ERRNO__LIBELF; + } + } + + if (!name) { + pr_warning("failed to find sym for prog %s\n", + prog->section_name); + return -EINVAL; + } + + prog->name = strdup(name); + if (!prog->name) { + pr_warning("failed to allocate memory for prog sym %s\n", + 
name); + return -ENOMEM; + } + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, void *obj_buf, size_t obj_buf_sz) @@ -766,8 +817,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (obj->efile.maps_shndx >= 0) + if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj); + if (err) + goto out; + } + err = bpf_object__init_prog_names(obj); out: return err; } @@ -870,11 +925,12 @@ bpf_object__create_maps(struct bpf_object *obj) struct bpf_map_def *def = &obj->maps[i].def; int *pfd = &obj->maps[i].fd; - *pfd = bpf_create_map(def->type, - def->key_size, - def->value_size, - def->max_entries, - 0); + *pfd = bpf_create_map_name(def->type, + obj->maps[i].name, + def->key_size, + def->value_size, + def->max_entries, + 0); if (*pfd < 0) { size_t j; int err = *pfd; @@ -982,7 +1038,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, struct bpf_insn *insns, +load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns, int insns_cnt, char *license, u32 kern_version, int *pfd) { int ret; @@ -995,8 +1051,8 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program(type, insns, insns_cnt, license, - kern_version, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_name(type, name, insns, insns_cnt, license, + kern_version, log_buf, BPF_LOG_BUF_SIZE); if (ret >= 0) { *pfd = ret; @@ -1021,9 +1077,9 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (type != BPF_PROG_TYPE_KPROBE) { int fd; - fd = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, - insns_cnt, license, kern_version, - NULL, 0); + fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name, + insns, insns_cnt, license, + kern_version, NULL, 0); if (fd >= 0) { 
close(fd); ret = -LIBBPF_ERRNO__PROGTYPE; @@ -1067,8 +1123,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + err = load_program(prog->type, prog->name, prog->insns, + prog->insns_cnt, license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1096,7 +1152,8 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, result.new_insn_ptr, + err = load_program(prog->type, prog->name, + result.new_insn_ptr, result.new_insn_cnt, license, kern_version, &fd); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a0426147523d..290d5056c165 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6939,7 +6939,7 @@ static int create_map_in_map(void) return inner_map_fd; } - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, sizeof(int), inner_map_fd, 1, 0); if (outer_map_fd < 0) printf("Failed to create array of maps '%s'!\n", -- cgit v1.2.3 From defd9c476fa6b01b4eb5450452bfd202138decb7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:26 -0700 Subject: libbpf: sync bpf.h tools/include/uapi/linux/bpf.h got out of sync with actual kernel header. Update it. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- tools/include/uapi/linux/bpf.h | 55 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6d2137b4cf38..cb2b9f95160a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -143,11 +144,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. 
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -175,6 +212,9 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + #define BPF_OBJ_NAME_LEN 16U union bpf_attr { @@ -253,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: -- cgit v1.2.3 From 020a32d9581ac824d038b0b4e24e977e3cc8589f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:21 -0700 Subject: bpf: add a test case for helper bpf_perf_event_read_value The bpf sample program tracex6 is enhanced to use the new helper to read enabled/running time as well. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/tracex6_kern.c | 26 ++++++++++++++++++++++++++ samples/bpf/tracex6_user.c | 13 ++++++++++++- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 3 +++ 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c index e7d180305974..46c557afac73 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6_kern.c @@ -15,6 +15,12 @@ struct bpf_map_def SEC("maps") values = { .value_size = sizeof(u64), .max_entries = 64, }; +struct bpf_map_def SEC("maps") values2 = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_perf_event_value), + .max_entries = 64, +}; SEC("kprobe/htab_map_get_next_key") int bpf_prog1(struct pt_regs *ctx) @@ -37,5 +43,25 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } +SEC("kprobe/htab_map_lookup_elem") +int bpf_prog2(struct pt_regs *ctx) +{ + u32 key = bpf_get_smp_processor_id(); + struct bpf_perf_event_value *val, buf; + int error; + + error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf)); + if (error) + return 0; + + val = bpf_map_lookup_elem(&values2, &key); + if (val) + *val = buf; + else + bpf_map_update_elem(&values2, &key, &buf, BPF_NOEXIST); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index a05a99a0752f..3341a96fc046 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -22,6 +22,7 @@ static void check_on_cpu(int cpu, struct perf_event_attr *attr) { + struct bpf_perf_event_value value2; int pmu_fd, error = 0; cpu_set_t set; __u64 value; @@ -46,8 +47,18 @@ static void check_on_cpu(int cpu, struct perf_event_attr *attr) fprintf(stderr, "Value missing for CPU %d\n", cpu); error = 1; goto on_exit; + } else { + fprintf(stderr, "CPU %d: %llu\n", cpu, value); + } + /* The above 
bpf_map_lookup_elem should trigger the second kprobe */ + if (bpf_map_lookup_elem(map_fd[2], &cpu, &value2)) { + fprintf(stderr, "Value2 missing for CPU %d\n", cpu); + error = 1; + goto on_exit; + } else { + fprintf(stderr, "CPU %d: counter: %llu, enabled: %llu, running: %llu\n", cpu, + value2.counter, value2.enabled, value2.running); } - fprintf(stderr, "CPU %d: %llu\n", cpu, value); on_exit: assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cb2b9f95160a..cdf6c4f50b0f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -697,7 +697,8 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ - FN(xdp_adjust_meta), + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index a56053db26f5..c15ca83dbbd9 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -72,6 +72,9 @@ static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = (void *) BPF_FUNC_sock_map_update; +static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, + void *buf, unsigned int buf_size) = + (void *) BPF_FUNC_perf_event_read_value; /* llvm builtin functions that eBPF C program may use to -- cgit v1.2.3 From 81b9cf8028a17bdbdaa0da80b735b32150d4e89e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:23 -0700 Subject: bpf: add a test case for helper bpf_perf_prog_read_value The bpf sample program trace_event is enhanced to use the new helper to print out enabled/running time. 
Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/trace_event_kern.c | 10 ++++++++++ samples/bpf/trace_event_user.c | 13 ++++++++----- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 3 +++ 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c index 41b6115a32eb..a77a583d94d4 100644 --- a/samples/bpf/trace_event_kern.c +++ b/samples/bpf/trace_event_kern.c @@ -37,10 +37,14 @@ struct bpf_map_def SEC("maps") stackmap = { SEC("perf_event") int bpf_prog1(struct bpf_perf_event_data *ctx) { + char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu"; + char time_fmt2[] = "Get Time Failed, ErrCode: %d"; char fmt[] = "CPU-%d period %lld ip %llx"; u32 cpu = bpf_get_smp_processor_id(); + struct bpf_perf_event_value value_buf; struct key_t key; u64 *val, one = 1; + int ret; if (ctx->sample_period < 10000) /* ignore warmup */ @@ -54,6 +58,12 @@ int bpf_prog1(struct bpf_perf_event_data *ctx) return 0; } + ret = bpf_perf_prog_read_value(ctx, (void *)&value_buf, sizeof(struct bpf_perf_event_value)); + if (!ret) + bpf_trace_printk(time_fmt1, sizeof(time_fmt1), value_buf.enabled, value_buf.running); + else + bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret); + val = bpf_map_lookup_elem(&counts, &key); if (val) (*val)++; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 7bd827b84a67..bf4f1b6d9a52 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -127,6 +127,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) int *pmu_fd = malloc(nr_cpus * sizeof(int)); int i, error = 0; + /* system wide perf event, no need to inherit */ + attr->inherit = 0; + /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); @@ -154,6 +157,11 @@ 
static void test_perf_event_task(struct perf_event_attr *attr) { int pmu_fd; + /* per task perf event, enable inherit so the "dd ..." command can be traced properly. + * Enabling inherit will cause bpf_perf_prog_read_time helper failure. + */ + attr->inherit = 1; + /* open task bound event */ pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { @@ -175,14 +183,12 @@ static void test_bpf_perf_event(void) .freq = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, - .inherit = 1, }; struct perf_event_attr attr_type_sw = { .sample_freq = SAMPLE_FREQ, .freq = 1, .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, - .inherit = 1, }; struct perf_event_attr attr_hw_cache_l1d = { .sample_freq = SAMPLE_FREQ, @@ -192,7 +198,6 @@ static void test_bpf_perf_event(void) PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), - .inherit = 1, }; struct perf_event_attr attr_hw_cache_branch_miss = { .sample_freq = SAMPLE_FREQ, @@ -202,7 +207,6 @@ static void test_bpf_perf_event(void) PERF_COUNT_HW_CACHE_BPU | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), - .inherit = 1, }; struct perf_event_attr attr_type_raw = { .sample_freq = SAMPLE_FREQ, @@ -210,7 +214,6 @@ static void test_bpf_perf_event(void) .type = PERF_TYPE_RAW, /* Intel Instruction Retired */ .config = 0xc0, - .inherit = 1, }; printf("Test HW_CPU_CYCLES\n"); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cdf6c4f50b0f..0894fd20b12b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -698,7 +698,8 @@ union bpf_attr { FN(sk_redirect_map), \ FN(sock_map_update), \ FN(xdp_adjust_meta), \ - FN(perf_event_read_value), + FN(perf_event_read_value), \ + FN(perf_prog_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git 
a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index c15ca83dbbd9..e25dbf6038cf 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -75,6 +75,9 @@ static int (*bpf_sock_map_update)(void *map, void *key, void *value, static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_event_read_value; +static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, + unsigned int buf_size) = + (void *) BPF_FUNC_perf_prog_read_value; /* llvm builtin functions that eBPF C program may use to -- cgit v1.2.3 From 067cae47771c864604969fd902efe10916e0d79c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:12 -0700 Subject: bpf: Use char in prog and map name Instead of u8, use char for prog and map name. It can avoid the userspace tool getting compiler's signess warning. The bpf_prog_aux, bpf_map, bpf_attr, bpf_prog_info and bpf_map_info are changed. Signed-off-by: Martin KaFai Lau Cc: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 4 ++-- include/uapi/linux/bpf.h | 8 ++++---- tools/include/uapi/linux/bpf.h | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools/include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a67daea731ab..bc7da2ddfcaf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,7 +56,7 @@ struct bpf_map { struct work_struct work; atomic_t usercnt; struct bpf_map *inner_map_meta; - u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; }; /* function argument constraints */ @@ -189,7 +189,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ - u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5bbbec17aa5a..6db9e1d679cd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,7 +230,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). 
*/ - __u8 map_name[BPF_OBJ_NAME_LEN]; + char map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -253,7 +253,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; - __u8 prog_name[BPF_OBJ_NAME_LEN]; + char prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -888,7 +888,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -898,7 +898,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0894fd20b12b..fb4fb81ce5b0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -230,7 +230,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). 
*/ - __u8 map_name[BPF_OBJ_NAME_LEN]; + char map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -253,7 +253,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; - __u8 prog_name[BPF_OBJ_NAME_LEN]; + char prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -871,7 +871,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -881,7 +881,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops -- cgit v1.2.3 From 6710e1126934d8b4372b4d2f9ae1646cd3f151bf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:28 +0200 Subject: bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP The 'cpumap' is primarily used as a backend map for XDP BPF helper call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. This patch implement the main part of the map. It is not connected to the XDP redirect system yet, and no SKB allocation are done yet. The main concern in this patch is to ensure the datapath can run without any locking. This adds complexity to the setup and tear-down procedure, which assumptions are extra carefully documented in the code comments. 
V2: - make sure array isn't larger than NR_CPUS - make sure each CPU added is a valid possible CPU V3: fix nitpicks from Jakub Kicinski V5: - Restrict map allocation to root / CAP_SYS_ADMIN - WARN_ON_ONCE if queue is not empty on tear-down - Return -EPERM on memlock limit instead of -ENOMEM - Error code in __cpu_map_entry_alloc() also handles ptr_ring_cleanup() - Moved cpu_map_enqueue() to next patch V6: all noticed by Daniel Borkmann - Fix err return code in cpu_map_alloc() introduced in V5 - Move cpu_possible() check after max_entries boundary check - Forbid usage initially in check_map_func_compatibility() V7: - Fix alloc error path spotted by Daniel Borkmann - Did stress test adding+removing CPUs from the map concurrently - Fixed refcnt issue on cpu_map_entry, kthread started too soon - Make sure packets are flushed during tear-down, involved use of rcu_barrier() and kthread_run only exits after queue is empty - Fix alloc error path in __cpu_map_entry_alloc() for ptr_ring V8: - Nitpicking comments and grammar by Edward Cree - Fix missing semi-colon introduced in V7 due to rebasing - Move struct bpf_cpu_map_entry members cpu+map_id to tracepoint patch Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/Makefile | 1 + kernel/bpf/cpumap.c | 560 +++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 8 +- kernel/bpf/verifier.c | 5 + tools/include/uapi/linux/bpf.h | 1 + 7 files changed, 576 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/cpumap.c (limited to 'tools/include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 6f1a567667b8..814c1081a4a9 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) #ifdef CONFIG_STREAM_PARSER BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6db9e1d679cd..4303fb6c3817 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, }; enum bpf_prog_type { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 53fb09f92e3f..e597daae6120 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list obj-$(CONFIG_BPF_SYSCALL) += disasm.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o +obj-$(CONFIG_BPF_SYSCALL) += cpumap.o ifeq ($(CONFIG_STREAM_PARSER),y) obj-$(CONFIG_BPF_SYSCALL) += sockmap.o endif diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c new file mode 100644 index 000000000000..e1e25ddba038 --- /dev/null +++ b/kernel/bpf/cpumap.c @@ -0,0 +1,560 @@ +/* bpf/cpumap.c + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. + */ + +/* The 'cpumap' is primarily used as a backend map for XDP BPF helper + * call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. 
+ * + * Unlike devmap which redirects XDP frames out another NIC device, + * this map type redirects raw XDP frames to another CPU. The remote + * CPU will do SKB-allocation and call the normal network stack. + * + * This is a scalability and isolation mechanism, that allow + * separating the early driver network XDP layer, from the rest of the + * netstack, and assigning dedicated CPUs for this stage. This + * basically allows for 10G wirespeed pre-filtering via bpf. + */ +#include +#include +#include + +#include +#include +#include +#include + +/* General idea: XDP packets getting XDP redirected to another CPU, + * will maximum be stored/queued for one driver ->poll() call. It is + * guaranteed that setting flush bit and flush operation happen on + * same CPU. Thus, cpu_map_flush operation can deduct via this_cpu_ptr() + * which queue in bpf_cpu_map_entry contains packets. + */ + +#define CPU_MAP_BULK_SIZE 8 /* 8 == one cacheline on 64-bit archs */ +struct xdp_bulk_queue { + void *q[CPU_MAP_BULK_SIZE]; + unsigned int count; +}; + +/* Struct for every remote "destination" CPU in map */ +struct bpf_cpu_map_entry { + u32 qsize; /* Queue size placeholder for map lookup */ + + /* XDP can run multiple RX-ring queues, need __percpu enqueue store */ + struct xdp_bulk_queue __percpu *bulkq; + + /* Queue with potential multi-producers, and single-consumer kthread */ + struct ptr_ring *queue; + struct task_struct *kthread; + struct work_struct kthread_stop_wq; + + atomic_t refcnt; /* Control when this struct can be free'ed */ + struct rcu_head rcu; +}; + +struct bpf_cpu_map { + struct bpf_map map; + /* Below members specific for map type */ + struct bpf_cpu_map_entry **cpu_map; + unsigned long __percpu *flush_needed; +}; + +static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, + struct xdp_bulk_queue *bq); + +static u64 cpu_map_bitmap_size(const union bpf_attr *attr) +{ + return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); +} + +static struct bpf_map 
*cpu_map_alloc(union bpf_attr *attr) +{ + struct bpf_cpu_map *cmap; + int err = -ENOMEM; + u64 cost; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) + return ERR_PTR(-EINVAL); + + cmap = kzalloc(sizeof(*cmap), GFP_USER); + if (!cmap) + return ERR_PTR(-ENOMEM); + + /* mandatory map attributes */ + cmap->map.map_type = attr->map_type; + cmap->map.key_size = attr->key_size; + cmap->map.value_size = attr->value_size; + cmap->map.max_entries = attr->max_entries; + cmap->map.map_flags = attr->map_flags; + cmap->map.numa_node = bpf_map_attr_numa_node(attr); + + /* Pre-limit array size based on NR_CPUS, not final CPU check */ + if (cmap->map.max_entries > NR_CPUS) { + err = -E2BIG; + goto free_cmap; + } + + /* make sure page count doesn't overflow */ + cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); + cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); + if (cost >= U32_MAX - PAGE_SIZE) + goto free_cmap; + cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + /* Notice returns -EPERM on if map size is larger than memlock limit */ + ret = bpf_map_precharge_memlock(cmap->map.pages); + if (ret) { + err = ret; + goto free_cmap; + } + + /* A per cpu bitfield with a bit per possible CPU in map */ + cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr), + __alignof__(unsigned long)); + if (!cmap->flush_needed) + goto free_cmap; + + /* Alloc array for possible remote "destination" CPUs */ + cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * + sizeof(struct bpf_cpu_map_entry *), + cmap->map.numa_node); + if (!cmap->cpu_map) + goto free_percpu; + + return &cmap->map; +free_percpu: + free_percpu(cmap->flush_needed); +free_cmap: + kfree(cmap); + return ERR_PTR(err); +} + +void __cpu_map_queue_destructor(void *ptr) +{ + /* The tear-down procedure should have 
made sure that queue is + * empty. See __cpu_map_entry_replace() and work-queue + * invoked cpu_map_kthread_stop(). Catch any broken behaviour + * gracefully and warn once. + */ + if (WARN_ON_ONCE(ptr)) + page_frag_free(ptr); +} + +static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) +{ + if (atomic_dec_and_test(&rcpu->refcnt)) { + /* The queue should be empty at this point */ + ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor); + kfree(rcpu->queue); + kfree(rcpu); + } +} + +static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) +{ + atomic_inc(&rcpu->refcnt); +} + +/* called from workqueue, to workaround syscall using preempt_disable */ +static void cpu_map_kthread_stop(struct work_struct *work) +{ + struct bpf_cpu_map_entry *rcpu; + + rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); + + /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, + * as it waits until all in-flight call_rcu() callbacks complete. + */ + rcu_barrier(); + + /* kthread_stop will wake_up_process and wait for it to complete */ + kthread_stop(rcpu->kthread); +} + +static int cpu_map_kthread_run(void *data) +{ + struct bpf_cpu_map_entry *rcpu = data; + + set_current_state(TASK_INTERRUPTIBLE); + + /* When kthread gives stop order, then rcpu have been disconnected + * from map, thus no new packets can enter. Remaining in-flight + * per CPU stored packets are flushed to this queue. Wait honoring + * kthread_stop signal until queue is empty. 
+ */ + while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { + struct xdp_pkt *xdp_pkt; + + schedule(); + /* Do work */ + while ((xdp_pkt = ptr_ring_consume(rcpu->queue))) { + /* For now just "refcnt-free" */ + page_frag_free(xdp_pkt); + } + __set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + put_cpu_map_entry(rcpu); + return 0; +} + +struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) +{ + gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; + struct bpf_cpu_map_entry *rcpu; + int numa, err; + + /* Have map->numa_node, but choose node of redirect target CPU */ + numa = cpu_to_node(cpu); + + rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa); + if (!rcpu) + return NULL; + + /* Alloc percpu bulkq */ + rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq), + sizeof(void *), gfp); + if (!rcpu->bulkq) + goto free_rcu; + + /* Alloc queue */ + rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa); + if (!rcpu->queue) + goto free_bulkq; + + err = ptr_ring_init(rcpu->queue, qsize, gfp); + if (err) + goto free_queue; + + rcpu->qsize = qsize; + + /* Setup kthread */ + rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, + "cpumap/%d/map:%d", cpu, map_id); + if (IS_ERR(rcpu->kthread)) + goto free_ptr_ring; + + get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */ + get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */ + + /* Make sure kthread runs on a single CPU */ + kthread_bind(rcpu->kthread, cpu); + wake_up_process(rcpu->kthread); + + return rcpu; + +free_ptr_ring: + ptr_ring_cleanup(rcpu->queue, NULL); +free_queue: + kfree(rcpu->queue); +free_bulkq: + free_percpu(rcpu->bulkq); +free_rcu: + kfree(rcpu); + return NULL; +} + +void __cpu_map_entry_free(struct rcu_head *rcu) +{ + struct bpf_cpu_map_entry *rcpu; + int cpu; + + /* This cpu_map_entry have been disconnected from map and one + * RCU graze-period have elapsed. 
Thus, XDP cannot queue any + * new packets and cannot change/set flush_needed that can + * find this entry. + */ + rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu); + + /* Flush remaining packets in percpu bulkq */ + for_each_online_cpu(cpu) { + struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu); + + /* No concurrent bq_enqueue can run at this point */ + bq_flush_to_queue(rcpu, bq); + } + free_percpu(rcpu->bulkq); + /* Cannot kthread_stop() here, last put free rcpu resources */ + put_cpu_map_entry(rcpu); +} + +/* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to + * ensure any driver rcu critical sections have completed, but this + * does not guarantee a flush has happened yet. Because driver side + * rcu_read_lock/unlock only protects the running XDP program. The + * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a + * pending flush op doesn't fail. + * + * The bpf_cpu_map_entry is still used by the kthread, and there can + * still be pending packets (in queue and percpu bulkq). A refcnt + * makes sure to last user (kthread_stop vs. call_rcu) free memory + * resources. + * + * The rcu callback __cpu_map_entry_free flush remaining packets in + * percpu bulkq to queue. Due to caller map_delete_elem() disable + * preemption, cannot call kthread_stop() to make sure queue is empty. + * Instead a work_queue is started for stopping kthread, + * cpu_map_kthread_stop, which waits for an RCU graze period before + * stopping kthread, emptying the queue. 
+ */ +void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, + u32 key_cpu, struct bpf_cpu_map_entry *rcpu) +{ + struct bpf_cpu_map_entry *old_rcpu; + + old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu); + if (old_rcpu) { + call_rcu(&old_rcpu->rcu, __cpu_map_entry_free); + INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop); + schedule_work(&old_rcpu->kthread_stop_wq); + } +} + +int cpu_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + u32 key_cpu = *(u32 *)key; + + if (key_cpu >= map->max_entries) + return -EINVAL; + + /* notice caller map_delete_elem() use preempt_disable() */ + __cpu_map_entry_replace(cmap, key_cpu, NULL); + return 0; +} + +int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + struct bpf_cpu_map_entry *rcpu; + + /* Array index key correspond to CPU number */ + u32 key_cpu = *(u32 *)key; + /* Value is the queue size */ + u32 qsize = *(u32 *)value; + + if (unlikely(map_flags > BPF_EXIST)) + return -EINVAL; + if (unlikely(key_cpu >= cmap->map.max_entries)) + return -E2BIG; + if (unlikely(map_flags == BPF_NOEXIST)) + return -EEXIST; + if (unlikely(qsize > 16384)) /* sanity limit on qsize */ + return -EOVERFLOW; + + /* Make sure CPU is a valid possible cpu */ + if (!cpu_possible(key_cpu)) + return -ENODEV; + + if (qsize == 0) { + rcpu = NULL; /* Same as deleting */ + } else { + /* Updating qsize cause re-allocation of bpf_cpu_map_entry */ + rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id); + if (!rcpu) + return -ENOMEM; + } + rcu_read_lock(); + __cpu_map_entry_replace(cmap, key_cpu, rcpu); + rcu_read_unlock(); + return 0; +} + +void cpu_map_free(struct bpf_map *map) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + int cpu; + u32 i; + + /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the bpf programs 
(can be more than one that used this map) were + * disconnected from events. Wait for outstanding critical sections in + * these programs to complete. The rcu critical section only guarantees + * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map. + * It does __not__ ensure pending flush operations (if any) are + * complete. + */ + synchronize_rcu(); + + /* To ensure all pending flush operations have completed wait for flush + * bitmap to indicate all flush_needed bits to be zero on _all_ cpus. + * Because the above synchronize_rcu() ensures the map is disconnected + * from the program we can assume no new bits will be set. + */ + for_each_online_cpu(cpu) { + unsigned long *bitmap = per_cpu_ptr(cmap->flush_needed, cpu); + + while (!bitmap_empty(bitmap, cmap->map.max_entries)) + cond_resched(); + } + + /* For cpu_map the remote CPUs can still be using the entries + * (struct bpf_cpu_map_entry). + */ + for (i = 0; i < cmap->map.max_entries; i++) { + struct bpf_cpu_map_entry *rcpu; + + rcpu = READ_ONCE(cmap->cpu_map[i]); + if (!rcpu) + continue; + + /* bq flush and cleanup happens after RCU graze-period */ + __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */ + } + free_percpu(cmap->flush_needed); + bpf_map_area_free(cmap->cpu_map); + kfree(cmap); +} + +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + struct bpf_cpu_map_entry *rcpu; + + if (key >= map->max_entries) + return NULL; + + rcpu = READ_ONCE(cmap->cpu_map[key]); + return rcpu; +} + +static void *cpu_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_cpu_map_entry *rcpu = + __cpu_map_lookup_elem(map, *(u32 *)key); + + return rcpu ? &rcpu->qsize : NULL; +} + +static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + u32 index = key ? 
*(u32 *)key : U32_MAX; + u32 *next = next_key; + + if (index >= cmap->map.max_entries) { + *next = 0; + return 0; + } + + if (index == cmap->map.max_entries - 1) + return -ENOENT; + *next = index + 1; + return 0; +} + +const struct bpf_map_ops cpu_map_ops = { + .map_alloc = cpu_map_alloc, + .map_free = cpu_map_free, + .map_delete_elem = cpu_map_delete_elem, + .map_update_elem = cpu_map_update_elem, + .map_lookup_elem = cpu_map_lookup_elem, + .map_get_next_key = cpu_map_get_next_key, +}; + +static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, + struct xdp_bulk_queue *bq) +{ + struct ptr_ring *q; + int i; + + if (unlikely(!bq->count)) + return 0; + + q = rcpu->queue; + spin_lock(&q->producer_lock); + + for (i = 0; i < bq->count; i++) { + void *xdp_pkt = bq->q[i]; + int err; + + err = __ptr_ring_produce(q, xdp_pkt); + if (err) { + /* Free xdp_pkt */ + page_frag_free(xdp_pkt); + } + } + bq->count = 0; + spin_unlock(&q->producer_lock); + + return 0; +} + +/* Notice: Will change in later patch */ +struct xdp_pkt { + void *data; + u16 len; + u16 headroom; +}; + +/* Runs under RCU-read-side, plus in softirq under NAPI protection. + * Thus, safe percpu variable access. + */ +int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) +{ + struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); + + if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) + bq_flush_to_queue(rcpu, bq); + + /* Notice, xdp_buff/page MUST be queued here, long enough for + * driver to code invoking us to finished, due to driver + * (e.g. ixgbe) recycle tricks based on page-refcnt. + * + * Thus, incoming xdp_pkt is always queued here (else we race + * with another CPU on page-refcnt and remaining driver code). + * Queue time is very short, as driver will invoke flush + * operation, when completing napi->poll call. 
+ */ + bq->q[bq->count++] = xdp_pkt; + return 0; +} + +void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed); + + __set_bit(bit, bitmap); +} + +void __cpu_map_flush(struct bpf_map *map) +{ + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); + unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed); + u32 bit; + + /* The napi->poll softirq makes sure __cpu_map_insert_ctx() + * and __cpu_map_flush() happen on same CPU. Thus, the percpu + * bitmap indicate which percpu bulkq have packets. + */ + for_each_set_bit(bit, bitmap, map->max_entries) { + struct bpf_cpu_map_entry *rcpu = READ_ONCE(cmap->cpu_map[bit]); + struct xdp_bulk_queue *bq; + + /* This is possible if entry is removed by user space + * between xdp redirect and flush op. + */ + if (unlikely(!rcpu)) + continue; + + __clear_bit(bit, bitmap); + + /* Flush all frames in bulkq to real queue */ + bq = this_cpu_ptr(rcpu->bulkq); + bq_flush_to_queue(rcpu, bq); + + /* If already running, costs spin_lock_irqsave + smb_mb */ + wake_up_process(rcpu->kthread); + } +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d124e702e040..54fba06942f5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -592,6 +592,12 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; + /* Need to create a kthread, thus must support schedule */ + if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + err = map->ops->map_update_elem(map, key, value, attr->flags); + goto out; + } + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from * inside bpf map update or delete otherwise deadlocks are possible */ @@ -622,7 +628,7 @@ static int map_update_elem(union bpf_attr *attr) } __this_cpu_dec(bpf_prog_active); preempt_enable(); - +out: if (!err) trace_bpf_map_update_elem(map, ufd, key, value); 
free_value: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9755279d94cb..cefa64be9a2f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1444,6 +1444,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_redirect_map) goto error; break; + /* Restrict bpf side of cpumap, open when use-cases appear */ + case BPF_MAP_TYPE_CPUMAP: + if (func_id != BPF_FUNC_redirect_map) + goto error; + break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fb4fb81ce5b0..fa93033dc521 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, }; enum bpf_prog_type { -- cgit v1.2.3 From e27afb84b4680570b64c958dfcba9e0b3da92fc9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 22 Oct 2017 10:29:06 -0700 Subject: selftests/bpf: fix broken build of test_maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix multiple build errors and warnings 1. test_maps.c: In function ‘test_map_rdonly’: test_maps.c:1051:30: error: ‘BPF_F_RDONLY’ undeclared (first use in this function) MAP_SIZE, map_flags | BPF_F_RDONLY); 2. test_maps.c:1048:6: warning: unused variable ‘i’ [-Wunused-variable] int i, fd, key = 0, value = 0; 3. test_maps.c:1087:2: error: called object is not a function or function pointer assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); 4. 
./bpf_helpers.h:72:11: error: use of undeclared identifier 'BPF_FUNC_getsockopt' (void *) BPF_FUNC_getsockopt; Fixes: e043325b3087 ("bpf: Add tests for eBPF file mode") Fixes: 6e71b04a8224 ("bpf: Add file mode configuration into bpf maps") Fixes: cd86d1fd2102 ("bpf: Adding helper function bpf_getsockops") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 62 ++++++++++++++++++++++++++++----- tools/testing/selftests/bpf/test_maps.c | 6 ++-- 2 files changed, 57 insertions(+), 11 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 850a5497dcc3..f650346aaa1a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -218,6 +218,10 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -260,6 +264,7 @@ union bpf_attr { struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; + __u32 file_flags; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ @@ -287,6 +292,7 @@ union bpf_attr { __u32 map_id; }; __u32 next_id; + __u32 open_flags; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ @@ -607,12 +613,22 @@ union bpf_attr { * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) * Calls setsockopt. Not all opts are available, only those with * integer optvals plus TCP_CONGESTION. 
- * Supported levels: SOL_SOCKET and IPROTO_TCP + * Supported levels: SOL_SOCKET and IPPROTO_TCP * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPROTO_TCP + * @level: SOL_SOCKET or IPPROTO_TCP * @optname: option name * @optval: pointer to option value - * @optlen: length of optval in byes + * @optlen: length of optval in bytes + * Return: 0 or negative error + * + * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) + * Calls getsockopt. Not all opts are available. + * Supported levels: IPPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: IPPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in bytes * Return: 0 or negative error * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) @@ -623,10 +639,9 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(skb, map, key, flags) + * int bpf_sk_redirect_map(map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. 
- * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use @@ -643,6 +658,21 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: An positive/negative integer to be added to xdp_md.data_meta * Return: 0 on success or negative on error + * + * int bpf_perf_event_read_value(map, flags, buf, buf_size) + * read perf event counter value and perf event enabled/running time + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @buf: buf to fill + * @buf_size: size of the buf + * Return: 0 on success or negative error code + * + * int bpf_perf_prog_read_value(ctx, buf, buf_size) + * read perf prog attached perf event counter and enabled/running time + * @ctx: pointer to ctx + * @buf: buf to fill + * @buf_size: size of the buf + * Return : 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -701,7 +731,8 @@ union bpf_attr { FN(sock_map_update), \ FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ - FN(perf_prog_read_value), + FN(perf_prog_read_value), \ + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -745,7 +776,9 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK /* BPF_FUNC_perf_event_output for sk_buff input context. 
*/ @@ -873,7 +906,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - char name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -933,9 +966,22 @@ enum { BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control * needs ECN */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 057da0cba517..040356ecc862 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1045,7 +1045,7 @@ static void test_map_parallel(void) static void test_map_rdonly(void) { - int i, fd, key = 0, value = 0; + int fd, key = 0, value = 0; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), MAP_SIZE, map_flags | BPF_F_RDONLY); @@ -1068,7 +1068,7 @@ static void test_map_rdonly(void) static void test_map_wronly(void) { - int i, fd, key = 0, value = 0; + int fd, key = 0, value = 0; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), MAP_SIZE, map_flags | BPF_F_WRONLY); @@ -1081,7 +1081,7 @@ static void test_map_wronly(void) key = 1; value = 1234; /* Insert key=1 element. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0) + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); /* Check that key=2 is not found. 
*/ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); -- cgit v1.2.3 From 6aa7de059173a986114ac43b8f50b297a86f09a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:29 -0700 Subject: locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. 
McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/arc/kernel/smp.c | 2 +- arch/arm/include/asm/spinlock.h | 2 +- arch/arm/mach-tegra/cpuidle-tegra20.c | 2 +- arch/arm/vdso/vgettimeofday.c | 2 +- arch/ia64/include/asm/spinlock.h | 8 ++--- arch/mips/include/asm/vdso.h | 2 +- arch/mips/kernel/pm-cps.c | 2 +- arch/mn10300/kernel/mn10300-serial.c | 4 +-- arch/parisc/include/asm/atomic.h | 2 +- arch/powerpc/platforms/powernv/opal-msglog.c | 2 +- arch/s390/include/asm/spinlock.h | 6 ++-- arch/s390/lib/spinlock.c | 16 +++++----- arch/sparc/include/asm/atomic_32.h | 2 +- arch/tile/gxio/dma_queue.c | 4 +-- arch/tile/include/gxio/dma_queue.h | 2 +- arch/tile/kernel/ptrace.c | 2 +- arch/x86/entry/common.c | 2 +- arch/x86/entry/vdso/vclock_gettime.c | 2 +- arch/x86/events/core.c | 2 +- arch/x86/include/asm/vgtod.h | 2 +- arch/x86/kernel/espfix_64.c | 6 ++-- arch/x86/kernel/nmi.c | 2 +- arch/x86/kvm/mmu.c | 4 +-- arch/x86/kvm/page_track.c | 2 +- arch/x86/xen/p2m.c | 2 +- arch/xtensa/platforms/xtfpga/lcd.c | 14 ++++----- block/blk-wbt.c | 2 +- drivers/base/core.c | 2 +- drivers/base/power/runtime.c | 4 +-- drivers/char/random.c | 4 +-- drivers/clocksource/bcm2835_timer.c | 2 +- drivers/crypto/caam/jr.c | 4 +-- drivers/crypto/nx/nx-842-powernv.c | 2 +- drivers/firewire/ohci.c | 10 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 4 +-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/pio.c | 6 ++-- 
drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 8 ++--- drivers/infiniband/hw/hfi1/sdma.h | 2 +- drivers/infiniband/hw/hfi1/uc.c | 4 +-- drivers/infiniband/hw/hfi1/ud.c | 4 +-- drivers/infiniband/hw/hfi1/user_sdma.c | 8 ++--- drivers/infiniband/hw/qib/qib_ruc.c | 2 +- drivers/infiniband/hw/qib/qib_uc.c | 4 +-- drivers/infiniband/hw/qib/qib_ud.c | 4 +-- drivers/infiniband/sw/rdmavt/qp.c | 6 ++-- drivers/input/misc/regulator-haptic.c | 2 +- drivers/md/dm-bufio.c | 10 +++--- drivers/md/dm-kcopyd.c | 4 +-- drivers/md/dm-stats.c | 36 +++++++++++----------- drivers/md/dm-switch.c | 2 +- drivers/md/dm-thin.c | 2 +- drivers/md/dm-verity-target.c | 2 +- drivers/md/dm.c | 4 +-- drivers/md/md.c | 2 +- drivers/md/raid5.c | 2 +- drivers/misc/mic/scif/scif_rb.c | 8 ++--- drivers/misc/mic/scif/scif_rma_list.c | 2 +- drivers/net/bonding/bond_alb.c | 2 +- drivers/net/bonding/bond_main.c | 6 ++-- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 +-- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/hisilicon/hip04_eth.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 4 +-- drivers/net/ethernet/intel/igb/e1000_regs.h | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_common.h | 4 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++--- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 4 +-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/vf.h | 2 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 ++++---- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- drivers/net/ethernet/sfc/ef10.c | 10 +++--- drivers/net/ethernet/sfc/efx.c | 4 +-- drivers/net/ethernet/sfc/falcon/efx.c | 4 +-- drivers/net/ethernet/sfc/falcon/falcon.c | 4 +-- drivers/net/ethernet/sfc/falcon/farch.c | 8 ++--- 
drivers/net/ethernet/sfc/falcon/nic.h | 6 ++-- drivers/net/ethernet/sfc/falcon/tx.c | 6 ++-- drivers/net/ethernet/sfc/farch.c | 8 ++--- drivers/net/ethernet/sfc/nic.h | 6 ++-- drivers/net/ethernet/sfc/ptp.c | 10 +++--- drivers/net/ethernet/sfc/tx.c | 6 ++-- drivers/net/ethernet/sun/niu.c | 4 +-- drivers/net/tap.c | 2 +- drivers/net/tun.c | 4 +-- drivers/net/wireless/ath/ath5k/desc.c | 8 ++--- .../wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 4 +-- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10 +++--- drivers/net/wireless/mac80211_hwsim.c | 4 +-- drivers/scsi/qla2xxx/qla_target.c | 2 +- drivers/target/target_core_user.c | 2 +- drivers/usb/class/cdc-wdm.c | 2 +- drivers/usb/core/devio.c | 2 +- drivers/usb/core/sysfs.c | 4 +-- drivers/usb/gadget/udc/gr_udc.c | 4 +-- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/uhci-hcd.h | 4 +-- drivers/vfio/vfio.c | 2 +- drivers/vhost/scsi.c | 2 +- fs/aio.c | 2 +- fs/buffer.c | 3 +- fs/crypto/keyinfo.c | 2 +- fs/direct-io.c | 2 +- fs/exec.c | 2 +- fs/fcntl.c | 2 +- fs/fs_pin.c | 4 +-- fs/fuse/dev.c | 2 +- fs/inode.c | 2 +- fs/namei.c | 4 +-- fs/namespace.c | 2 +- fs/nfs/dir.c | 8 ++--- fs/proc/array.c | 2 +- fs/proc_namespace.c | 2 +- fs/splice.c | 2 +- fs/userfaultfd.c | 8 ++--- fs/xfs/xfs_log_priv.h | 4 +-- include/linux/bitops.h | 4 +-- include/linux/dynamic_queue_limits.h | 2 +- include/linux/huge_mm.h | 2 +- include/linux/if_team.h | 2 +- include/linux/llist.h | 2 +- include/linux/pm_runtime.h | 2 +- include/net/ip_vs.h | 6 ++-- kernel/acct.c | 4 +-- kernel/events/core.c | 6 ++-- kernel/events/ring_buffer.c | 2 +- kernel/exit.c | 2 +- kernel/trace/ring_buffer.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_stack.c | 2 +- kernel/user_namespace.c | 2 +- lib/assoc_array.c | 20 ++++++------ lib/dynamic_queue_limits.c | 2 +- lib/llist.c | 2 +- lib/vsprintf.c | 
4 +-- mm/huge_memory.c | 2 +- net/core/dev.c | 2 +- net/core/pktgen.c | 2 +- net/ipv4/inet_fragment.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/udp.c | 4 +-- net/ipv6/ip6_tunnel.c | 8 ++--- net/ipv6/udp.c | 4 +-- net/llc/llc_input.c | 4 +-- net/mac80211/sta_info.c | 2 +- net/netlabel/netlabel_calipso.c | 2 +- net/wireless/nl80211.c | 2 +- sound/firewire/amdtp-am824.c | 6 ++-- sound/firewire/amdtp-stream.c | 23 +++++++------- sound/firewire/amdtp-stream.h | 2 +- sound/firewire/digi00x/amdtp-dot.c | 6 ++-- sound/firewire/fireface/amdtp-ff.c | 4 +-- sound/firewire/fireface/ff-midi.c | 10 +++--- sound/firewire/fireface/ff-transaction.c | 8 ++--- sound/firewire/isight.c | 18 +++++------ sound/firewire/motu/amdtp-motu.c | 4 +-- sound/firewire/oxfw/oxfw-scs1x.c | 12 ++++---- sound/firewire/tascam/amdtp-tascam.c | 4 +-- sound/firewire/tascam/tascam-transaction.c | 6 ++-- sound/soc/xtensa/xtfpga-i2s.c | 6 ++-- sound/usb/bcd2000/bcd2000.c | 4 +-- tools/arch/x86/include/asm/atomic.h | 2 +- tools/include/asm-generic/atomic-gcc.h | 2 +- tools/perf/util/auxtrace.h | 4 +-- tools/perf/util/session.h | 2 +- virt/kvm/kvm_main.c | 2 +- 180 files changed, 383 insertions(+), 385 deletions(-) (limited to 'tools/include') diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f46267153ec2..94cabe73664b 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -245,7 +245,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) * and read back old value */ do { - new = old = ACCESS_ONCE(*ipi_data_ptr); + new = old = READ_ONCE(*ipi_data_ptr); new |= 1U << msg; } while (cmpxchg(ipi_data_ptr, old, new) != old); diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index daa87212c9a1..77f50ae0aeb4 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -71,7 +71,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) while (lockval.tickets.next != lockval.tickets.owner) { wfe(); 
- lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + lockval.tickets.owner = READ_ONCE(lock->tickets.owner); } smp_mb(); diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 76e4c83cd5c8..3f24addd7972 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -179,7 +179,7 @@ static int tegra20_idle_lp2_coupled(struct cpuidle_device *dev, bool entered_lp2 = false; if (tegra_pending_sgi()) - ACCESS_ONCE(abort_flag) = true; + WRITE_ONCE(abort_flag, true); cpuidle_coupled_parallel_barrier(dev, &abort_barrier); diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index 79214d5ff097..a9dd619c6c29 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -35,7 +35,7 @@ static notrace u32 __vdso_read_begin(const struct vdso_data *vdata) { u32 seq; repeat: - seq = ACCESS_ONCE(vdata->seq_count); + seq = READ_ONCE(vdata->seq_count); if (seq & 1) { cpu_relax(); goto repeat; diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 35b31884863b..e98775be112d 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -61,7 +61,7 @@ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { - int tmp = ACCESS_ONCE(lock->lock); + int tmp = READ_ONCE(lock->lock); if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; @@ -73,19 +73,19 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); - ACCESS_ONCE(*p) = (tmp + 2) & ~1; + WRITE_ONCE(*p, (tmp + 2) & ~1); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock); + long tmp = READ_ONCE(lock->lock); 
return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); } static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock); + long tmp = READ_ONCE(lock->lock); return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h index b7cd6cf77b83..91bf0c2c265c 100644 --- a/arch/mips/include/asm/vdso.h +++ b/arch/mips/include/asm/vdso.h @@ -99,7 +99,7 @@ static inline u32 vdso_data_read_begin(const union mips_vdso_data *data) u32 seq; while (true) { - seq = ACCESS_ONCE(data->seq_count); + seq = READ_ONCE(data->seq_count); if (likely(!(seq & 1))) { /* Paired with smp_wmb() in vdso_data_write_*(). */ smp_rmb(); diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 4655017f2377..1d2996cd58da 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -166,7 +166,7 @@ int cps_pm_enter_state(enum cps_pm_state state) nc_core_ready_count = nc_addr; /* Ensure ready_count is zero-initialised before the assembly runs */ - ACCESS_ONCE(*nc_core_ready_count) = 0; + WRITE_ONCE(*nc_core_ready_count, 0); coupled_barrier(&per_cpu(pm_barrier, core), online); /* Run the generated entry code */ diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index 7ecf69879e2d..d7ef1232a82a 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -543,7 +543,7 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port) try_again: /* pull chars out of the hat */ - ix = ACCESS_ONCE(port->rx_outp); + ix = READ_ONCE(port->rx_outp); if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) { if (push && !tport->low_latency) tty_flip_buffer_push(tport); @@ -1724,7 +1724,7 @@ static int mn10300_serial_poll_get_char(struct uart_port *_port) if (mn10300_serial_int_tbl[port->rx_irq].port != NULL) { do { /* pull chars out of the hat */ - ix = ACCESS_ONCE(port->rx_outp); + ix = 
READ_ONCE(port->rx_outp); if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) return NO_POLL_CHAR; diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 17b98a87e5e2..c57d4e8307f2 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -260,7 +260,7 @@ atomic64_set(atomic64_t *v, s64 i) static __inline__ s64 atomic64_read(const atomic64_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } #define atomic64_inc(v) (atomic64_add( 1,(v))) diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 7a9cde0cfbd1..acd3206dfae3 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -43,7 +43,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) if (!opal_memcons) return -ENODEV; - out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos)); + out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. 
*/ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 9fa855f91e55..66f4160010ef 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -117,14 +117,14 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp); static inline int arch_read_trylock_once(arch_rwlock_t *rw) { - int old = ACCESS_ONCE(rw->lock); + int old = READ_ONCE(rw->lock); return likely(old >= 0 && __atomic_cmpxchg_bool(&rw->lock, old, old + 1)); } static inline int arch_write_trylock_once(arch_rwlock_t *rw) { - int old = ACCESS_ONCE(rw->lock); + int old = READ_ONCE(rw->lock); return likely(old == 0 && __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)); } @@ -211,7 +211,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) int old; do { - old = ACCESS_ONCE(rw->lock); + old = READ_ONCE(rw->lock); } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1)); } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index b12663d653d8..34e30b9ea234 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -162,8 +162,8 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) smp_yield_cpu(~owner); count = spin_retry; } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); + old = READ_ONCE(rw->lock); + owner = READ_ONCE(rw->owner); if (old < 0) continue; if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) @@ -178,7 +178,7 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) int old; while (count-- > 0) { - old = ACCESS_ONCE(rw->lock); + old = READ_ONCE(rw->lock); if (old < 0) continue; if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) @@ -202,8 +202,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, int prev) smp_yield_cpu(~owner); count = spin_retry; } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); + old = READ_ONCE(rw->lock); + owner = READ_ONCE(rw->owner); smp_mb(); if (old >= 0) { prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); @@ -230,8 +230,8 @@ void 
_raw_write_lock_wait(arch_rwlock_t *rw) smp_yield_cpu(~owner); count = spin_retry; } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); + old = READ_ONCE(rw->lock); + owner = READ_ONCE(rw->owner); if (old >= 0 && __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000)) prev = old; @@ -251,7 +251,7 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) int old; while (count-- > 0) { - old = ACCESS_ONCE(rw->lock); + old = READ_ONCE(rw->lock); if (old) continue; if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7643e979e333..e2f398e9456c 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -31,7 +31,7 @@ void atomic_set(atomic_t *, int); #define atomic_set_release(v, i) atomic_set((v), (i)) -#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) #define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v))) diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c index baa60357f8ba..b7ba577d82ca 100644 --- a/arch/tile/gxio/dma_queue.c +++ b/arch/tile/gxio/dma_queue.c @@ -163,14 +163,14 @@ int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, int64_t completion_slot, int update) { if (update) { - if (ACCESS_ONCE(dma_queue->hw_complete_count) > + if (READ_ONCE(dma_queue->hw_complete_count) > completion_slot) return 1; __gxio_dma_queue_update_credits(dma_queue); } - return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot; + return READ_ONCE(dma_queue->hw_complete_count) > completion_slot; } EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete); diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h index b9e45e37649e..c8fd47edba30 100644 --- a/arch/tile/include/gxio/dma_queue.h +++ b/arch/tile/include/gxio/dma_queue.h @@ -121,7 +121,7 @@ static inline int64_t 
__gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, * if the result is LESS than "hw_complete_count". */ uint64_t complete; - complete = ACCESS_ONCE(dma_queue->hw_complete_count); + complete = READ_ONCE(dma_queue->hw_complete_count); slot |= (complete & 0xffffffffff000000); if (slot < complete) slot += 0x1000000; diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e1a078e6828e..d516d61751c2 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -255,7 +255,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, int do_syscall_trace_enter(struct pt_regs *regs) { - u32 work = ACCESS_ONCE(current_thread_info()->flags); + u32 work = READ_ONCE(current_thread_info()->flags); if ((work & _TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) { diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03505ffbe1b6..eaa0ba66cf96 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -75,7 +75,7 @@ static long syscall_trace_enter(struct pt_regs *regs) if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; if (unlikely(work & _TIF_SYSCALL_EMU)) emulated = true; diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index fa8dbfcf7ed3..11b13c4b43d5 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -318,7 +318,7 @@ int gettimeofday(struct timeval *, struct timezone *) notrace time_t __vdso_time(time_t *t) { /* This is atomic on x86 so we don't need any locks. 
*/ - time_t result = ACCESS_ONCE(gtod->wall_time_sec); + time_t result = READ_ONCE(gtod->wall_time_sec); if (t) *t = result; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 589af1eec7c1..140d33288e78 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,7 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 022e59714562..53dd162576a8 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -48,7 +48,7 @@ static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->seq); + ret = READ_ONCE(s->seq); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9c4e7ba6870c..7d7715dde901 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -155,14 +155,14 @@ void init_espfix_ap(int cpu) page = cpu/ESPFIX_STACKS_PER_PAGE; /* Did another CPU already set this up? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (likely(stack_page)) goto done; mutex_lock(&espfix_init_mutex); /* Did we race on the lock? 
*/ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (stack_page) goto unlock_done; @@ -200,7 +200,7 @@ void init_espfix_ap(int cpu) set_pte(&pte_p[n*PTE_STRIDE], pte); /* Job is done for this CPU and any CPU which shares this page */ - ACCESS_ONCE(espfix_pages[page]) = stack_page; + WRITE_ONCE(espfix_pages[page], stack_page); unlock_done: mutex_unlock(&espfix_init_mutex); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 35aafc95e4b8..18bc9b51ac9b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -105,7 +105,7 @@ static void nmi_max_handler(struct irq_work *w) { struct nmiaction *a = container_of(w, struct nmiaction, irq_work); int remainder_ns, decimal_msecs; - u64 whole_msecs = ACCESS_ONCE(a->max_duration); + u64 whole_msecs = READ_ONCE(a->max_duration); remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7a69cf053711..a119b361b8b7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -443,7 +443,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) static u64 __get_spte_lockless(u64 *sptep) { - return ACCESS_ONCE(*sptep); + return READ_ONCE(*sptep); } #else union split_spte { @@ -4819,7 +4819,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. 
*/ - if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; remote_flush = local_flush = false; diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index ea67dc876316..01c1371f39f8 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -157,7 +157,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return false; index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); - return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); + return !!READ_ONCE(slot->arch.gfn_track[mode][index]); } void kvm_page_track_cleanup(struct kvm *kvm) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6083ba462f35..13b4f19b9131 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -547,7 +547,7 @@ int xen_alloc_p2m_entry(unsigned long pfn) if (p2m_top_mfn && pfn < MAX_P2M_PFN) { topidx = p2m_top_index(pfn); top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); + mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]); BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c index 4dc0c1b43f4b..2f7eb66c23ec 100644 --- a/arch/xtensa/platforms/xtfpga/lcd.c +++ b/arch/xtensa/platforms/xtfpga/lcd.c @@ -34,23 +34,23 @@ static void lcd_put_byte(u8 *addr, u8 data) { #ifdef CONFIG_XTFPGA_LCD_8BIT_ACCESS - ACCESS_ONCE(*addr) = data; + WRITE_ONCE(*addr, data); #else - ACCESS_ONCE(*addr) = data & 0xf0; - ACCESS_ONCE(*addr) = (data << 4) & 0xf0; + WRITE_ONCE(*addr, data & 0xf0); + WRITE_ONCE(*addr, (data << 4) & 0xf0); #endif } static int __init lcd_init(void) { - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); mdelay(5); - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); udelay(200); - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + 
WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); udelay(50); #ifndef CONFIG_XTFPGA_LCD_8BIT_ACCESS - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE4BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); udelay(50); lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); udelay(50); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 6a9a0f03a67b..d822530e6aea 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -261,7 +261,7 @@ static inline bool stat_sample_valid(struct blk_rq_stat *stat) static u64 rwb_sync_issue_lat(struct rq_wb *rwb) { - u64 now, issue = ACCESS_ONCE(rwb->sync_issue); + u64 now, issue = READ_ONCE(rwb->sync_issue); if (!issue || !rwb->sync_cookie) return 0; diff --git a/drivers/base/core.c b/drivers/base/core.c index 12ebd055724c..4b8ba2a75a4d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -668,7 +668,7 @@ const char *dev_driver_string(const struct device *dev) * so be careful about accessing it. dev->bus and dev->class should * never change once they are set, so they don't need special care. */ - drv = ACCESS_ONCE(dev->driver); + drv = READ_ONCE(dev->driver); return drv ? drv->name : (dev->bus ? dev->bus->name : (dev->class ? dev->class->name : "")); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 7bcf80fa9ada..41d7c2b99f69 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -134,11 +134,11 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev) if (!dev->power.use_autosuspend) goto out; - autosuspend_delay = ACCESS_ONCE(dev->power.autosuspend_delay); + autosuspend_delay = READ_ONCE(dev->power.autosuspend_delay); if (autosuspend_delay < 0) goto out; - last_busy = ACCESS_ONCE(dev->power.last_busy); + last_busy = READ_ONCE(dev->power.last_busy); elapsed = jiffies - last_busy; if (elapsed < 0) goto out; /* jiffies has wrapped around. 
*/ diff --git a/drivers/char/random.c b/drivers/char/random.c index 8ad92707e45f..6c7ccac2679e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -641,7 +641,7 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) return; retry: - entropy_count = orig = ACCESS_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(r->entropy_count); if (nfrac < 0) { /* Debit */ entropy_count += nfrac; @@ -1265,7 +1265,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, /* Can we pull enough? */ retry: - entropy_count = orig = ACCESS_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(r->entropy_count); ibytes = nbytes; /* never pull more than available */ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 39e489a96ad7..60da2537bef9 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -71,7 +71,7 @@ static irqreturn_t bcm2835_time_interrupt(int irq, void *dev_id) if (readl_relaxed(timer->control) & timer->match_mask) { writel_relaxed(timer->match_mask, timer->control); - event_handler = ACCESS_ONCE(timer->evt.event_handler); + event_handler = READ_ONCE(timer->evt.event_handler); if (event_handler) event_handler(&timer->evt); return IRQ_HANDLED; diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index d258953ff488..f4f258075b89 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -172,7 +172,7 @@ static void caam_jr_dequeue(unsigned long devarg) while (rd_reg32(&jrp->rregs->outring_used)) { - head = ACCESS_ONCE(jrp->head); + head = READ_ONCE(jrp->head); spin_lock(&jrp->outlock); @@ -341,7 +341,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, spin_lock_bh(&jrp->inplock); head = jrp->head; - tail = ACCESS_ONCE(jrp->tail); + tail = READ_ONCE(jrp->tail); if (!rd_reg32(&jrp->rregs->inpring_avail) || CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { diff --git 
a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c index 874ddf5e9087..0f20f5ec9617 100644 --- a/drivers/crypto/nx/nx-842-powernv.c +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -193,7 +193,7 @@ static int wait_for_csb(struct nx842_workmem *wmem, ktime_t start = wmem->start, now = ktime_get(); ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); - while (!(ACCESS_ONCE(csb->flags) & CSB_V)) { + while (!(READ_ONCE(csb->flags) & CSB_V)) { cpu_relax(); now = ktime_get(); if (ktime_after(now, timeout)) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 8bf89267dc25..ccf52368a073 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -734,7 +734,7 @@ static unsigned int ar_search_last_active_buffer(struct ar_context *ctx, __le16 res_count, next_res_count; i = ar_first_buffer_index(ctx); - res_count = ACCESS_ONCE(ctx->descriptors[i].res_count); + res_count = READ_ONCE(ctx->descriptors[i].res_count); /* A buffer that is not yet completely filled must be the last one. */ while (i != last && res_count == 0) { @@ -742,8 +742,7 @@ static unsigned int ar_search_last_active_buffer(struct ar_context *ctx, /* Peek at the next descriptor. */ next_i = ar_next_buffer_index(i); rmb(); /* read descriptors in order */ - next_res_count = ACCESS_ONCE( - ctx->descriptors[next_i].res_count); + next_res_count = READ_ONCE(ctx->descriptors[next_i].res_count); /* * If the next descriptor is still empty, we must stop at this * descriptor. 
@@ -759,8 +758,7 @@ static unsigned int ar_search_last_active_buffer(struct ar_context *ctx, if (MAX_AR_PACKET_SIZE > PAGE_SIZE && i != last) { next_i = ar_next_buffer_index(next_i); rmb(); - next_res_count = ACCESS_ONCE( - ctx->descriptors[next_i].res_count); + next_res_count = READ_ONCE(ctx->descriptors[next_i].res_count); if (next_res_count != cpu_to_le16(PAGE_SIZE)) goto next_buffer_is_active; } @@ -2812,7 +2810,7 @@ static int handle_ir_buffer_fill(struct context *context, u32 buffer_dma; req_count = le16_to_cpu(last->req_count); - res_count = le16_to_cpu(ACCESS_ONCE(last->res_count)); + res_count = le16_to_cpu(READ_ONCE(last->res_count)); completed = req_count - res_count; buffer_dma = le32_to_cpu(last->data_address); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 333bad749067..303b5e099a98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -260,7 +260,7 @@ static void amdgpu_fence_fallback(unsigned long arg) */ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { - uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq); + uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq); struct dma_fence *fence, **ptr; int r; @@ -300,7 +300,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) amdgpu_fence_process(ring); emitted = 0x100000000ull; emitted -= atomic_read(&ring->fence_drv.last_seq); - emitted += ACCESS_ONCE(ring->fence_drv.sync_seq); + emitted += READ_ONCE(ring->fence_drv.sync_seq); return lower_32_bits(emitted); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7171968f261e..6149a47fe63d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -788,11 +788,11 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) seq_printf(m, "\t0x%08x: %12ld byte %s", id, amdgpu_bo_size(bo), placement); - offset = 
ACCESS_ONCE(bo->tbo.mem.start); + offset = READ_ONCE(bo->tbo.mem.start); if (offset != AMDGPU_BO_INVALID_OFFSET) seq_printf(m, " @ 0x%010Lx", offset); - pin_count = ACCESS_ONCE(bo->pin_count); + pin_count = READ_ONCE(bo->pin_count); if (pin_count) seq_printf(m, " pin count %d", pin_count); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 38cea6fb25a8..a25f6c72f219 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -187,7 +187,7 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) if (kfifo_is_empty(&entity->job_queue)) return false; - if (ACCESS_ONCE(entity->dependency)) + if (READ_ONCE(entity->dependency)) return false; return true; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 3386452bd2f0..cf3deb283da5 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -451,7 +451,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, else r = 0; - cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); + cur_placement = READ_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_put_unlocked(gobj); return r; @@ -481,7 +481,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ - cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); + cur_placement = READ_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index a552e4ea5440..6ac094ee8983 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -904,7 +904,7 @@ vmw_surface_handle_reference(struct 
vmw_private *dev_priv, if (unlikely(drm_is_render_client(file_priv))) require_exist = true; - if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) { + if (READ_ONCE(vmw_fpriv(file_priv)->locked_master)) { DRM_ERROR("Locked master refused legacy " "surface reference.\n"); return -EACCES; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d9a1e9893136..97bea2e1aa6a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -380,7 +380,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, if (sc->flags & SCF_FROZEN) { wait_event_interruptible_timeout( dd->event_queue, - !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), + !(READ_ONCE(dd->flags) & HFI1_FROZEN), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); if (dd->flags & HFI1_FROZEN) return -ENOLCK; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 7108a4b5e94c..75e740780285 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1423,14 +1423,14 @@ retry: goto done; } /* copy from receiver cache line and recalculate */ - sc->alloc_free = ACCESS_ONCE(sc->free); + sc->alloc_free = READ_ONCE(sc->free); avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free); if (blocks > avail) { /* still no room, actively update */ sc_release_update(sc); - sc->alloc_free = ACCESS_ONCE(sc->free); + sc->alloc_free = READ_ONCE(sc->free); trycount++; goto retry; } @@ -1667,7 +1667,7 @@ void sc_release_update(struct send_context *sc) /* call sent buffer callbacks */ code = -1; /* code not yet set */ - head = ACCESS_ONCE(sc->sr_head); /* snapshot the head */ + head = READ_ONCE(sc->sr_head); /* snapshot the head */ tail = sc->sr_tail; while (head != tail) { pbuf = &sc->sr[tail].pbuf; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index b3291f0fde9a..a7fc664f0d4e 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c 
@@ -363,7 +363,7 @@ static void ruc_loopback(struct rvt_qp *sqp) again: smp_read_barrier_depends(); /* see post_one_send() */ - if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) + if (sqp->s_last == READ_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6781bcdb10b3..08346d25441c 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1725,7 +1725,7 @@ retry: swhead = sde->descq_head & sde->sdma_mask; /* this code is really bad for cache line trading */ - swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; + swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask; cnt = sde->descq_cnt; if (swhead < swtail) @@ -1872,7 +1872,7 @@ retry: if ((status & sde->idle_mask) && !idle_check_done) { u16 swtail; - swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; + swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask; if (swtail != hwhead) { hwhead = (u16)read_sde_csr(sde, SD(HEAD)); idle_check_done = 1; @@ -2222,7 +2222,7 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde) u16 len; head = sde->descq_head & sde->sdma_mask; - tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; + tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask; seq_printf(s, SDE_FMT, sde->this_idx, sde->cpu, sdma_state_name(sde->state.current_state), @@ -3305,7 +3305,7 @@ int sdma_ahg_alloc(struct sdma_engine *sde) return -EINVAL; } while (1) { - nr = ffz(ACCESS_ONCE(sde->ahg_bits)); + nr = ffz(READ_ONCE(sde->ahg_bits)); if (nr > 31) { trace_hfi1_ahg_allocate(sde, -ENOSPC); return -ENOSPC; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 107011d8613b..374c59784950 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -445,7 +445,7 @@ static inline u16 sdma_descq_freecnt(struct sdma_engine *sde) { return sde->descq_cnt - (sde->descq_tail - - 
ACCESS_ONCE(sde->descq_head)) - 1; + READ_ONCE(sde->descq_head)) - 1; } static inline u16 sdma_descq_inprocess(struct sdma_engine *sde) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0b646173ca22..9a31c585427f 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -80,7 +80,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (iowait_sdma_pending(&priv->s_iowait)) { @@ -121,7 +121,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; /* Check if send work queue is empty. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) { + if (qp->s_cur == READ_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; } diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 2ba74fdd6f15..7fec6b984e3e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -487,7 +487,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ if (iowait_sdma_pending(&priv->s_iowait)) { @@ -501,7 +501,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* see post_one_send() */ smp_read_barrier_depends(); - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) + if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c0c0e0445cbf..8ec6e8a8d6f7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -276,7 +276,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, - (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); + (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); @@ -591,7 +591,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret != -EBUSY) { req->status = ret; WRITE_ONCE(req->has_error, 1); - if (ACCESS_ONCE(req->seqcomp) == + if (READ_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; return ret; @@ -825,7 +825,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) */ if (req->data_len) { iovec = &req->iovs[req->iov_idx]; - if (ACCESS_ONCE(iovec->offset) == iovec->iov.iov_len) { + if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { if (++req->iov_idx == req->data_iovs) { ret = -EFAULT; goto free_txreq; @@ -1390,7 +1390,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) } else { if (status != SDMA_TXREQ_S_OK) req->status = status; - if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && + if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) && (READ_ONCE(req->done) || READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 53efbb0b40c4..9a37e844d4c8 100644 --- 
a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -368,7 +368,7 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) again: smp_read_barrier_depends(); /* see post_one_send() */ - if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) + if (sqp->s_last == READ_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 498e2202e72c..bddcc37ace44 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -61,7 +61,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -91,7 +91,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) goto bail; /* Check if send work queue is empty. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) + if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; /* * Start a new request. diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index be4907453ac4..15962ed193ce 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -253,7 +253,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ if (atomic_read(&priv->s_dma_busy)) { @@ -267,7 +267,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) /* see post_one_send() */ smp_read_barrier_depends(); - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) + if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22df09ae809e..b670cb9d2006 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1073,7 +1073,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) rdi->driver_f.notify_error_qp(qp); /* Schedule the sending tasklet to drain the send work queue. */ - if (ACCESS_ONCE(qp->s_last) != qp->s_head) + if (READ_ONCE(qp->s_last) != qp->s_head) rdi->driver_f.schedule_send(qp); rvt_clear_mr_refs(qp, 0); @@ -1686,7 +1686,7 @@ static inline int rvt_qp_is_avail( if (likely(qp->s_avail)) return 0; smp_read_barrier_depends(); /* see rc.c */ - slast = ACCESS_ONCE(qp->s_last); + slast = READ_ONCE(qp->s_last); if (qp->s_head >= slast) avail = qp->s_size - (qp->s_head - slast); else @@ -1917,7 +1917,7 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * ahead and kick the send engine into gear. Otherwise we will always * just schedule the send to happen later. 
*/ - call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; + call_send = qp->s_head == READ_ONCE(qp->s_last) && !wr->next; for (; wr; wr = wr->next) { err = rvt_post_one_wr(qp, wr, &call_send); diff --git a/drivers/input/misc/regulator-haptic.c b/drivers/input/misc/regulator-haptic.c index 2e8f801932be..a1db1e5040dc 100644 --- a/drivers/input/misc/regulator-haptic.c +++ b/drivers/input/misc/regulator-haptic.c @@ -233,7 +233,7 @@ static int __maybe_unused regulator_haptic_resume(struct device *dev) haptic->suspended = false; - magnitude = ACCESS_ONCE(haptic->magnitude); + magnitude = READ_ONCE(haptic->magnitude); if (magnitude) regulator_haptic_set_voltage(haptic, magnitude); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index d216a8f7bc22..33bb074d6941 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -347,7 +347,7 @@ static void __cache_size_refresh(void) BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); BUG_ON(dm_bufio_client_count < 0); - dm_bufio_cache_size_latch = ACCESS_ONCE(dm_bufio_cache_size); + dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size); /* * Use default if set to 0 and report the actual cache size used. 
@@ -960,7 +960,7 @@ static void __get_memory_limit(struct dm_bufio_client *c, { unsigned long buffers; - if (unlikely(ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { + if (unlikely(READ_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { if (mutex_trylock(&dm_bufio_clients_lock)) { __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); @@ -1600,7 +1600,7 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) static unsigned long get_retain_buffers(struct dm_bufio_client *c) { - unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); + unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes); return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT); } @@ -1647,7 +1647,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); - return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]); + return READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]); } /* @@ -1818,7 +1818,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset); static unsigned get_max_age_hz(void) { - unsigned max_age = ACCESS_ONCE(dm_bufio_max_age); + unsigned max_age = READ_ONCE(dm_bufio_max_age); if (max_age > UINT_MAX / HZ) max_age = UINT_MAX / HZ; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index cf2c67e35eaf..eb45cc3df31d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -107,7 +107,7 @@ static void io_job_start(struct dm_kcopyd_throttle *t) try_again: spin_lock_irq(&throttle_spinlock); - throttle = ACCESS_ONCE(t->throttle); + throttle = READ_ONCE(t->throttle); if (likely(throttle >= 100)) goto skip_limit; @@ -157,7 +157,7 @@ static void io_job_finish(struct dm_kcopyd_throttle *t) t->num_io_jobs--; - if (likely(ACCESS_ONCE(t->throttle) >= 100)) + if (likely(READ_ONCE(t->throttle) >= 100)) goto skip_limit; if (!t->num_io_jobs) { diff --git 
a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 6028d8247f58..a1a5eec783cc 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -431,7 +431,7 @@ do_sync_free: synchronize_rcu_expedited(); dm_stat_free(&s->rcu_head); } else { - ACCESS_ONCE(dm_stat_need_rcu_barrier) = 1; + WRITE_ONCE(dm_stat_need_rcu_barrier, 1); call_rcu(&s->rcu_head, dm_stat_free); } return 0; @@ -639,12 +639,12 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, */ last = raw_cpu_ptr(stats->last); stats_aux->merged = - (bi_sector == (ACCESS_ONCE(last->last_sector) && + (bi_sector == (READ_ONCE(last->last_sector) && ((bi_rw == WRITE) == - (ACCESS_ONCE(last->last_rw) == WRITE)) + (READ_ONCE(last->last_rw) == WRITE)) )); - ACCESS_ONCE(last->last_sector) = end_sector; - ACCESS_ONCE(last->last_rw) = bi_rw; + WRITE_ONCE(last->last_sector, end_sector); + WRITE_ONCE(last->last_rw, bi_rw); } rcu_read_lock(); @@ -693,22 +693,22 @@ static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared for_each_possible_cpu(cpu) { p = &s->stat_percpu[cpu][x]; - shared->tmp.sectors[READ] += ACCESS_ONCE(p->sectors[READ]); - shared->tmp.sectors[WRITE] += ACCESS_ONCE(p->sectors[WRITE]); - shared->tmp.ios[READ] += ACCESS_ONCE(p->ios[READ]); - shared->tmp.ios[WRITE] += ACCESS_ONCE(p->ios[WRITE]); - shared->tmp.merges[READ] += ACCESS_ONCE(p->merges[READ]); - shared->tmp.merges[WRITE] += ACCESS_ONCE(p->merges[WRITE]); - shared->tmp.ticks[READ] += ACCESS_ONCE(p->ticks[READ]); - shared->tmp.ticks[WRITE] += ACCESS_ONCE(p->ticks[WRITE]); - shared->tmp.io_ticks[READ] += ACCESS_ONCE(p->io_ticks[READ]); - shared->tmp.io_ticks[WRITE] += ACCESS_ONCE(p->io_ticks[WRITE]); - shared->tmp.io_ticks_total += ACCESS_ONCE(p->io_ticks_total); - shared->tmp.time_in_queue += ACCESS_ONCE(p->time_in_queue); + shared->tmp.sectors[READ] += READ_ONCE(p->sectors[READ]); + shared->tmp.sectors[WRITE] += READ_ONCE(p->sectors[WRITE]); + shared->tmp.ios[READ] += READ_ONCE(p->ios[READ]); + 
shared->tmp.ios[WRITE] += READ_ONCE(p->ios[WRITE]); + shared->tmp.merges[READ] += READ_ONCE(p->merges[READ]); + shared->tmp.merges[WRITE] += READ_ONCE(p->merges[WRITE]); + shared->tmp.ticks[READ] += READ_ONCE(p->ticks[READ]); + shared->tmp.ticks[WRITE] += READ_ONCE(p->ticks[WRITE]); + shared->tmp.io_ticks[READ] += READ_ONCE(p->io_ticks[READ]); + shared->tmp.io_ticks[WRITE] += READ_ONCE(p->io_ticks[WRITE]); + shared->tmp.io_ticks_total += READ_ONCE(p->io_ticks_total); + shared->tmp.time_in_queue += READ_ONCE(p->time_in_queue); if (s->n_histogram_entries) { unsigned i; for (i = 0; i < s->n_histogram_entries + 1; i++) - shared->tmp.histogram[i] += ACCESS_ONCE(p->histogram[i]); + shared->tmp.histogram[i] += READ_ONCE(p->histogram[i]); } } } diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 4c8de1ff78ca..8d0ba879777e 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -144,7 +144,7 @@ static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long switch_get_position(sctx, region_nr, &region_index, &bit); - return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & + return (READ_ONCE(sctx->region_table[region_index]) >> bit) & ((1 << sctx->region_table_entry_bits) - 1); } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 1e25705209c2..89e5dff9b4cf 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2431,7 +2431,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) struct pool_c *pt = pool->ti->private; bool needs_check = dm_pool_metadata_needs_check(pool->pmd); enum pool_mode old_mode = get_pool_mode(pool); - unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ; + unsigned long no_space_timeout = READ_ONCE(no_space_timeout_secs) * HZ; /* * Never allow the pool to transition to PM_WRITE mode if user diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index bda3caca23ca..fba93237a780 100644 --- a/drivers/md/dm-verity-target.c +++ 
b/drivers/md/dm-verity-target.c @@ -589,7 +589,7 @@ static void verity_prefetch_io(struct work_struct *work) verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL); verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL); if (!i) { - unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); + unsigned cluster = READ_ONCE(dm_verity_prefetch_cluster); cluster >>= v->data_dev_block_bits; if (unlikely(!cluster)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4be85324f44d..8aaffa19b29a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -114,7 +114,7 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; static int __dm_get_module_param_int(int *module_param, int min, int max) { - int param = ACCESS_ONCE(*module_param); + int param = READ_ONCE(*module_param); int modified_param = 0; bool modified = true; @@ -136,7 +136,7 @@ static int __dm_get_module_param_int(int *module_param, int min, int max) unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max) { - unsigned param = ACCESS_ONCE(*module_param); + unsigned param = READ_ONCE(*module_param); unsigned modified_param = 0; if (!param) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0ff1bbf6c90e..447ddcbc9566 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2651,7 +2651,7 @@ state_show(struct md_rdev *rdev, char *page) { char *sep = ","; size_t len = 0; - unsigned long flags = ACCESS_ONCE(rdev->flags); + unsigned long flags = READ_ONCE(rdev->flags); if (test_bit(Faulty, &flags) || (!test_bit(ExternalBbl, &flags) && diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 928e24a07133..7d9a50eed9db 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6072,7 +6072,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n */ rcu_read_lock(); for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = ACCESS_ONCE(conf->disks[i].rdev); + struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev); 
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) still_degraded = 1; diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c index 637cc4686742..b665757ca89a 100644 --- a/drivers/misc/mic/scif/scif_rb.c +++ b/drivers/misc/mic/scif/scif_rb.c @@ -138,7 +138,7 @@ void scif_rb_commit(struct scif_rb *rb) * the read barrier in scif_rb_count(..) */ wmb(); - ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset; + WRITE_ONCE(*rb->write_ptr, rb->current_write_offset); #ifdef CONFIG_INTEL_MIC_CARD /* * X100 Si bug: For the case where a Core is performing an EXT_WR @@ -147,7 +147,7 @@ void scif_rb_commit(struct scif_rb *rb) * This way, if ordering is violated for the Interrupt Message, it will * fall just behind the first Posted associated with the first EXT_WR. */ - ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset; + WRITE_ONCE(*rb->write_ptr, rb->current_write_offset); #endif } @@ -210,7 +210,7 @@ void scif_rb_update_read_ptr(struct scif_rb *rb) * scif_rb_space(..) */ mb(); - ACCESS_ONCE(*rb->read_ptr) = new_offset; + WRITE_ONCE(*rb->read_ptr, new_offset); #ifdef CONFIG_INTEL_MIC_CARD /* * X100 Si Bug: For the case where a Core is performing an EXT_WR @@ -219,7 +219,7 @@ void scif_rb_update_read_ptr(struct scif_rb *rb) * This way, if ordering is violated for the Interrupt Message, it will * fall just behind the first Posted associated with the first EXT_WR. */ - ACCESS_ONCE(*rb->read_ptr) = new_offset; + WRITE_ONCE(*rb->read_ptr, new_offset); #endif } diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c index e1ef8daedd5a..a036dbb4101e 100644 --- a/drivers/misc/mic/scif/scif_rma_list.c +++ b/drivers/misc/mic/scif/scif_rma_list.c @@ -277,7 +277,7 @@ retry: * Need to restart list traversal if there has been * an asynchronous list entry deletion. 
*/ - if (ACCESS_ONCE(ep->rma_info.async_list_del)) + if (READ_ONCE(ep->rma_info.async_list_del)) goto retry; } mutex_unlock(&ep->rma_info.rma_lock); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c02cc817a490..1ed9529e7bd1 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1378,7 +1378,7 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) unsigned int count; slaves = rcu_dereference(bond->slave_arr); - count = slaves ? ACCESS_ONCE(slaves->count) : 0; + count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[hash_index % count]; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c99dc59d729b..af51b90cecbb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1167,7 +1167,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) slave = bond_slave_get_rcu(skb->dev); bond = slave->bond; - recv_probe = ACCESS_ONCE(bond->recv_probe); + recv_probe = READ_ONCE(bond->recv_probe); if (recv_probe) { ret = recv_probe(skb, bond, slave); if (ret == RX_HANDLER_CONSUMED) { @@ -3810,7 +3810,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev else bond_xmit_slave_id(bond, skb, 0); } else { - int slave_cnt = ACCESS_ONCE(bond->slave_cnt); + int slave_cnt = READ_ONCE(bond->slave_cnt); if (likely(slave_cnt)) { slave_id = bond_rr_gen_slave_id(bond); @@ -3972,7 +3972,7 @@ static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int count; slaves = rcu_dereference(bond->slave_arr); - count = slaves ? ACCESS_ONCE(slaves->count) : 0; + count = slaves ? 
READ_ONCE(slaves->count) : 0; if (likely(count)) { slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; bond_dev_queue_xmit(bond, skb, slave->dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 4ef68f69b58c..43f52a8fe708 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -405,7 +405,7 @@ void free_tx_desc(struct adapter *adap, struct sge_txq *q, */ static inline int reclaimable(const struct sge_txq *q) { - int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); + int hw_cidx = ntohs(READ_ONCE(q->stat->cidx)); hw_cidx -= q->cidx; return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx; } @@ -1375,7 +1375,7 @@ out_free: dev_kfree_skb_any(skb); */ static inline void reclaim_completed_tx_imm(struct sge_txq *q) { - int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); + int hw_cidx = ntohs(READ_ONCE(q->stat->cidx)); int reclaim = hw_cidx - q->cidx; if (reclaim < 0) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0e3d9f39a807..c6e859a27ee6 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -605,7 +605,7 @@ static void accumulate_16bit_val(u32 *acc, u16 val) if (wrapped) newacc += 65536; - ACCESS_ONCE(*acc) = newacc; + WRITE_ONCE(*acc, newacc); } static void populate_erx_stats(struct be_adapter *adapter, diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 0cec06bec63e..340e28211135 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -373,7 +373,7 @@ static int hip04_tx_reclaim(struct net_device *ndev, bool force) unsigned int count; smp_rmb(); - count = tx_count(ACCESS_ONCE(priv->tx_head), tx_tail); + count = tx_count(READ_ONCE(priv->tx_head), tx_tail); if (count == 0) goto out; @@ -431,7 +431,7 @@ static int hip04_mac_start_xmit(struct sk_buff *skb, 
struct net_device *ndev) dma_addr_t phys; smp_rmb(); - count = tx_count(tx_head, ACCESS_ONCE(priv->tx_tail)); + count = tx_count(tx_head, READ_ONCE(priv->tx_tail)); if (count == (TX_DESC_NUM - 1)) { netif_stop_queue(ndev); return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8f326f87a815..2cb9539c931e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -264,7 +264,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->rx_buf_failed, vsi->rx_page_failed); rcu_read_lock(); for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]); + struct i40e_ring *rx_ring = READ_ONCE(vsi->rx_rings[i]); if (!rx_ring) continue; @@ -320,7 +320,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); + struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); if (!tx_ring) continue; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 05e89864f781..e9e04a485e0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1570,7 +1570,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, } rcu_read_lock(); for (j = 0; j < vsi->num_queue_pairs; j++) { - tx_ring = ACCESS_ONCE(vsi->tx_rings[j]); + tx_ring = READ_ONCE(vsi->tx_rings[j]); if (!tx_ring) continue; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6498da8806cb..de1fcac7834d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -455,7 +455,7 @@ static void i40e_get_netdev_stats_struct(struct 
net_device *netdev, u64 bytes, packets; unsigned int start; - tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); + tx_ring = READ_ONCE(vsi->tx_rings[i]); if (!tx_ring) continue; i40e_get_netdev_stats_struct_tx(tx_ring, stats); @@ -791,7 +791,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ - p = ACCESS_ONCE(vsi->tx_rings[q]); + p = READ_ONCE(vsi->tx_rings[q]); do { start = u64_stats_fetch_begin_irq(&p->syncp); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index d8456c381c99..97381238eb7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -130,7 +130,7 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } smp_mb(); /* Force any pending update before accessing. */ - adj = ACCESS_ONCE(pf->ptp_base_adj); + adj = READ_ONCE(pf->ptp_base_adj); freq = adj; freq *= ppb; @@ -499,7 +499,7 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) wr32(hw, I40E_PRTTSYN_INC_H, incval >> 32); /* Update the base adjustement value. */ - ACCESS_ONCE(pf->ptp_base_adj) = incval; + WRITE_ONCE(pf->ptp_base_adj, incval); smp_mb(); /* Force the above update. 
*/ } diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 58adbf234e07..31a3f09df9f7 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -375,7 +375,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg); /* write operations, indexed using DWORDS */ #define wr32(reg, val) \ do { \ - u8 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \ + u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ if (!E1000_REMOVED(hw_addr)) \ writel((val), &hw_addr[(reg)]); \ } while (0) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd4a46b03cc8..6bccc2be2b91 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -750,7 +750,7 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) u32 igb_rd32(struct e1000_hw *hw, u32 reg) { struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); - u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; if (E1000_REMOVED(hw_addr)) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index e083732adf64..a01409e2e06c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -161,7 +161,7 @@ static inline bool ixgbe_removed(void __iomem *addr) static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); if (ixgbe_removed(reg_addr)) return; @@ -180,7 +180,7 @@ static inline void writeq(u64 val, void __iomem *addr) static inline void ixgbe_write_reg64(struct ixgbe_hw *hw, u32 reg, u64 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); if (ixgbe_removed(reg_addr)) return; diff --git 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4d76afd13868..2224e691ee07 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -380,7 +380,7 @@ static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) */ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); u32 value; if (ixgbe_removed(reg_addr)) @@ -8630,7 +8630,7 @@ static void ixgbe_get_stats64(struct net_device *netdev, rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]); + struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]); u64 bytes, packets; unsigned int start; @@ -8646,12 +8646,12 @@ static void ixgbe_get_stats64(struct net_device *netdev, } for (i = 0; i < adapter->num_tx_queues; i++) { - struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]); + struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]); ixgbe_get_ring_stats64(stats, ring); } for (i = 0; i < adapter->num_xdp_queues; i++) { - struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]); + struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]); ixgbe_get_ring_stats64(stats, ring); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 86d6924a2b71..ae312c45696a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -378,7 +378,7 @@ static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) } smp_mb(); - incval = ACCESS_ONCE(adapter->base_incval); + incval = READ_ONCE(adapter->base_incval); freq = incval; freq *= ppb; @@ -1159,7 +1159,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) } /* update the base incval used to calculate frequency adjustment */ - ACCESS_ONCE(adapter->base_incval) = incval; + 
WRITE_ONCE(adapter->base_incval, incval); smp_mb(); /* need lock to prevent incorrect read while modifying cyclecounter */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 032f8ac06357..cacb30682434 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -164,7 +164,7 @@ static void ixgbevf_check_remove(struct ixgbe_hw *hw, u32 reg) u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); u32 value; if (IXGBE_REMOVED(reg_addr)) diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 04d8d4ee4f04..c651fefcc3d2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -182,7 +182,7 @@ struct ixgbevf_info { static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); if (IXGBE_REMOVED(reg_addr)) return; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8a32a8f7f9c0..3541a7f9d12e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -414,8 +414,8 @@ bool mlx4_en_process_tx_cq(struct net_device *dev, index = cons_index & size_mask; cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; - last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb); - ring_cons = ACCESS_ONCE(ring->cons); + last_nr_txbb = READ_ONCE(ring->last_nr_txbb); + ring_cons = READ_ONCE(ring->cons); ring_index = ring_cons & size_mask; stamp_index = ring_index; @@ -479,8 +479,8 @@ bool mlx4_en_process_tx_cq(struct net_device *dev, wmb(); /* we want to dirty this cache line once */ - ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; - ACCESS_ONCE(ring->cons) = ring_cons + 
txbbs_skipped; + WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb); + WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped); if (cq->type == TX_XDP) return done < budget; @@ -858,7 +858,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; /* fetch ring->cons far ahead before needing it to avoid stall */ - ring_cons = ACCESS_ONCE(ring->cons); + ring_cons = READ_ONCE(ring->cons); real_size = get_real_size(skb, shinfo, dev, &lso_header_size, &inline_ok, &fragptr); @@ -1066,7 +1066,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) */ smp_rmb(); - ring_cons = ACCESS_ONCE(ring->cons); + ring_cons = READ_ONCE(ring->cons); if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 50ea69d88480..5dd5f61e1114 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2629,7 +2629,7 @@ static void vxge_poll_vp_lockup(unsigned long data) ring = &vdev->vpaths[i].ring; /* Truncated to machine word size number of frames */ - rx_frms = ACCESS_ONCE(ring->stats.rx_frms); + rx_frms = READ_ONCE(ring->stats.rx_frms); /* Did this vpath received any packets */ if (ring->stats.prev_rx_frms == rx_frms) { diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 13f72f5b18d2..a95a46bcd339 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2073,7 +2073,7 @@ static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id) netif_vdbg(efx, intr, efx->net_dev, "IRQ %d on CPU %d\n", irq, raw_smp_processor_id()); - if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) { + if (likely(READ_ONCE(efx->irq_soft_enabled))) { /* Note test interrupts */ if (context->index == efx->irq_level) efx->last_irq_cpu = raw_smp_processor_id(); @@ -2088,7 +2088,7 @@ static irqreturn_t 
efx_ef10_msi_interrupt(int irq, void *dev_id) static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id) { struct efx_nic *efx = dev_id; - bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + bool soft_enabled = READ_ONCE(efx->irq_soft_enabled); struct efx_channel *channel; efx_dword_t reg; u32 queues; @@ -3291,7 +3291,7 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel, bool rx_cont; u16 flags = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; /* Basic packet information */ @@ -3428,7 +3428,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) unsigned int tx_ev_q_label; int tx_descs = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) @@ -5316,7 +5316,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx) int i; for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { - if (ACCESS_ONCE(table->entry[i].spec) & + if (READ_ONCE(table->entry[i].spec) & EFX_EF10_FILTER_FLAG_AUTO_OLD) { rc = efx_ef10_filter_remove_internal(efx, 1U << EFX_FILTER_PRI_AUTO, i, true); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b2818..016616a63880 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2809,7 +2809,7 @@ static void efx_reset_work(struct work_struct *data) unsigned long pending; enum reset_type method; - pending = ACCESS_ONCE(efx->reset_pending); + pending = READ_ONCE(efx->reset_pending); method = fls(pending) - 1; if (method == RESET_TYPE_MC_BIST) @@ -2874,7 +2874,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) /* If we're not READY then just leave the flags set as the cue * to abort probing or reschedule the reset later. 
*/ - if (ACCESS_ONCE(efx->state) != STATE_READY) + if (READ_ONCE(efx->state) != STATE_READY) return; /* efx_process_channel() will no longer read events once a diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 29614da91cbf..7263275fde4a 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2545,7 +2545,7 @@ static void ef4_reset_work(struct work_struct *data) unsigned long pending; enum reset_type method; - pending = ACCESS_ONCE(efx->reset_pending); + pending = READ_ONCE(efx->reset_pending); method = fls(pending) - 1; if ((method == RESET_TYPE_RECOVER_OR_DISABLE || @@ -2605,7 +2605,7 @@ void ef4_schedule_reset(struct ef4_nic *efx, enum reset_type type) /* If we're not READY then just leave the flags set as the cue * to abort probing or reschedule the reset later. */ - if (ACCESS_ONCE(efx->state) != STATE_READY) + if (READ_ONCE(efx->state) != STATE_READY) return; queue_work(reset_workqueue, &efx->reset_work); diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index 93c713c1f627..cd8bb472d758 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -452,7 +452,7 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n", irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker)); - if (!likely(ACCESS_ONCE(efx->irq_soft_enabled))) + if (!likely(READ_ONCE(efx->irq_soft_enabled))) return IRQ_HANDLED; /* Check to see if we have a serious error condition */ @@ -1372,7 +1372,7 @@ static void falcon_reconfigure_mac_wrapper(struct ef4_nic *efx) ef4_oword_t reg; int link_speed, isolate; - isolate = !!ACCESS_ONCE(efx->reset_pending); + isolate = !!READ_ONCE(efx->reset_pending); switch (link_state->speed) { case 10000: link_speed = 3; break; diff --git a/drivers/net/ethernet/sfc/falcon/farch.c b/drivers/net/ethernet/sfc/falcon/farch.c index 
05916c710d8c..494884f6af4a 100644 --- a/drivers/net/ethernet/sfc/falcon/farch.c +++ b/drivers/net/ethernet/sfc/falcon/farch.c @@ -834,7 +834,7 @@ ef4_farch_handle_tx_event(struct ef4_channel *channel, ef4_qword_t *event) struct ef4_nic *efx = channel->efx; int tx_packets = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; if (likely(EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { @@ -990,7 +990,7 @@ ef4_farch_handle_rx_event(struct ef4_channel *channel, const ef4_qword_t *event) struct ef4_rx_queue *rx_queue; struct ef4_nic *efx = channel->efx; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return; rx_ev_cont = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT); @@ -1504,7 +1504,7 @@ irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx) irqreturn_t ef4_farch_legacy_interrupt(int irq, void *dev_id) { struct ef4_nic *efx = dev_id; - bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + bool soft_enabled = READ_ONCE(efx->irq_soft_enabled); ef4_oword_t *int_ker = efx->irq_status.addr; irqreturn_t result = IRQ_NONE; struct ef4_channel *channel; @@ -1596,7 +1596,7 @@ irqreturn_t ef4_farch_msi_interrupt(int irq, void *dev_id) "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n", irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker)); - if (!likely(ACCESS_ONCE(efx->irq_soft_enabled))) + if (!likely(READ_ONCE(efx->irq_soft_enabled))) return IRQ_HANDLED; /* Handle non-event-queue sources */ diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h index a4c4592f6023..54ca457cdb15 100644 --- a/drivers/net/ethernet/sfc/falcon/nic.h +++ b/drivers/net/ethernet/sfc/falcon/nic.h @@ -83,7 +83,7 @@ static inline struct ef4_tx_queue *ef4_tx_queue_partner(struct ef4_tx_queue *tx_ static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue, unsigned int write_count) { - unsigned int empty_read_count = 
ACCESS_ONCE(tx_queue->empty_read_count); + unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count); if (empty_read_count == 0) return false; @@ -464,11 +464,11 @@ irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx); static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel) { - return ACCESS_ONCE(channel->event_test_cpu); + return READ_ONCE(channel->event_test_cpu); } static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx) { - return ACCESS_ONCE(efx->last_irq_cpu); + return READ_ONCE(efx->last_irq_cpu); } /* Global Resources */ diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c index 6a75f4140a4b..6486814e97dc 100644 --- a/drivers/net/ethernet/sfc/falcon/tx.c +++ b/drivers/net/ethernet/sfc/falcon/tx.c @@ -134,8 +134,8 @@ static void ef4_tx_maybe_stop_queue(struct ef4_tx_queue *txq1) */ netif_tx_stop_queue(txq1->core_txq); smp_mb(); - txq1->old_read_count = ACCESS_ONCE(txq1->read_count); - txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + txq1->old_read_count = READ_ONCE(txq1->read_count); + txq2->old_read_count = READ_ONCE(txq2->read_count); fill_level = max(txq1->insert_count - txq1->old_read_count, txq2->insert_count - txq2->old_read_count); @@ -524,7 +524,7 @@ void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index) /* Check whether the hardware queue is now empty */ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count); + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); if (tx_queue->read_count == tx_queue->old_write_count) { smp_mb(); tx_queue->empty_read_count = diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index ba45150f53c7..86454d25a405 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -827,7 +827,7 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) struct efx_nic *efx 
= channel->efx; int tx_packets = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { @@ -979,7 +979,7 @@ efx_farch_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) struct efx_rx_queue *rx_queue; struct efx_nic *efx = channel->efx; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return; rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT); @@ -1520,7 +1520,7 @@ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx) irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id) { struct efx_nic *efx = dev_id; - bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + bool soft_enabled = READ_ONCE(efx->irq_soft_enabled); efx_oword_t *int_ker = efx->irq_status.addr; irqreturn_t result = IRQ_NONE; struct efx_channel *channel; @@ -1612,7 +1612,7 @@ irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id) "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n", irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker)); - if (!likely(ACCESS_ONCE(efx->irq_soft_enabled))) + if (!likely(READ_ONCE(efx->irq_soft_enabled))) return IRQ_HANDLED; /* Handle non-event-queue sources */ diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 4d7fb8af880d..7b51b6371724 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -81,7 +81,7 @@ static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count) { - unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); + unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count); if (empty_read_count == 0) return false; @@ -617,11 +617,11 @@ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx); static inline int efx_nic_event_test_irq_cpu(struct efx_channel 
*channel) { - return ACCESS_ONCE(channel->event_test_cpu); + return READ_ONCE(channel->event_test_cpu); } static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx) { - return ACCESS_ONCE(efx->last_irq_cpu); + return READ_ONCE(efx->last_irq_cpu); } /* Global Resources */ diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 60cdb97f58e2..56c2db398def 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -658,7 +658,7 @@ static void efx_ptp_send_times(struct efx_nic *efx, /* Write host time for specified period or until MC is done */ while ((timespec64_compare(&now.ts_real, &limit) < 0) && - ACCESS_ONCE(*mc_running)) { + READ_ONCE(*mc_running)) { struct timespec64 update_time; unsigned int host_time; @@ -668,7 +668,7 @@ static void efx_ptp_send_times(struct efx_nic *efx, do { pps_get_ts(&now); } while ((timespec64_compare(&now.ts_real, &update_time) < 0) && - ACCESS_ONCE(*mc_running)); + READ_ONCE(*mc_running)); /* Synchronise NIC with single word of time only */ host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS | @@ -832,14 +832,14 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) ptp->start.dma_addr); /* Clear flag that signals MC ready */ - ACCESS_ONCE(*start) = 0; + WRITE_ONCE(*start, 0); rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf, MC_CMD_PTP_IN_SYNCHRONIZE_LEN); EFX_WARN_ON_ONCE_PARANOID(rc); /* Wait for start from MCDI (or timeout) */ timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS); - while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) { + while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) { udelay(20); /* Usually start MCDI execution quickly */ loops++; } @@ -849,7 +849,7 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) if (!time_before(jiffies, timeout)) ++ptp->sync_timeouts; - if (ACCESS_ONCE(*start)) + if (READ_ONCE(*start)) efx_ptp_send_times(efx, &last_time); /* Collect results 
*/ diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 32bf1fecf864..efb66ea21f27 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -136,8 +136,8 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) */ netif_tx_stop_queue(txq1->core_txq); smp_mb(); - txq1->old_read_count = ACCESS_ONCE(txq1->read_count); - txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + txq1->old_read_count = READ_ONCE(txq1->read_count); + txq2->old_read_count = READ_ONCE(txq2->read_count); fill_level = max(txq1->insert_count - txq1->old_read_count, txq2->insert_count - txq2->old_read_count); @@ -752,7 +752,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) /* Check whether the hardware queue is now empty */ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count); + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); if (tx_queue->read_count == tx_queue->old_write_count) { smp_mb(); tx_queue->empty_read_count = diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 6a4e8e1bbd90..8ab0fb6892d5 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6245,7 +6245,7 @@ static void niu_get_rx_stats(struct niu *np, pkts = dropped = errors = bytes = 0; - rx_rings = ACCESS_ONCE(np->rx_rings); + rx_rings = READ_ONCE(np->rx_rings); if (!rx_rings) goto no_rings; @@ -6276,7 +6276,7 @@ static void niu_get_tx_stats(struct niu *np, pkts = errors = bytes = 0; - tx_rings = ACCESS_ONCE(np->tx_rings); + tx_rings = READ_ONCE(np->tx_rings); if (!tx_rings) goto no_rings; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 21b71ae947fd..b55b29b90b88 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -257,7 +257,7 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap, * and validate that the result isn't NULL - in case we are * racing against queue removal. 
*/ - int numvtaps = ACCESS_ONCE(tap->numvtaps); + int numvtaps = READ_ONCE(tap->numvtaps); __u32 rxq; if (!numvtaps) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e21bf90b819f..27cd50c5bc9e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -469,7 +469,7 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, u32 numqueues = 0; rcu_read_lock(); - numqueues = ACCESS_ONCE(tun->numqueues); + numqueues = READ_ONCE(tun->numqueues); txq = __skb_get_hash_symmetric(skb); if (txq) { @@ -864,7 +864,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); - numqueues = ACCESS_ONCE(tun->numqueues); + numqueues = READ_ONCE(tun->numqueues); /* Drop packet if interface is not attached */ if (txq >= numqueues) diff --git a/drivers/net/wireless/ath/ath5k/desc.c b/drivers/net/wireless/ath/ath5k/desc.c index bd8d4392d68b..80f75139495f 100644 --- a/drivers/net/wireless/ath/ath5k/desc.c +++ b/drivers/net/wireless/ath/ath5k/desc.c @@ -500,13 +500,13 @@ ath5k_hw_proc_4word_tx_status(struct ath5k_hw *ah, tx_status = &desc->ud.ds_tx5212.tx_stat; - txstat1 = ACCESS_ONCE(tx_status->tx_status_1); + txstat1 = READ_ONCE(tx_status->tx_status_1); /* No frame has been send or error */ if (unlikely(!(txstat1 & AR5K_DESC_TX_STATUS1_DONE))) return -EINPROGRESS; - txstat0 = ACCESS_ONCE(tx_status->tx_status_0); + txstat0 = READ_ONCE(tx_status->tx_status_0); /* * Get descriptor status @@ -700,14 +700,14 @@ ath5k_hw_proc_5212_rx_status(struct ath5k_hw *ah, u32 rxstat0, rxstat1; rx_status = &desc->ud.ds_rx.rx_stat; - rxstat1 = ACCESS_ONCE(rx_status->rx_status_1); + rxstat1 = READ_ONCE(rx_status->rx_status_1); /* No frame received / not ready */ if (unlikely(!(rxstat1 & AR5K_5212_RX_DESC_STATUS1_DONE))) return -EINPROGRESS; memset(rs, 0, sizeof(struct ath5k_rx_status)); - rxstat0 = ACCESS_ONCE(rx_status->rx_status_0); + rxstat0 = READ_ONCE(rx_status->rx_status_0); /* * Frame receive 
status diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 613caca7dc02..785a0f33b7e6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3628,7 +3628,7 @@ static void brcmf_sdio_dataworker(struct work_struct *work) bus->dpc_running = true; wmb(); - while (ACCESS_ONCE(bus->dpc_triggered)) { + while (READ_ONCE(bus->dpc_triggered)) { bus->dpc_triggered = false; brcmf_sdio_dpc(bus); bus->idlecount = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 231878969332..0f45f34e39d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1118,7 +1118,7 @@ void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state) static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state) { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); - bool calibrating = ACCESS_ONCE(mvm->calibrating); + bool calibrating = READ_ONCE(mvm->calibrating); if (state) set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 6f2e2af23219..6e9d3289b9d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -652,7 +652,7 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) return -1; } else if (info.control.vif->type == NL80211_IFTYPE_STATION && is_multicast_ether_addr(hdr->addr1)) { - u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id); + u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id); if (ap_sta_id != IWL_MVM_INVALID_STA) sta_id = ap_sta_id; @@ -700,7 +700,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) + tcp_hdrlen(skb); - dbg_max_amsdu_len = 
ACCESS_ONCE(mvm->max_amsdu_len); + dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len); if (!sta->max_amsdu_len || !ieee80211_is_data_qos(hdr->frame_control) || diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index a06b6612b658..f25ce3a1ea50 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1247,7 +1247,7 @@ restart: spin_lock(&rxq->lock); /* uCode's read index (stored in shared DRAM) indicates the last Rx * buffer that the driver may process (last buffer filled by ucode). */ - r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; + r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; i = rxq->read; /* W/A 9000 device step A0 wrap-around bug */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2e3e013ec95a..9ad3f4fe5894 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2076,12 +2076,12 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, int txq_idx) IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", txq_idx); txq = trans_pcie->txq[txq_idx]; - wr_ptr = ACCESS_ONCE(txq->write_ptr); + wr_ptr = READ_ONCE(txq->write_ptr); - while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) && + while (txq->read_ptr != READ_ONCE(txq->write_ptr) && !time_after(jiffies, now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { - u8 write_ptr = ACCESS_ONCE(txq->write_ptr); + u8 write_ptr = READ_ONCE(txq->write_ptr); if (WARN_ONCE(wr_ptr != write_ptr, "WR pointer moved while flushing %d -> %d\n", @@ -2553,7 +2553,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, spin_lock(&rxq->lock); - r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; + r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; for (i = rxq->read, j = 0; i != r && j < allocated_rb_nums; @@ -2814,7 
+2814,7 @@ static struct iwl_trans_dump_data /* Dump RBs is supported only for pre-9000 devices (1 queue) */ struct iwl_rxq *rxq = &trans_pcie->rxq[0]; /* RBs */ - num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) + num_rbs = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK; len += num_rbs * (sizeof(*data) + diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6467ffac9811..d2b3d6177a55 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1380,7 +1380,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, mac80211_hwsim_monitor_rx(hw, skb, channel); /* wmediumd mode check */ - _portid = ACCESS_ONCE(data->wmediumd); + _portid = READ_ONCE(data->wmediumd); if (_portid) return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); @@ -1477,7 +1477,7 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, struct ieee80211_channel *chan) { struct mac80211_hwsim_data *data = hw->priv; - u32 _pid = ACCESS_ONCE(data->wmediumd); + u32 _pid = READ_ONCE(data->wmediumd); if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) { struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f05cfc83c9c8..f946bf889015 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -996,7 +996,7 @@ static void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; - while (!ACCESS_ONCE(sess->logout_completed)) { + while (!READ_ONCE(sess->logout_completed)) { if (!traced) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf086, "%s: waiting for sess %p logout\n", diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 942d094269fb..9469695f5871 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -985,7 +985,7 @@ static unsigned int 
tcmu_handle_completions(struct tcmu_dev *udev) mb = udev->mb_addr; tcmu_flush_dcache_range(mb, sizeof(*mb)); - while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) { + while (udev->cmdr_last_cleaned != READ_ONCE(mb->cmd_tail)) { struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned; struct tcmu_cmd *cmd; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 3e865dbf878c..fbaa2a90d25d 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -483,7 +483,7 @@ static ssize_t wdm_read if (rv < 0) return -ERESTARTSYS; - cntr = ACCESS_ONCE(desc->length); + cntr = READ_ONCE(desc->length); if (cntr == 0) { desc->read = 0; retry: diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e9326f31db8d..4ae667d8c238 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -150,7 +150,7 @@ static int usbfs_increase_memory_usage(u64 amount) { u64 lim; - lim = ACCESS_ONCE(usbfs_memory_mb); + lim = READ_ONCE(usbfs_memory_mb); lim <<= 20; atomic64_add(amount, &usbfs_memory_usage); diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index d930bfda4010..58d59c5f8592 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -973,7 +973,7 @@ static ssize_t interface_show(struct device *dev, struct device_attribute *attr, char *string; intf = to_usb_interface(dev); - string = ACCESS_ONCE(intf->cur_altsetting->string); + string = READ_ONCE(intf->cur_altsetting->string); if (!string) return 0; return sprintf(buf, "%s\n", string); @@ -989,7 +989,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, intf = to_usb_interface(dev); udev = interface_to_usbdev(intf); - alt = ACCESS_ONCE(intf->cur_altsetting); + alt = READ_ONCE(intf->cur_altsetting); return sprintf(buf, "usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02X" "ic%02Xisc%02Xip%02Xin%02X\n", diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 
1f9941145746..0b59fa50aa30 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -1261,7 +1261,7 @@ static int gr_handle_in_ep(struct gr_ep *ep) if (!req->last_desc) return 0; - if (ACCESS_ONCE(req->last_desc->ctrl) & GR_DESC_IN_CTRL_EN) + if (READ_ONCE(req->last_desc->ctrl) & GR_DESC_IN_CTRL_EN) return 0; /* Not put in hardware buffers yet */ if (gr_read32(&ep->regs->epstat) & (GR_EPSTAT_B1 | GR_EPSTAT_B0)) @@ -1290,7 +1290,7 @@ static int gr_handle_out_ep(struct gr_ep *ep) if (!req->curr_desc) return 0; - ctrl = ACCESS_ONCE(req->curr_desc->ctrl); + ctrl = READ_ONCE(req->curr_desc->ctrl); if (ctrl & GR_DESC_OUT_CTRL_EN) return 0; /* Not received yet */ diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 44924824fa41..c86f89babd57 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -785,7 +785,7 @@ static void io_watchdog_func(unsigned long _ohci) } /* find the last TD processed by the controller. */ - head = hc32_to_cpu(ohci, ACCESS_ONCE(ed->hwHeadP)) & TD_MASK; + head = hc32_to_cpu(ohci, READ_ONCE(ed->hwHeadP)) & TD_MASK; td_start = td; td_next = list_prepare_entry(td, &ed->td_list, td_list); list_for_each_entry_continue(td_next, &ed->td_list, td_list) { diff --git a/drivers/usb/host/uhci-hcd.h b/drivers/usb/host/uhci-hcd.h index 91b22b2ea3aa..09a2a259941b 100644 --- a/drivers/usb/host/uhci-hcd.h +++ b/drivers/usb/host/uhci-hcd.h @@ -186,7 +186,7 @@ struct uhci_qh { * We need a special accessor for the element pointer because it is * subject to asynchronous updates by the controller. */ -#define qh_element(qh) ACCESS_ONCE((qh)->element) +#define qh_element(qh) READ_ONCE((qh)->element) #define LINK_TO_QH(uhci, qh) (UHCI_PTR_QH((uhci)) | \ cpu_to_hc32((uhci), (qh)->dma_handle)) @@ -274,7 +274,7 @@ struct uhci_td { * subject to asynchronous updates by the controller. 
*/ #define td_status(uhci, td) hc32_to_cpu((uhci), \ - ACCESS_ONCE((td)->status)) + READ_ONCE((td)->status)) #define LINK_TO_TD(uhci, td) (cpu_to_hc32((uhci), (td)->dma_handle)) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index f5a86f651f38..2bc3705a99bd 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -665,7 +665,7 @@ static int vfio_dev_viable(struct device *dev, void *data) { struct vfio_group *group = data; struct vfio_device *device; - struct device_driver *drv = ACCESS_ONCE(dev->driver); + struct device_driver *drv = READ_ONCE(dev->driver); struct vfio_unbound_dev *unbound; int ret = -EINVAL; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 046f6d280af5..35e929f132e8 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -929,7 +929,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) continue; } - tpg = ACCESS_ONCE(vs_tpg[*target]); + tpg = READ_ONCE(vs_tpg[*target]); if (unlikely(!tpg)) { /* Target does not exist, fail the request */ vhost_scsi_send_bad_target(vs, vq, head, out); diff --git a/fs/aio.c b/fs/aio.c index 5a2487217072..e6de7715228c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -576,7 +576,7 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) * actually has a cancel function, hence the cmpxchg() */ - cancel = ACCESS_ONCE(kiocb->ki_cancel); + cancel = READ_ONCE(kiocb->ki_cancel); do { if (!cancel || cancel == KIOCB_CANCELLED) return -EINVAL; diff --git a/fs/buffer.c b/fs/buffer.c index 170df856bdb9..32ce01f0f95f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1692,7 +1692,8 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * BUG_ON(!PageLocked(page)); if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); + create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits), + b_state); return page_buffers(page); } diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 8e704d12a1cf..0083bd4fcaa5 100644 --- 
a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -373,7 +373,7 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) struct fscrypt_info *prev; if (ci == NULL) - ci = ACCESS_ONCE(inode->i_crypt_info); + ci = READ_ONCE(inode->i_crypt_info); if (ci == NULL) return; diff --git a/fs/direct-io.c b/fs/direct-io.c index b53e66d9abd7..98fe1325da9d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1152,7 +1152,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { - unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); + unsigned i_blkbits = READ_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; diff --git a/fs/exec.c b/fs/exec.c index 3e14ba25f678..1d6243d9f2b6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1911,7 +1911,7 @@ void set_dumpable(struct mm_struct *mm, int value) return; do { - old = ACCESS_ONCE(mm->flags); + old = READ_ONCE(mm->flags); new = (old & ~MMF_DUMPABLE_MASK) | value; } while (cmpxchg(&mm->flags, old, new) != old); } diff --git a/fs/fcntl.c b/fs/fcntl.c index 448a1119f0be..57bf2964bb83 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -724,7 +724,7 @@ static void send_sigio_to_task(struct task_struct *p, * F_SETSIG can change ->signum lockless in parallel, make * sure we read it once and use the same value throughout. 
*/ - int signum = ACCESS_ONCE(fown->signum); + int signum = READ_ONCE(fown->signum); if (!sigio_perm(p, fown, signum)) return; diff --git a/fs/fs_pin.c b/fs/fs_pin.c index e747b3d720ee..2d07f292b625 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -78,7 +78,7 @@ void mnt_pin_kill(struct mount *m) while (1) { struct hlist_node *p; rcu_read_lock(); - p = ACCESS_ONCE(m->mnt_pins.first); + p = READ_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; @@ -92,7 +92,7 @@ void group_pin_kill(struct hlist_head *p) while (1) { struct hlist_node *q; rcu_read_lock(); - q = ACCESS_ONCE(p->first); + q = READ_ONCE(p->first); if (!q) { rcu_read_unlock(); break; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 13c65dd2d37d..a42d89371748 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -33,7 +33,7 @@ static struct fuse_dev *fuse_get_dev(struct file *file) * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. */ - return ACCESS_ONCE(file->private_data); + return READ_ONCE(file->private_data); } static void fuse_request_init(struct fuse_req *req, struct page **pages, diff --git a/fs/inode.c b/fs/inode.c index d1e35b53bb23..fd401028a309 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2090,7 +2090,7 @@ void inode_set_flags(struct inode *inode, unsigned int flags, WARN_ON_ONCE(flags & ~mask); do { - old_flags = ACCESS_ONCE(inode->i_flags); + old_flags = READ_ONCE(inode->i_flags); new_flags = (old_flags & ~mask) | flags; } while (unlikely(cmpxchg(&inode->i_flags, old_flags, new_flags) != old_flags)); diff --git a/fs/namei.c b/fs/namei.c index c75ea03ca147..40a0f34bf990 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1209,7 +1209,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) /* Given that we're not holding a lock here, we retain the value in a * local variable for each dentry as we look at it so that we don't see * the components of that value change under us */ - while (managed = 
ACCESS_ONCE(path->dentry->d_flags), + while (managed = READ_ONCE(path->dentry->d_flags), managed &= DCACHE_MANAGED_DENTRY, unlikely(managed != 0)) { /* Allow the filesystem to manage the transit without i_mutex @@ -1394,7 +1394,7 @@ int follow_down(struct path *path) unsigned managed; int ret; - while (managed = ACCESS_ONCE(path->dentry->d_flags), + while (managed = READ_ONCE(path->dentry->d_flags), unlikely(managed & DCACHE_MANAGED_DENTRY)) { /* Allow the filesystem to manage the transit without i_mutex * being held. diff --git a/fs/namespace.c b/fs/namespace.c index d18deb4c410b..e158ec6b527b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -353,7 +353,7 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) + while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) cpu_relax(); /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ceaeb1f6fb6..f439f1c45008 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1081,7 +1081,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) int error; if (flags & LOOKUP_RCU) { - parent = ACCESS_ONCE(dentry->d_parent); + parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; @@ -1168,7 +1168,7 @@ out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: if (flags & LOOKUP_RCU) { - if (parent != ACCESS_ONCE(dentry->d_parent)) + if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else dput(parent); @@ -1582,7 +1582,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct inode *dir; if (flags & LOOKUP_RCU) { - parent = ACCESS_ONCE(dentry->d_parent); + parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; @@ -1596,7 +1596,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int 
flags) ret = -ECHILD; if (!(flags & LOOKUP_RCU)) dput(parent); - else if (parent != ACCESS_ONCE(dentry->d_parent)) + else if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; goto out; } diff --git a/fs/proc/array.c b/fs/proc/array.c index 77a8eacbe032..375e8bf0dd24 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -453,7 +453,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, cutime = sig->cutime; cstime = sig->cstime; cgtime = sig->cgtime; - rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); + rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); /* add up live thread stats at the group level */ if (whole) { diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 99dff222fe67..03afd5150916 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -27,7 +27,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) poll_wait(file, &p->ns->poll, wait); - event = ACCESS_ONCE(ns->event); + event = READ_ONCE(ns->event); if (m->poll_event != event) { m->poll_event = event; res |= POLLERR | POLLPRI; diff --git a/fs/splice.c b/fs/splice.c index f3084cce0ea6..39e2dc01ac12 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -253,7 +253,7 @@ EXPORT_SYMBOL(add_to_pipe); */ int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - unsigned int buffers = ACCESS_ONCE(pipe->buffers); + unsigned int buffers = READ_ONCE(pipe->buffers); spd->nr_pages_max = buffers; if (buffers <= PIPE_DEF_BUFFERS) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1c713fd5b3e6..f46d133c0949 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -381,7 +381,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in __get_user_pages if userfaultfd_release waits on the * caller of handle_userfault to release the mmap_sem. 
*/ - if (unlikely(ACCESS_ONCE(ctx->released))) { + if (unlikely(READ_ONCE(ctx->released))) { /* * Don't return VM_FAULT_SIGBUS in this case, so a non * cooperative manager can close the uffd after the @@ -477,7 +477,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) vmf->flags, reason); up_read(&mm->mmap_sem); - if (likely(must_wait && !ACCESS_ONCE(ctx->released) && + if (likely(must_wait && !READ_ONCE(ctx->released) && (return_to_userland ? !signal_pending(current) : !fatal_signal_pending(current)))) { wake_up_poll(&ctx->fd_wqh, POLLIN); @@ -586,7 +586,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, set_current_state(TASK_KILLABLE); if (ewq->msg.event == 0) break; - if (ACCESS_ONCE(ctx->released) || + if (READ_ONCE(ctx->released) || fatal_signal_pending(current)) { /* * &ewq->wq may be queued in fork_event, but @@ -833,7 +833,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) struct userfaultfd_wake_range range = { .len = 0, }; unsigned long new_flags; - ACCESS_ONCE(ctx->released) = true; + WRITE_ONCE(ctx->released, true); if (!mmget_not_zero(mm)) goto wakeup; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 51bf7b827387..129975970d99 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -592,9 +592,9 @@ xlog_valid_lsn( * a transiently forward state. Instead, we can see the LSN in a * transiently behind state if we happen to race with a cycle wrap. 
*/ - cur_cycle = ACCESS_ONCE(log->l_curr_cycle); + cur_cycle = READ_ONCE(log->l_curr_cycle); smp_rmb(); - cur_block = ACCESS_ONCE(log->l_curr_block); + cur_block = READ_ONCE(log->l_curr_block); if ((CYCLE_LSN(lsn) > cur_cycle) || (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 8fbe259b197c..0a7ce668f8e0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -236,7 +236,7 @@ static inline unsigned long __ffs64(u64 word) typeof(*ptr) old, new; \ \ do { \ - old = ACCESS_ONCE(*ptr); \ + old = READ_ONCE(*ptr); \ new = (old & ~mask) | bits; \ } while (cmpxchg(ptr, old, new) != old); \ \ @@ -251,7 +251,7 @@ static inline unsigned long __ffs64(u64 word) typeof(*ptr) old, new; \ \ do { \ - old = ACCESS_ONCE(*ptr); \ + old = READ_ONCE(*ptr); \ new = old & ~clear; \ } while (!(old & test) && \ cmpxchg(ptr, old, new) != old); \ diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index a4be70398ce1..36dd4ffb5715 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -88,7 +88,7 @@ static inline void dql_queued(struct dql *dql, unsigned int count) /* Returns how many objects can be queued, < 0 indicates over limit. */ static inline int dql_avail(const struct dql *dql) { - return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued); + return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued); } /* Record number of completed objects and recalculate the limit. 
*/ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 14bc21c2ee7f..785a00ca4628 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -221,7 +221,7 @@ extern struct page *huge_zero_page; static inline bool is_huge_zero_page(struct page *page) { - return ACCESS_ONCE(huge_zero_page) == page; + return READ_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 30294603526f..d95cae09dea0 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -247,7 +247,7 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline int team_num_to_port_index(struct team *team, unsigned int num) { - int en_port_count = ACCESS_ONCE(team->en_port_count); + int en_port_count = READ_ONCE(team->en_port_count); if (unlikely(!en_port_count)) return 0; diff --git a/include/linux/llist.h b/include/linux/llist.h index 1957635e6d5f..85abc2915e8d 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -198,7 +198,7 @@ static inline void init_llist_head(struct llist_head *list) */ static inline bool llist_empty(const struct llist_head *head) { - return ACCESS_ONCE(head->first) == NULL; + return READ_ONCE(head->first) == NULL; } static inline struct llist_node *llist_next(struct llist_node *node) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2efb08a60e63..f0fc4700b6ff 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev) static inline void pm_runtime_mark_last_busy(struct device *dev) { - ACCESS_ONCE(dev->power.last_busy) = jiffies; + WRITE_ONCE(dev->power.last_busy, jiffies); } static inline bool pm_runtime_is_irq_safe(struct device *dev) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4f4f786255ef..3fadb6f9982b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ 
-983,12 +983,12 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]); + return READ_ONCE(ipvs->sysctl_sync_threshold[1]); } static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period); + return READ_ONCE(ipvs->sysctl_sync_refresh_period); } static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) @@ -1013,7 +1013,7 @@ static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs) static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_ports); + return READ_ONCE(ipvs->sysctl_sync_ports); } static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs) diff --git a/kernel/acct.c b/kernel/acct.c index 5e72af29ab73..21eedd0dd81a 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -146,7 +146,7 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) again: smp_rmb(); rcu_read_lock(); - res = to_acct(ACCESS_ONCE(ns->bacct)); + res = to_acct(READ_ONCE(ns->bacct)); if (!res) { rcu_read_unlock(); return NULL; @@ -158,7 +158,7 @@ again: } rcu_read_unlock(); mutex_lock(&res->lock); - if (res != to_acct(ACCESS_ONCE(ns->bacct))) { + if (res != to_acct(READ_ONCE(ns->bacct))) { mutex_unlock(&res->lock); acct_put(res); goto again; diff --git a/kernel/events/core.c b/kernel/events/core.c index 824a583079a1..8fd2f2d1358a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1200,7 +1200,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting) again: rcu_read_lock(); - ctx = ACCESS_ONCE(event->ctx); + ctx = READ_ONCE(event->ctx); if (!atomic_inc_not_zero(&ctx->refcount)) { rcu_read_unlock(); goto again; @@ -5302,8 +5302,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock; - aux_offset = ACCESS_ONCE(rb->user_page->aux_offset); - aux_size = 
ACCESS_ONCE(rb->user_page->aux_size); + aux_offset = READ_ONCE(rb->user_page->aux_offset); + aux_size = READ_ONCE(rb->user_page->aux_size); if (aux_offset < perf_data_size(rb) + PAGE_SIZE) goto aux_unlock; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f684d8e5fa2b..f3e37971c842 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -381,7 +381,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * (B) <-> (C) ordering is still observed by the pmu driver. */ if (!rb->aux_overwrite) { - aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); + aux_tail = READ_ONCE(rb->user_page->aux_tail); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); diff --git a/kernel/exit.c b/kernel/exit.c index f6cad39f35df..6b4298a41167 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1339,7 +1339,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition * can't confuse the checks below. */ - int exit_state = ACCESS_ONCE(p->exit_state); + int exit_state = READ_ONCE(p->exit_state); int ret; if (unlikely(exit_state == EXIT_DEAD)) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 81279c6602ff..845f3805c73d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2724,7 +2724,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, * if it happened, we have to fail the write. 
*/ barrier(); - if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { + if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) { local_dec(&cpu_buffer->committing); local_dec(&cpu_buffer->commits); return NULL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 652c682707cd..9050c8b3ccde 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1459,7 +1459,7 @@ extern struct trace_event_file *find_event_file(struct trace_array *tr, static inline void *event_file_data(struct file *filp) { - return ACCESS_ONCE(file_inode(filp)->i_private); + return READ_ONCE(file_inode(filp)->i_private); } extern struct mutex event_mutex; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 49cb41412eec..780262210c9a 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -77,7 +77,7 @@ check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; static int tracer_frame; - int frame_size = ACCESS_ONCE(tracer_frame); + int frame_size = READ_ONCE(tracer_frame); int i, x; this_size = ((unsigned long)stack) & (THREAD_SIZE-1); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index c490f1e4313b..d32b45662fb6 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -894,7 +894,7 @@ static bool new_idmap_permitted(const struct file *file, int proc_setgroups_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; - unsigned long userns_flags = ACCESS_ONCE(ns->flags); + unsigned long userns_flags = READ_ONCE(ns->flags); seq_printf(seq, "%s\n", (userns_flags & USERNS_SETGROUPS_ALLOWED) ? 
diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 155c55d8db5f..fe7953aead82 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -39,7 +39,7 @@ begin_node: /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); smp_read_barrier_depends(); - cursor = ACCESS_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); } node = assoc_array_ptr_to_node(cursor); @@ -55,7 +55,7 @@ begin_node: */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer @@ -89,7 +89,7 @@ continue_node: smp_read_barrier_depends(); for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; @@ -98,7 +98,7 @@ continue_node: finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ - parent = ACCESS_ONCE(node->back_pointer); + parent = READ_ONCE(node->back_pointer); slot = node->parent_slot; if (parent == stop) return 0; @@ -107,7 +107,7 @@ finished_node: shortcut = assoc_array_ptr_to_shortcut(parent); smp_read_barrier_depends(); cursor = parent; - parent = ACCESS_ONCE(shortcut->back_pointer); + parent = READ_ONCE(shortcut->back_pointer); slot = shortcut->parent_slot; if (parent == stop) return 0; @@ -147,7 +147,7 @@ int assoc_array_iterate(const struct assoc_array *array, void *iterator_data), void *iterator_data) { - struct assoc_array_ptr *root = ACCESS_ONCE(array->root); + struct assoc_array_ptr *root = READ_ONCE(array->root); if (!root) return 0; @@ -194,7 +194,7 @@ assoc_array_walk(const struct assoc_array *array, pr_devel("-->%s()\n", __func__); - cursor = ACCESS_ONCE(array->root); + cursor = READ_ONCE(array->root); if (!cursor) return assoc_array_walk_tree_empty; @@ -220,7 +220,7 @@ 
consider_node: slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); @@ -294,7 +294,7 @@ follow_shortcut: } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ - cursor = ACCESS_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; @@ -337,7 +337,7 @@ void *assoc_array_find(const struct assoc_array *array, * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index f346715e2255..81770a55cb16 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -20,7 +20,7 @@ void dql_completed(struct dql *dql, unsigned int count) unsigned int ovlimit, completed, num_queued; bool all_prev_completed; - num_queued = ACCESS_ONCE(dql->num_queued); + num_queued = READ_ONCE(dql->num_queued); /* Can't complete more than what's in queue */ BUG_ON(count > num_queued - dql->num_completed); diff --git a/lib/llist.c b/lib/llist.c index ae5872b1df0c..7062e931a7bb 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -41,7 +41,7 @@ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_node *first; do { - new_last->next = first = ACCESS_ONCE(head->first); + new_last->next = first = READ_ONCE(head->first); } while (cmpxchg(&head->first, first, new_first) != first); return !first; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 86c3385b9eb3..1746bae94d41 100644 --- 
a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -620,8 +620,8 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp rcu_read_lock(); for (i = 0; i < depth; i++, d = p) { - p = ACCESS_ONCE(d->d_parent); - array[i] = ACCESS_ONCE(d->d_name.name); + p = READ_ONCE(d->d_parent); + array[i] = READ_ONCE(d->d_name.name); if (p == d) { if (i) array[i] = ""; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 269b5df58543..c3bf907a03ee 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2715,7 +2715,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink, struct shrink_control *sc) { struct pglist_data *pgdata = NODE_DATA(sc->nid); - return ACCESS_ONCE(pgdata->split_queue_len); + return READ_ONCE(pgdata->split_queue_len); } static unsigned long deferred_split_scan(struct shrinker *shrink, diff --git a/net/core/dev.c b/net/core/dev.c index 11596a302a26..61559ca3980b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3725,7 +3725,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; - cpu = ACCESS_ONCE(rflow->cpu); + cpu = READ_ONCE(rflow->cpu); if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6e1e10ff433a..3b2034f6d49d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3377,7 +3377,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { - unsigned int burst = ACCESS_ONCE(pkt_dev->burst); + unsigned int burst = READ_ONCE(pkt_dev->burst); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; struct sk_buff *skb; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index af74d0433453..f9597ba26599 100644 --- a/net/ipv4/inet_fragment.c +++ 
b/net/ipv4/inet_fragment.c @@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work) local_bh_disable(); - for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { + for (i = READ_ONCE(f->next_bucket); budget; --budget) { evicted += inet_evict_bucket(f, &f->hash[i]); i = (i + 1) & (INETFRAGS_HASHSZ - 1); if (evicted > INETFRAGS_EVICT_MAX) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3d9f1c2f81c5..c0864562083b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs) { u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; - u32 old = ACCESS_ONCE(*p_tstamp); + u32 old = READ_ONCE(*p_tstamp); u32 now = (u32)jiffies; u32 new, delta = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bc9e46a5369..48531da1aba6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1908,7 +1908,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + win_divisor = READ_ONCE(sysctl_tcp_tso_win_divisor); if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ebfbccae62fd..02ec9a349303 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1853,7 +1853,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - encap_rcv = ACCESS_ONCE(up->encap_rcv); + encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; @@ -2298,7 +2298,7 @@ void udp_destroy_sock(struct sock *sk) unlock_sock_fast(sk, slow); if (static_key_false(&udp_encap_needed) && up->encap_type) { void (*encap_destroy)(struct sock *sk); - encap_destroy = ACCESS_ONCE(up->encap_destroy); + encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } diff 
--git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a1c24443cd9e..dab946554157 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -490,7 +490,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, if (!t) goto out; - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto out; @@ -899,7 +899,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); if (t) { - u8 tproto = ACCESS_ONCE(t->parms.proto); + u8 tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto drop; @@ -1233,7 +1233,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; @@ -1303,7 +1303,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 40d7234c27b9..3f30fa313bf2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -606,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - encap_rcv = ACCESS_ONCE(up->encap_rcv); + encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; @@ -1432,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk) if (static_key_false(&udpv6_encap_needed) && up->encap_type) { void (*encap_destroy)(struct sock *sk); - encap_destroy = ACCESS_ONCE(up->encap_destroy); + encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index dd3e83328ad5..82cb93f66b9b 100644 --- 
a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL; if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); @@ -214,7 +214,7 @@ drop: kfree_skb(skb); goto out; handle_station: - sta_handler = ACCESS_ONCE(llc_station_handler); + sta_handler = READ_ONCE(llc_station_handler); if (!sta_handler) goto drop; sta_handler(skb); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 69615016d5bf..214d2ba02877 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2008,7 +2008,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { - u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); + u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index d177dd066504..4d748975117d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register); static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) { - return ACCESS_ONCE(calipso_ops); + return READ_ONCE(calipso_ops); } /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d396cb61a280..eb866647a27a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14201,7 +14201,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; - u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); + u32 nlportid = 
READ_ONCE(wdev->ap_unexpected_nlportid); if (!nlportid) return false; diff --git a/sound/firewire/amdtp-am824.c b/sound/firewire/amdtp-am824.c index 23ccddb20de1..4210e5c6262e 100644 --- a/sound/firewire/amdtp-am824.c +++ b/sound/firewire/amdtp-am824.c @@ -247,7 +247,7 @@ void amdtp_am824_midi_trigger(struct amdtp_stream *s, unsigned int port, struct amdtp_am824 *p = s->protocol; if (port < p->midi_ports) - ACCESS_ONCE(p->midi[port]) = midi; + WRITE_ONCE(p->midi[port], midi); } EXPORT_SYMBOL_GPL(amdtp_am824_midi_trigger); @@ -336,7 +336,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, __be32 *buffe unsigned int data_blocks, unsigned int *syt) { struct amdtp_am824 *p = s->protocol; - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { @@ -357,7 +357,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, __be32 *buffe unsigned int data_blocks, unsigned int *syt) { struct amdtp_am824 *p = s->protocol; - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 3fc581a5ad62..4a1dc145327b 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -376,7 +376,7 @@ static void update_pcm_pointers(struct amdtp_stream *s, ptr = s->pcm_buffer_pointer + frames; if (ptr >= pcm->runtime->buffer_size) ptr -= pcm->runtime->buffer_size; - ACCESS_ONCE(s->pcm_buffer_pointer) = ptr; + WRITE_ONCE(s->pcm_buffer_pointer, ptr); s->pcm_period_pointer += frames; if (s->pcm_period_pointer >= pcm->runtime->period_size) { @@ -388,7 +388,7 @@ static void update_pcm_pointers(struct amdtp_stream *s, static void pcm_period_tasklet(unsigned long data) { struct amdtp_stream *s = (void *)data; - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = 
READ_ONCE(s->pcm); if (pcm) snd_pcm_period_elapsed(pcm); @@ -453,7 +453,7 @@ static int handle_out_packet(struct amdtp_stream *s, s->data_block_counter = (s->data_block_counter + data_blocks) & 0xff; - buffer[0] = cpu_to_be32(ACCESS_ONCE(s->source_node_id_field) | + buffer[0] = cpu_to_be32(READ_ONCE(s->source_node_id_field) | (s->data_block_quadlets << CIP_DBS_SHIFT) | ((s->sph << CIP_SPH_SHIFT) & CIP_SPH_MASK) | s->data_block_counter); @@ -472,7 +472,7 @@ static int handle_out_packet(struct amdtp_stream *s, if (queue_out_packet(s, payload_length) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -504,7 +504,7 @@ static int handle_out_packet_without_header(struct amdtp_stream *s, if (queue_out_packet(s, payload_length) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -621,7 +621,7 @@ end: if (queue_in_packet(s) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -649,7 +649,7 @@ static int handle_in_packet_without_header(struct amdtp_stream *s, if (queue_in_packet(s) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -947,7 +947,7 @@ unsigned long amdtp_stream_pcm_pointer(struct amdtp_stream *s) if (!in_interrupt() && amdtp_stream_running(s)) fw_iso_context_flush_completions(s->context); - return ACCESS_ONCE(s->pcm_buffer_pointer); + return READ_ONCE(s->pcm_buffer_pointer); } EXPORT_SYMBOL(amdtp_stream_pcm_pointer); @@ -977,9 +977,8 @@ EXPORT_SYMBOL(amdtp_stream_pcm_ack); void amdtp_stream_update(struct amdtp_stream *s) { /* Precomputing. 
*/ - ACCESS_ONCE(s->source_node_id_field) = - (fw_parent_device(s->unit)->card->node_id << CIP_SID_SHIFT) & - CIP_SID_MASK; + WRITE_ONCE(s->source_node_id_field, + (fw_parent_device(s->unit)->card->node_id << CIP_SID_SHIFT) & CIP_SID_MASK); } EXPORT_SYMBOL(amdtp_stream_update); @@ -1022,7 +1021,7 @@ void amdtp_stream_pcm_abort(struct amdtp_stream *s) { struct snd_pcm_substream *pcm; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) snd_pcm_stop_xrun(pcm); } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index ed6eafd10992..f9abd8b07ce6 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -220,7 +220,7 @@ static inline bool amdtp_stream_pcm_running(struct amdtp_stream *s) static inline void amdtp_stream_pcm_trigger(struct amdtp_stream *s, struct snd_pcm_substream *pcm) { - ACCESS_ONCE(s->pcm) = pcm; + WRITE_ONCE(s->pcm, pcm); } static inline bool cip_sfc_is_base_44100(enum cip_sfc sfc) diff --git a/sound/firewire/digi00x/amdtp-dot.c b/sound/firewire/digi00x/amdtp-dot.c index 1453c34ce99f..4a884a335248 100644 --- a/sound/firewire/digi00x/amdtp-dot.c +++ b/sound/firewire/digi00x/amdtp-dot.c @@ -327,7 +327,7 @@ void amdtp_dot_midi_trigger(struct amdtp_stream *s, unsigned int port, struct amdtp_dot *p = s->protocol; if (port < MAX_MIDI_PORTS) - ACCESS_ONCE(p->midi[port]) = midi; + WRITE_ONCE(p->midi[port], midi); } static unsigned int process_tx_data_blocks(struct amdtp_stream *s, @@ -338,7 +338,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, struct snd_pcm_substream *pcm; unsigned int pcm_frames; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) { read_pcm_s32(s, pcm, buffer, data_blocks); pcm_frames = data_blocks; @@ -359,7 +359,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, struct snd_pcm_substream *pcm; unsigned int pcm_frames; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) { write_pcm_s32(s, pcm, buffer, 
data_blocks); pcm_frames = data_blocks; diff --git a/sound/firewire/fireface/amdtp-ff.c b/sound/firewire/fireface/amdtp-ff.c index 780da9deb2f0..77c7598b61ab 100644 --- a/sound/firewire/fireface/amdtp-ff.c +++ b/sound/firewire/fireface/amdtp-ff.c @@ -108,7 +108,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, unsigned int data_blocks, unsigned int *syt) { - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { @@ -127,7 +127,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, unsigned int data_blocks, unsigned int *syt) { - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { diff --git a/sound/firewire/fireface/ff-midi.c b/sound/firewire/fireface/ff-midi.c index 949ee56b4e0e..6a49611ee462 100644 --- a/sound/firewire/fireface/ff-midi.c +++ b/sound/firewire/fireface/ff-midi.c @@ -22,7 +22,7 @@ static int midi_playback_open(struct snd_rawmidi_substream *substream) ff->running_status[substream->number] = 0; ff->rx_midi_error[substream->number] = false; - ACCESS_ONCE(ff->rx_midi_substreams[substream->number]) = substream; + WRITE_ONCE(ff->rx_midi_substreams[substream->number], substream); return 0; } @@ -38,7 +38,7 @@ static int midi_playback_close(struct snd_rawmidi_substream *substream) struct snd_ff *ff = substream->rmidi->private_data; cancel_work_sync(&ff->rx_midi_work[substream->number]); - ACCESS_ONCE(ff->rx_midi_substreams[substream->number]) = NULL; + WRITE_ONCE(ff->rx_midi_substreams[substream->number], NULL); return 0; } @@ -52,10 +52,10 @@ static void midi_capture_trigger(struct snd_rawmidi_substream *substream, spin_lock_irqsave(&ff->lock, flags); if (up) - ACCESS_ONCE(ff->tx_midi_substreams[substream->number]) = - substream; + WRITE_ONCE(ff->tx_midi_substreams[substream->number], + substream); else - 
ACCESS_ONCE(ff->tx_midi_substreams[substream->number]) = NULL; + WRITE_ONCE(ff->tx_midi_substreams[substream->number], NULL); spin_unlock_irqrestore(&ff->lock, flags); } diff --git a/sound/firewire/fireface/ff-transaction.c b/sound/firewire/fireface/ff-transaction.c index dd6c8e839647..332b29f8ed75 100644 --- a/sound/firewire/fireface/ff-transaction.c +++ b/sound/firewire/fireface/ff-transaction.c @@ -12,7 +12,7 @@ static void finish_transmit_midi_msg(struct snd_ff *ff, unsigned int port, int rcode) { struct snd_rawmidi_substream *substream = - ACCESS_ONCE(ff->rx_midi_substreams[port]); + READ_ONCE(ff->rx_midi_substreams[port]); if (rcode_is_permanent_error(rcode)) { ff->rx_midi_error[port] = true; @@ -60,7 +60,7 @@ static inline void fill_midi_buf(struct snd_ff *ff, unsigned int port, static void transmit_midi_msg(struct snd_ff *ff, unsigned int port) { struct snd_rawmidi_substream *substream = - ACCESS_ONCE(ff->rx_midi_substreams[port]); + READ_ONCE(ff->rx_midi_substreams[port]); u8 *buf = (u8 *)ff->msg_buf[port]; int i, len; @@ -159,7 +159,7 @@ static void handle_midi_msg(struct fw_card *card, struct fw_request *request, */ index = (quad >> 8) & 0xff; if (index > 0) { - substream = ACCESS_ONCE(ff->tx_midi_substreams[0]); + substream = READ_ONCE(ff->tx_midi_substreams[0]); if (substream != NULL) { byte = quad & 0xff; snd_rawmidi_receive(substream, &byte, 1); @@ -169,7 +169,7 @@ static void handle_midi_msg(struct fw_card *card, struct fw_request *request, /* Message in second port. 
*/ index = (quad >> 24) & 0xff; if (index > 0) { - substream = ACCESS_ONCE(ff->tx_midi_substreams[1]); + substream = READ_ONCE(ff->tx_midi_substreams[1]); if (substream != NULL) { byte = (quad >> 16) & 0xff; snd_rawmidi_receive(substream, &byte, 1); diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c index 5826aa8362f1..46092fa3ff9b 100644 --- a/sound/firewire/isight.c +++ b/sound/firewire/isight.c @@ -96,7 +96,7 @@ static void isight_update_pointers(struct isight *isight, unsigned int count) ptr += count; if (ptr >= runtime->buffer_size) ptr -= runtime->buffer_size; - ACCESS_ONCE(isight->buffer_pointer) = ptr; + WRITE_ONCE(isight->buffer_pointer, ptr); isight->period_counter += count; if (isight->period_counter >= runtime->period_size) { @@ -111,7 +111,7 @@ static void isight_samples(struct isight *isight, struct snd_pcm_runtime *runtime; unsigned int count1; - if (!ACCESS_ONCE(isight->pcm_running)) + if (!READ_ONCE(isight->pcm_running)) return; runtime = isight->pcm->runtime; @@ -131,7 +131,7 @@ static void isight_samples(struct isight *isight, static void isight_pcm_abort(struct isight *isight) { - if (ACCESS_ONCE(isight->pcm_active)) + if (READ_ONCE(isight->pcm_active)) snd_pcm_stop_xrun(isight->pcm); } @@ -141,7 +141,7 @@ static void isight_dropped_samples(struct isight *isight, unsigned int total) u32 dropped; unsigned int count1; - if (!ACCESS_ONCE(isight->pcm_running)) + if (!READ_ONCE(isight->pcm_running)) return; runtime = isight->pcm->runtime; @@ -293,7 +293,7 @@ static int isight_hw_params(struct snd_pcm_substream *substream, if (err < 0) return err; - ACCESS_ONCE(isight->pcm_active) = true; + WRITE_ONCE(isight->pcm_active, true); return 0; } @@ -331,7 +331,7 @@ static int isight_hw_free(struct snd_pcm_substream *substream) { struct isight *isight = substream->private_data; - ACCESS_ONCE(isight->pcm_active) = false; + WRITE_ONCE(isight->pcm_active, false); mutex_lock(&isight->mutex); isight_stop_streaming(isight); @@ -424,10 +424,10 @@ static 
int isight_trigger(struct snd_pcm_substream *substream, int cmd) switch (cmd) { case SNDRV_PCM_TRIGGER_START: - ACCESS_ONCE(isight->pcm_running) = true; + WRITE_ONCE(isight->pcm_running, true); break; case SNDRV_PCM_TRIGGER_STOP: - ACCESS_ONCE(isight->pcm_running) = false; + WRITE_ONCE(isight->pcm_running, false); break; default: return -EINVAL; @@ -439,7 +439,7 @@ static snd_pcm_uframes_t isight_pointer(struct snd_pcm_substream *substream) { struct isight *isight = substream->private_data; - return ACCESS_ONCE(isight->buffer_pointer); + return READ_ONCE(isight->buffer_pointer); } static int isight_create_pcm(struct isight *isight) diff --git a/sound/firewire/motu/amdtp-motu.c b/sound/firewire/motu/amdtp-motu.c index 96f0091144bb..f0555a24d90e 100644 --- a/sound/firewire/motu/amdtp-motu.c +++ b/sound/firewire/motu/amdtp-motu.c @@ -310,7 +310,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, if (p->midi_ports) read_midi_messages(s, buffer, data_blocks); - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (data_blocks > 0 && pcm) read_pcm_s32(s, pcm->runtime, buffer, data_blocks); @@ -374,7 +374,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, if (p->midi_ports) write_midi_messages(s, buffer, data_blocks); - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) write_pcm_s32(s, pcm->runtime, buffer, data_blocks); else diff --git a/sound/firewire/oxfw/oxfw-scs1x.c b/sound/firewire/oxfw/oxfw-scs1x.c index 02d595665898..f33497cdc706 100644 --- a/sound/firewire/oxfw/oxfw-scs1x.c +++ b/sound/firewire/oxfw/oxfw-scs1x.c @@ -112,7 +112,7 @@ static void handle_hss(struct fw_card *card, struct fw_request *request, } if (length >= 1) { - stream = ACCESS_ONCE(scs->input); + stream = READ_ONCE(scs->input); if (stream) midi_input_packet(scs, stream, data, length); } @@ -183,7 +183,7 @@ static void scs_output_work(struct work_struct *work) if (scs->transaction_running) return; - stream = ACCESS_ONCE(scs->output); + stream 
= READ_ONCE(scs->output); if (!stream || scs->error) { scs->output_idle = true; wake_up(&scs->idle_wait); @@ -291,9 +291,9 @@ static void midi_capture_trigger(struct snd_rawmidi_substream *stream, int up) if (up) { scs->input_escape_count = 0; - ACCESS_ONCE(scs->input) = stream; + WRITE_ONCE(scs->input, stream); } else { - ACCESS_ONCE(scs->input) = NULL; + WRITE_ONCE(scs->input, NULL); } } @@ -319,10 +319,10 @@ static void midi_playback_trigger(struct snd_rawmidi_substream *stream, int up) scs->transaction_bytes = 0; scs->error = false; - ACCESS_ONCE(scs->output) = stream; + WRITE_ONCE(scs->output, stream); schedule_work(&scs->work); } else { - ACCESS_ONCE(scs->output) = NULL; + WRITE_ONCE(scs->output, NULL); } } static void midi_playback_drain(struct snd_rawmidi_substream *stream) diff --git a/sound/firewire/tascam/amdtp-tascam.c b/sound/firewire/tascam/amdtp-tascam.c index 6aff1fc1c72d..ab482423c165 100644 --- a/sound/firewire/tascam/amdtp-tascam.c +++ b/sound/firewire/tascam/amdtp-tascam.c @@ -124,7 +124,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, { struct snd_pcm_substream *pcm; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (data_blocks > 0 && pcm) read_pcm_s32(s, pcm, buffer, data_blocks); @@ -143,7 +143,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, /* This field is not used. 
*/ *syt = 0x0000; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) write_pcm_s32(s, pcm, buffer, data_blocks); else diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c index 8967c52f5032..2ad692dd4b13 100644 --- a/sound/firewire/tascam/tascam-transaction.c +++ b/sound/firewire/tascam/tascam-transaction.c @@ -148,7 +148,7 @@ static void async_midi_port_callback(struct fw_card *card, int rcode, void *callback_data) { struct snd_fw_async_midi_port *port = callback_data; - struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); + struct snd_rawmidi_substream *substream = READ_ONCE(port->substream); /* This port is closed. */ if (substream == NULL) @@ -173,7 +173,7 @@ static void midi_port_work(struct work_struct *work) { struct snd_fw_async_midi_port *port = container_of(work, struct snd_fw_async_midi_port, work); - struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); + struct snd_rawmidi_substream *substream = READ_ONCE(port->substream); int generation; /* Under transacting or error state. 
*/ @@ -282,7 +282,7 @@ static void handle_midi_tx(struct fw_card *card, struct fw_request *request, bytes = 3; } - substream = ACCESS_ONCE(tscm->tx_midi_substreams[port]); + substream = READ_ONCE(tscm->tx_midi_substreams[port]); if (substream != NULL) snd_rawmidi_receive(substream, b + 1, bytes); } diff --git a/sound/soc/xtensa/xtfpga-i2s.c b/sound/soc/xtensa/xtfpga-i2s.c index 8382ffa3bcaf..2472144b329e 100644 --- a/sound/soc/xtensa/xtfpga-i2s.c +++ b/sound/soc/xtensa/xtfpga-i2s.c @@ -165,7 +165,7 @@ static bool xtfpga_pcm_push_tx(struct xtfpga_i2s *i2s) tx_substream = rcu_dereference(i2s->tx_substream); tx_active = tx_substream && snd_pcm_running(tx_substream); if (tx_active) { - unsigned tx_ptr = ACCESS_ONCE(i2s->tx_ptr); + unsigned tx_ptr = READ_ONCE(i2s->tx_ptr); unsigned new_tx_ptr = i2s->tx_fn(i2s, tx_substream->runtime, tx_ptr); @@ -437,7 +437,7 @@ static int xtfpga_pcm_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - ACCESS_ONCE(i2s->tx_ptr) = 0; + WRITE_ONCE(i2s->tx_ptr, 0); rcu_assign_pointer(i2s->tx_substream, substream); xtfpga_pcm_refill_fifo(i2s); break; @@ -459,7 +459,7 @@ static snd_pcm_uframes_t xtfpga_pcm_pointer(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct xtfpga_i2s *i2s = runtime->private_data; - snd_pcm_uframes_t pos = ACCESS_ONCE(i2s->tx_ptr); + snd_pcm_uframes_t pos = READ_ONCE(i2s->tx_ptr); return pos < runtime->buffer_size ? 
pos : 0; } diff --git a/sound/usb/bcd2000/bcd2000.c b/sound/usb/bcd2000/bcd2000.c index 7371e5b06035..fc579f330601 100644 --- a/sound/usb/bcd2000/bcd2000.c +++ b/sound/usb/bcd2000/bcd2000.c @@ -108,7 +108,7 @@ static void bcd2000_midi_handle_input(struct bcd2000 *bcd2k, unsigned int payload_length, tocopy; struct snd_rawmidi_substream *midi_receive_substream; - midi_receive_substream = ACCESS_ONCE(bcd2k->midi_receive_substream); + midi_receive_substream = READ_ONCE(bcd2k->midi_receive_substream); if (!midi_receive_substream) return; @@ -139,7 +139,7 @@ static void bcd2000_midi_send(struct bcd2000 *bcd2k) BUILD_BUG_ON(sizeof(device_cmd_prefix) >= BUFSIZE); - midi_out_substream = ACCESS_ONCE(bcd2k->midi_out_substream); + midi_out_substream = READ_ONCE(bcd2k->midi_out_substream); if (!midi_out_substream) return; diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 328eeceec709..96e2d06cb031 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -24,7 +24,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 5e9738f97bf3..97427e700e3b 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -21,7 +21,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 33b5e6cdf38c..d19e11b68de7 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -378,7 +378,7 @@ struct addr_filters { static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm) { struct perf_event_mmap_page *pc = mm->userpg; - u64 head = ACCESS_ONCE(pc->aux_head); + u64 head = READ_ONCE(pc->aux_head); /* Ensure all reads are 
done after we read the head */ rmb(); @@ -389,7 +389,7 @@ static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm) { struct perf_event_mmap_page *pc = mm->userpg; #if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - u64 head = ACCESS_ONCE(pc->aux_head); + u64 head = READ_ONCE(pc->aux_head); #else u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0); #endif diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 47b5e7dbcb18..aae9645c7122 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -113,7 +113,7 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, extern volatile int session_done; -#define session_done() ACCESS_ONCE(session_done) +#define session_done() READ_ONCE(session_done) int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9deb5a245b83..ce507ae1d4f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2302,7 +2302,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; } else if (pass && i > last_boosted_vcpu) break; - if (!ACCESS_ONCE(vcpu->preempted)) + if (!READ_ONCE(vcpu->preempted)) continue; if (vcpu == me) continue; -- cgit v1.2.3 From 433727948904301b01f1d5ebf39893c96cd4bab7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Oct 2017 14:41:01 -0300 Subject: tools include uapi: Grab a copy of linux/prctl.h We will use it to generate tables for beautifying prctl's 'option' arg and some of the others eventually. 
Cc: Andy Lutomirski Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cg8mpmz4hk9nfih685emnbk9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 200 +++++++++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 201 insertions(+) create mode 100644 tools/include/uapi/linux/prctl.h (limited to 'tools/include') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h new file mode 100644 index 000000000000..a8d0759a9e40 --- /dev/null +++ b/tools/include/uapi/linux/prctl.h @@ -0,0 +1,200 @@ +#ifndef _LINUX_PRCTL_H +#define _LINUX_PRCTL_H + +#include <linux/types.h> + +/* Values to pass as first argument to prctl() */ + +#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */ +#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */ + +/* Get/set current->mm->dumpable */ +#define PR_GET_DUMPABLE 3 +#define PR_SET_DUMPABLE 4 + +/* Get/set unaligned access control bits (if meaningful) */ +#define PR_GET_UNALIGN 5 +#define PR_SET_UNALIGN 6 +# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */ +# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */ + +/* Get/set whether or not to drop capabilities on setuid() away from + * uid 0 (as per security/commoncap.c) */ +#define PR_GET_KEEPCAPS 7 +#define PR_SET_KEEPCAPS 8 + +/* Get/set floating-point emulation control bits (if meaningful) */ +#define PR_GET_FPEMU 9 +#define PR_SET_FPEMU 10 +# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */ +# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */ + +/* Get/set floating-point exception mode (if meaningful) */ +#define PR_GET_FPEXC 11 +#define PR_SET_FPEXC 12 +# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */ +# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */ +# define PR_FP_EXC_OVF 0x020000 
/* floating point overflow */ +# define PR_FP_EXC_UND 0x040000 /* floating point underflow */ +# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */ +# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */ +# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */ +# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */ +# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */ +# define PR_FP_EXC_PRECISE 3 /* precise exception mode */ + +/* Get/set whether we use statistical process timing or accurate timestamp + * based process timing */ +#define PR_GET_TIMING 13 +#define PR_SET_TIMING 14 +# define PR_TIMING_STATISTICAL 0 /* Normal, traditional, + statistical process timing */ +# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based + process timing */ + +#define PR_SET_NAME 15 /* Set process name */ +#define PR_GET_NAME 16 /* Get process name */ + +/* Get/set process endian */ +#define PR_GET_ENDIAN 19 +#define PR_SET_ENDIAN 20 +# define PR_ENDIAN_BIG 0 +# define PR_ENDIAN_LITTLE 1 /* True little endian mode */ +# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ + +/* Get/set process seccomp mode */ +#define PR_GET_SECCOMP 21 +#define PR_SET_SECCOMP 22 + +/* Get/set the capability bounding set (as per security/commoncap.c) */ +#define PR_CAPBSET_READ 23 +#define PR_CAPBSET_DROP 24 + +/* Get/set the process' ability to use the timestamp counter instruction */ +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ + +/* Get/set securebits (as per security/commoncap.c) */ +#define PR_GET_SECUREBITS 27 +#define PR_SET_SECUREBITS 28 + +/* + * Get/set the timerslack as used by poll/select/nanosleep + * A value of 0 means "use default" + */ +#define PR_SET_TIMERSLACK 29 +#define PR_GET_TIMERSLACK 30 + +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define 
PR_TASK_PERF_EVENTS_ENABLE 32 + +/* + * Set early/late kill mode for hwpoison memory corruption. + * This influences when the process gets killed on a memory corruption. + */ +#define PR_MCE_KILL 33 +# define PR_MCE_KILL_CLEAR 0 +# define PR_MCE_KILL_SET 1 + +# define PR_MCE_KILL_LATE 0 +# define PR_MCE_KILL_EARLY 1 +# define PR_MCE_KILL_DEFAULT 2 + +#define PR_MCE_KILL_GET 34 + +/* + * Tune up process memory map specifics. + */ +#define PR_SET_MM 35 +# define PR_SET_MM_START_CODE 1 +# define PR_SET_MM_END_CODE 2 +# define PR_SET_MM_START_DATA 3 +# define PR_SET_MM_END_DATA 4 +# define PR_SET_MM_START_STACK 5 +# define PR_SET_MM_START_BRK 6 +# define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 +# define PR_SET_MM_MAP 14 +# define PR_SET_MM_MAP_SIZE 15 + +/* + * This structure provides new memory descriptor + * map which mostly modifies /proc/pid/stat[m] + * output for a task. This mostly done in a + * sake of checkpoint/restore functionality. + */ +struct prctl_mm_map { + __u64 start_code; /* code section bounds */ + __u64 end_code; + __u64 start_data; /* data section bounds */ + __u64 end_data; + __u64 start_brk; /* heap for brk() syscall */ + __u64 brk; + __u64 start_stack; /* stack starts at */ + __u64 arg_start; /* command line arguments bounds */ + __u64 arg_end; + __u64 env_start; /* environment variables bounds */ + __u64 env_end; + __u64 *auxv; /* auxiliary vector */ + __u32 auxv_size; /* vector size */ + __u32 exe_fd; /* /proc/$pid/exe link file */ +}; + +/* + * Set specific pid that is allowed to ptrace the current task. + * A value of 0 mean "no process". + */ +#define PR_SET_PTRACER 0x59616d61 +# define PR_SET_PTRACER_ANY ((unsigned long)-1) + +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + +/* + * If no_new_privs is set, then operations that grant new privileges (i.e. 
+ * execve) will either fail or not grant them. This affects suid/sgid, + * file capabilities, and LSMs. + * + * Operations that merely manipulate or drop existing privileges (setresuid, + * capset, etc.) will still work. Drop those privileges if you want them gone. + * + * Changing LSM security domain is considered a new privilege. So, for example, + * asking selinux for a specific new context (e.g. with runcon) will result + * in execve returning -EPERM. + * + * See Documentation/prctl/no_new_privs.txt for more details. + */ +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 + +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + +/* + * Tell the kernel to start/stop helping userspace manage bounds tables. + */ +#define PR_MPX_ENABLE_MANAGEMENT 43 +#define PR_MPX_DISABLE_MANAGEMENT 44 + +#define PR_SET_FP_MODE 45 +#define PR_GET_FP_MODE 46 +# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */ +# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */ + +/* Control the ambient capability set */ +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 + +#endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 322629423b49..8d8b37198666 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -6,6 +6,7 @@ include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h include/uapi/linux/kvm.h include/uapi/linux/perf_event.h +include/uapi/linux/prctl.h include/uapi/linux/sched.h include/uapi/linux/stat.h include/uapi/linux/vhost.h -- cgit v1.2.3 From 735e215e95e53b857000aaabe1b4707878b10f43 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 31 Oct 2017 10:04:11 -0300 Subject: tools include uapi: Grab a copy of linux/kcmp.h We will use it to generate tables for beautifying kcmp's 'type' arg. 
Cc: Adrian Hunter Cc: Andrey Vagin Cc: Cyrill Gorcunov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-r35zr79invmpinfe1zu57cas@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kcmp.h | 27 +++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 28 insertions(+) create mode 100644 tools/include/uapi/linux/kcmp.h (limited to 'tools/include') diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h new file mode 100644 index 000000000000..481e103da78e --- /dev/null +++ b/tools/include/uapi/linux/kcmp.h @@ -0,0 +1,27 @@ +#ifndef _UAPI_LINUX_KCMP_H +#define _UAPI_LINUX_KCMP_H + +#include <linux/types.h> + +/* Comparison type */ +enum kcmp_type { + KCMP_FILE, + KCMP_VM, + KCMP_FILES, + KCMP_FS, + KCMP_SIGHAND, + KCMP_IO, + KCMP_SYSVSEM, + KCMP_EPOLL_TFD, + + KCMP_TYPES, +}; + +/* Slot for KCMP_EPOLL_TFD */ +struct kcmp_epoll_slot { + __u32 efd; /* epoll file descriptor */ + __u32 tfd; /* target file number */ + __u32 toff; /* target offset within same numbered sequence */ +}; + +#endif /* _UAPI_LINUX_KCMP_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 8d8b37198666..a3a041b0d35e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -4,6 +4,7 @@ HEADERS=' include/uapi/drm/drm.h include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h +include/uapi/linux/kcmp.h include/uapi/linux/kvm.h include/uapi/linux/perf_event.h include/uapi/linux/prctl.h -- cgit v1.2.3 From 1c9725974074a047f6080eecc62c50a8e840d050 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 1 Nov 2017 16:36:30 +0100 Subject: mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap flags The mmap(2) syscall suffers from the ABI anti-pattern of not validating unknown flags. However, proposals like MAP_SYNC need a mechanism to define new behavior that is known to fail on older kernels without the support. 
Define a new MAP_SHARED_VALIDATE flag pattern that is guaranteed to fail on all legacy mmap implementations. It is worth noting that the original proposal was for a standalone MAP_VALIDATE flag. However, when that could not be supported by all archs Linus observed: I see why you *think* you want a bitmap. You think you want a bitmap because you want to make MAP_VALIDATE be part of MAP_SYNC etc, so that people can do ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_SYNC, fd, 0); and "know" that MAP_SYNC actually takes. And I'm saying that whole wish is bogus. You're fundamentally depending on special semantics, just make it explicit. It's already not portable, so don't try to make it so. Rename that MAP_VALIDATE as MAP_SHARED_VALIDATE, make it have a value of 0x3, and make people do ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); and then the kernel side is easier too (none of that random garbage playing games with looking at the "MAP_VALIDATE bit", but just another case statement in that map type thing. Boom. Done. Similar to ->fallocate() we also want the ability to validate the support for new flags on a per ->mmap() 'struct file_operations' instance basis. Towards that end arrange for flags to be generically validated against a mmap_supported_flags exported by 'struct file_operations'. By default all existing flags are implicitly supported, but new flags require MAP_SHARED_VALIDATE and per-instance-opt-in. 
Cc: Jan Kara Cc: Arnd Bergmann Cc: Andy Lutomirski Cc: Andrew Morton Suggested-by: Christoph Hellwig Suggested-by: Linus Torvalds Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- arch/alpha/include/uapi/asm/mman.h | 1 + arch/mips/include/uapi/asm/mman.h | 1 + arch/parisc/include/uapi/asm/mman.h | 1 + arch/xtensa/include/uapi/asm/mman.h | 1 + include/linux/fs.h | 1 + include/linux/mman.h | 39 ++++++++++++++++++++++++++++ include/uapi/asm-generic/mman-common.h | 1 + mm/mmap.c | 15 +++++++++++ tools/include/uapi/asm-generic/mman-common.h | 1 + 9 files changed, 61 insertions(+) (limited to 'tools/include') diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 3b26cc62dadb..f6d118aaedb9 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -11,6 +11,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ #define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */ #define MAP_FIXED 0x100 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x10 /* don't use a file */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index da3216007fe0..93268e4cd3c7 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -28,6 +28,7 @@ */ #define MAP_SHARED 0x001 /* Share changes */ #define MAP_PRIVATE 0x002 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ #define MAP_TYPE 0x00f /* Mask for type of mapping */ #define MAP_FIXED 0x010 /* Interpret addr exactly */ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 775b5d5e41a1..bca652aa1677 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -11,6 +11,7 @@ #define MAP_SHARED 0x01 /* 
Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ #define MAP_TYPE 0x03 /* Mask for type of mapping */ #define MAP_FIXED 0x04 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x10 /* don't use a file */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index b15b278aa314..9ab426374714 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -35,6 +35,7 @@ */ #define MAP_SHARED 0x001 /* Share changes */ #define MAP_PRIVATE 0x002 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ #define MAP_TYPE 0x00f /* Mask for type of mapping */ #define MAP_FIXED 0x010 /* Interpret addr exactly */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 13dab191a23e..57added3201d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1701,6 +1701,7 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); + unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); diff --git a/include/linux/mman.h b/include/linux/mman.h index edb6cf6a81ed..74452e3f2536 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -7,6 +7,45 @@ #include #include +/* + * Arrange for legacy / undefined architecture specific flags to be + * ignored by default in LEGACY_MAP_MASK. 
+ */ +#ifndef MAP_32BIT +#define MAP_32BIT 0 +#endif +#ifndef MAP_HUGE_2MB +#define MAP_HUGE_2MB 0 +#endif +#ifndef MAP_HUGE_1GB +#define MAP_HUGE_1GB 0 +#endif +#ifndef MAP_UNINITIALIZED +#define MAP_UNINITIALIZED 0 +#endif + +/* + * The historical set of flags that all mmap implementations implicitly + * support; newer flags must be listed in ->mmap_supported_flags. + */ +#define LEGACY_MAP_MASK (MAP_SHARED \ + | MAP_PRIVATE \ + | MAP_FIXED \ + | MAP_ANONYMOUS \ + | MAP_DENYWRITE \ + | MAP_EXECUTABLE \ + | MAP_UNINITIALIZED \ + | MAP_GROWSDOWN \ + | MAP_LOCKED \ + | MAP_NORESERVE \ + | MAP_POPULATE \ + | MAP_NONBLOCK \ + | MAP_STACK \ + | MAP_HUGETLB \ + | MAP_32BIT \ + | MAP_HUGE_2MB \ + | MAP_HUGE_1GB) + extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 203268f9231e..8ce7f5a0800f 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -16,6 +16,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ diff --git a/mm/mmap.c b/mm/mmap.c index 680506faceae..924839fac0e6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1387,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (file) { struct inode *inode = file_inode(file); + unsigned long flags_mask; + + flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; switch (flags & MAP_TYPE) { case MAP_SHARED: + /* + * Force use of MAP_SHARED_VALIDATE with non-legacy + * flags. E.g. MAP_SYNC is dangerous to use with + * MAP_SHARED as you don't know which consistency model + * you will get.
We silently ignore unsupported flags + * with MAP_SHARED to preserve backward compatibility. + */ + flags &= LEGACY_MAP_MASK; + /* fall through */ + case MAP_SHARED_VALIDATE: + if (flags & ~flags_mask) + return -EOPNOTSUPP; if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) return -EACCES; diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 203268f9231e..8ce7f5a0800f 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -16,6 +16,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -- cgit v1.2.3 From 928631e05495fa1f0e9775f555b94dbcbb4e2fb5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:19 -0700 Subject: bpftool: print program device bound info If program is bound to a device, print the name of the relevant interface or unknown if the netdev has since been removed. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. 
Miller --- tools/bpf/bpftool/prog.c | 31 +++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++++ 2 files changed, 38 insertions(+) (limited to 'tools/include') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 250f80fd46aa..d3ab808dc882 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,21 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + if (info->status & BPF_PROG_STATUS_DEV_BOUND) { + jsonw_name(json_wtr, "dev"); + if (info->ifindex) { + char name[IF_NAMESIZE]; + + if (!if_indextoname(info->ifindex, name)) + jsonw_printf(json_wtr, "\"ifindex:%d\"", + info->ifindex); + else + jsonw_printf(json_wtr, "\"%s\"", name); + } else { + jsonw_printf(json_wtr, "\"unknown\""); + } + } + if (info->load_time) { char buf[32]; @@ -274,6 +290,21 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + printf(" "); + + if (info->status & BPF_PROG_STATUS_DEV_BOUND) { + printf("dev "); + if (info->ifindex) { + char name[IF_NAMESIZE]; + + if (!if_indextoname(info->ifindex, name)) + printf("ifindex:%d ", info->ifindex); + else + printf("%s ", name); + } else { + printf("unknown "); + } + } printf("\n"); if (info->load_time) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7cebba491011..e92f62cf933a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -259,6 +259,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -893,6 +894,10 @@ enum sk_action { #define 
BPF_TAG_SIZE 8 +enum bpf_prog_status { + BPF_PROG_STATUS_DEV_BOUND = (1 << 0), +}; + struct bpf_prog_info { __u32 type; __u32 id; @@ -906,6 +911,8 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From ebc614f687369f9df99828572b1d85a7c2de3d92 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:32 -0500 Subject: bpf, cgroup: implement eBPF-based device controller for cgroup v2 Cgroup v2 lacks the device controller, provided by cgroup v1. This patch adds a new eBPF program type, which, in combination with the previously added ability to attach multiple eBPF programs to a cgroup, will provide similar functionality, but with some additional flexibility. This patch introduces a BPF_PROG_TYPE_CGROUP_DEVICE program type. A program takes major and minor device numbers, device type (block/character) and access type (mknod/read/write) as parameters and returns an integer which defines if the operation should be allowed or terminated with -EPERM. Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S.
Miller --- include/linux/bpf-cgroup.h | 15 ++++++++++ include/linux/bpf_types.h | 3 ++ include/linux/device_cgroup.h | 8 ++++- include/uapi/linux/bpf.h | 15 ++++++++++ kernel/bpf/cgroup.c | 67 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 7 +++++ kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 15 ++++++++++ 8 files changed, 130 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 87a7db9feb38..a7f16e0f8d68 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, enum bpf_attach_type type); +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } \ __ret; \ }) + +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ + access, \ + BPF_CGROUP_DEVICE); \ + \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 53c5b9ad7220..978c1d9c9383 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, 
tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 2d93d7ecd479..8557efe096dc 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 @@ -19,10 +20,15 @@ static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, { return 0; } #endif -#ifdef CONFIG_CGROUP_DEVICE +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) static inline int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { + int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); + + if (rc) + return -EPERM; + return __devcgroup_check_permission(type, major, minor, access); } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4455dd195201..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -141,6 +142,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -991,4 +993,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git 
a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 3db5a17fcfe8..b789ab78d28f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); + +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type) +{ + struct cgroup *cgrp; + struct bpf_cgroup_dev_ctx ctx = { + .access_type = (access << 16) | dev_type, + .major = major, + .minor = minor, + }; + int allow = 1; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, + BPF_PROG_RUN); + rcu_read_unlock(); + + return !allow; +} +EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); + +static const struct bpf_func_proto * +cgroup_dev_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_trace_printk: + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); + default: + return NULL; + } +} + +static bool cgroup_dev_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) + return false; + + if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) + return false; + /* The verifier guarantees that size > 0. 
*/ + if (off % size != 0) + return false; + if (size != sizeof(__u32)) + return false; + + return true; +} + +const struct bpf_prog_ops cg_dev_prog_ops = { +}; + +const struct bpf_verifier_ops cg_dev_verifier_ops = { + .get_func_proto = cgroup_dev_func_proto, + .is_valid_access = cgroup_dev_is_valid_access, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 416d70cdfc76..09badc37e864 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, true); @@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, false); @@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: break; default: return -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index add845fe788a..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_CGROUP_DEVICE: break; default: return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e92f62cf933a..b280f37cd057 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -131,6 +131,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, 
BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -140,6 +141,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -990,4 +992,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From eafb3401faf243f7dca0e23325242cb8c2269ee9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:43 -0500 Subject: samples/bpf: add a test for bpf_override_return This adds a basic test for bpf_override_return to verify it works. We override the main function for mounting a btrfs fs so it'll return -ENOMEM and then make sure that trying to mount a btrfs fs will fail. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- samples/bpf/Makefile | 4 ++++ samples/bpf/test_override_return.sh | 15 +++++++++++++++ samples/bpf/tracex7_kern.c | 16 ++++++++++++++++ samples/bpf/tracex7_user.c | 28 ++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 ++++++- tools/testing/selftests/bpf/bpf_helpers.h | 3 ++- 6 files changed, 71 insertions(+), 2 deletions(-) create mode 100755 samples/bpf/test_override_return.sh create mode 100644 samples/bpf/tracex7_kern.c create mode 100644 samples/bpf/tracex7_user.c (limited to 'tools/include') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3b4945c1eab0..87db0f9a4c15 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -15,6 +15,7 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 +hostprogs-y += tracex7 hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist @@ -61,6 +62,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o @@ -104,6 +106,7 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o +always += tracex7_kern.o always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o @@ -158,6 +161,7 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_tracex7 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf diff --git a/samples/bpf/test_override_return.sh 
b/samples/bpf/test_override_return.sh new file mode 100755 index 000000000000..e68b9ee6814b --- /dev/null +++ b/samples/bpf/test_override_return.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +rm -f testfile.img +dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 +DEVICE=$(losetup --show -f testfile.img) +mkfs.btrfs -f $DEVICE +mkdir tmpmnt +./tracex7 $DEVICE +if [ $? -eq 0 ] +then + echo "SUCCESS!" +else + echo "FAILED!" +fi +losetup -d $DEVICE diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c new file mode 100644 index 000000000000..1ab308a43e0f --- /dev/null +++ b/samples/bpf/tracex7_kern.c @@ -0,0 +1,16 @@ +#include +#include +#include +#include "bpf_helpers.h" + +SEC("kprobe/open_ctree") +int bpf_prog1(struct pt_regs *ctx) +{ + unsigned long rc = -12; + + bpf_override_return(ctx, rc); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c new file mode 100644 index 000000000000..8a52ac492e8b --- /dev/null +++ b/samples/bpf/tracex7_user.c @@ -0,0 +1,28 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +int main(int argc, char **argv) +{ + FILE *f; + char filename[256]; + char command[256]; + int ret; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + snprintf(command, 256, "mount %s tmpmnt/", argv[1]); + f = popen(command, "r"); + ret = pclose(f); + + return ret ? 
0 : 1; +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e880ae6434ee..adb66f78b674 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17fa8a8b..33cb00e46c49 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From 505ee76761062a1872b024140e886b7136a6c1d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 11 Nov 2017 09:06:57 +0100 Subject: tooling/headers: Sync the tools/include/uapi/drm/i915_drm.h UAPI header Last minute upstream update to one of the UAPI headers - sync it with tooling, to address this warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: 
linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 6598fb76d2c2..9816590d3ad2 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -829,6 +829,7 @@ struct drm_i915_gem_exec_fence { #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) __u32 flags; }; -- cgit v1.2.3 From f3edacbd697f94a743fff1a3d26910ab99948ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Nov 2017 18:24:55 +0900 Subject: bpf: Revert bpf_override_return() helper changes. NACK'd by x86 maintainer. Signed-off-by: David S. Miller --- arch/Kconfig | 3 --- arch/x86/Kconfig | 1 - arch/x86/include/asm/kprobes.h | 4 ---- arch/x86/include/asm/ptrace.h | 5 ---- arch/x86/kernel/kprobes/ftrace.c | 14 ----------- include/linux/filter.h | 3 +-- include/linux/trace_events.h | 1 - include/uapi/linux/bpf.h | 7 +----- kernel/bpf/core.c | 3 --- kernel/bpf/verifier.c | 2 -- kernel/events/core.c | 7 ------ kernel/trace/Kconfig | 11 --------- kernel/trace/bpf_trace.c | 35 --------------------------- kernel/trace/trace_kprobe.c | 40 ++++++------------------------- kernel/trace/trace_probe.h | 6 ----- samples/bpf/Makefile | 4 ---- samples/bpf/test_override_return.sh | 15 ------------ samples/bpf/tracex7_kern.c | 16 ------------- samples/bpf/tracex7_user.c | 28 ---------------------- tools/include/uapi/linux/bpf.h | 7 +----- tools/testing/selftests/bpf/bpf_helpers.h | 3 +-- 21 files changed, 11 insertions(+), 204 deletions(-) delete mode 100755 samples/bpf/test_override_return.sh delete mode 100644 samples/bpf/tracex7_kern.c delete mode 100644 samples/bpf/tracex7_user.c (limited to 'tools/include') diff --git a/arch/Kconfig b/arch/Kconfig index 6e8520f09bc1..057370a0ac4e 100644 ---
a/arch/Kconfig +++ b/arch/Kconfig @@ -196,9 +196,6 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool -config HAVE_KPROBE_OVERRIDE - bool - config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51458c1a0b4a..2fdb23313dd5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -153,7 +153,6 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE - select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index c6c3b1f4306a..6cf65437b5e5 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,10 +67,6 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); -#ifdef CONFIG_KPROBES_ON_FTRACE -extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); -#endif - /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 2370bb0149cc..c0e3c45cf6ab 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,11 +109,6 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->ax = rc; -} - /* * user_mode(regs) determines whether a register set came from user * mode. 
On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 3c455bf490cb..041f7b6dfa0f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } - -asmlinkage void override_func(void); -asm( - ".type override_func, @function\n" - "override_func:\n" - " ret\n" - ".size override_func, .-override_func\n" -); - -void arch_ftrace_kprobe_override_function(struct pt_regs *regs) -{ - regs->ip = (unsigned long)&override_func; -} -NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index eaec066f99e8..0cd02ff4ae30 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,8 +459,7 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - kprobe_override:1; /* Do we override a kprobe? */ + dst_needed:1; /* Do we need dst entry? 
*/ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 17e5e820a84c..84014ecfa67f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,7 +523,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 271daad31f37..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,9 +1326,6 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { - if (fp->kprobe_override) - return false; - if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc464b8ec91e..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4357,8 +4357,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_override_return) - prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index ac240d31b5bf..42d24bd64ea4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8171,13 +8171,6 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } - /* Kprobe override only works for kprobes, not uprobes. */ - if (prog->kprobe_override && - !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { - bpf_prog_put(prog); - return -EINVAL; - } - if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9dc0deeaad2b..434c840e2d82 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -518,17 +518,6 @@ config FUNCTION_PROFILER If in doubt, say N. -config BPF_KPROBE_OVERRIDE - bool "Enable BPF programs to override a kprobed function" - depends on BPF_EVENTS - depends on KPROBES_ON_FTRACE - depends on HAVE_KPROBE_OVERRIDE - depends on DYNAMIC_FTRACE_WITH_REGS - default n - help - Allows BPF to override the execution of a probed function and - set a different return value. This is used for error injection. 
- config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1865b0d4cdeb..506efe6e8ed9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,10 +13,6 @@ #include #include #include -#include -#include - -#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -80,29 +76,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) -{ - __this_cpu_write(bpf_kprobe_override, 1); - regs_set_return_value(regs, rc); - arch_ftrace_kprobe_override_function(regs); - return 0; -} -#else -BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) -{ - return -EINVAL; -} -#endif - -static const struct bpf_func_proto bpf_override_return_proto = { - .func = bpf_override_return, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_ANYTHING, -}; - BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -578,10 +551,6 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; - case BPF_FUNC_override_return: - pr_warn_ratelimited("%s[%d] is installing a program with bpf_override_return helper that may cause unexpected behavior!", - current->comm, task_pid_nr(current)); - return &bpf_override_return_proto; default: return tracing_func_proto(func_id); } @@ -797,10 +766,6 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; - /* Kprobe override only works for ftrace based kprobes. 
*/ - if (prog->kprobe_override && !trace_kprobe_ftrace(event->tp_event)) - return -EINVAL; - mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8e3c9ec1faf7..abf92e478cfb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,7 +42,6 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) -DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -88,12 +87,6 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } -int trace_kprobe_ftrace(struct trace_event_call *call) -{ - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; - return kprobe_ftrace(&tk->rp.kp); -} - static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1177,7 +1170,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static int +static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1186,29 +1179,12 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call)) { - int ret; - - ret = trace_call_bpf(call, regs); - - /* - * We need to check and see if we modified the pc of the - * pt_regs, and if so clear the kprobe and return 1 so that we - * don't do the instruction skipping. Also reset our state so - * we are clean the next pass through. 
- */ - if (__this_cpu_read(bpf_kprobe_override)) { - __this_cpu_write(bpf_kprobe_override, 0); - reset_current_kprobe(); - return 1; - } - if (!ret) - return 0; - } + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) + return; head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return 0; + return; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1217,14 +1193,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return 0; + return; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL, NULL); - return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1300,7 +1275,6 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); - int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1308,9 +1282,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - ret = kprobe_perf_func(tk, regs); + kprobe_perf_func(tk, regs); #endif - return ret; + return 0; /* We don't tweek kernel, so just return 0 */ } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index adbb3f7d1fb5..903273c93e61 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -253,7 +253,6 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); -int trace_kprobe_ftrace(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods 
*/ #define fetch_symbol_u8 NULL @@ -279,11 +278,6 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } - -static inline int trace_kprobe_ftrace(struct trace_event_call *call) -{ - return 0; -} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87db0f9a4c15..3b4945c1eab0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -15,7 +15,6 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 -hostprogs-y += tracex7 hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist @@ -62,7 +61,6 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o -tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o @@ -106,7 +104,6 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o -always += tracex7_kern.o always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o @@ -161,7 +158,6 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf -HOSTLOADLIBES_tracex7 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh deleted file mode 100755 index e68b9ee6814b..000000000000 --- a/samples/bpf/test_override_return.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -rm -f testfile.img -dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 -DEVICE=$(losetup --show -f 
testfile.img) -mkfs.btrfs -f $DEVICE -mkdir tmpmnt -./tracex7 $DEVICE -if [ $? -eq 0 ] -then - echo "SUCCESS!" -else - echo "FAILED!" -fi -losetup -d $DEVICE diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c deleted file mode 100644 index 1ab308a43e0f..000000000000 --- a/samples/bpf/tracex7_kern.c +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include -#include -#include "bpf_helpers.h" - -SEC("kprobe/open_ctree") -int bpf_prog1(struct pt_regs *ctx) -{ - unsigned long rc = -12; - - bpf_override_return(ctx, rc); - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c deleted file mode 100644 index 8a52ac492e8b..000000000000 --- a/samples/bpf/tracex7_user.c +++ /dev/null @@ -1,28 +0,0 @@ -#define _GNU_SOURCE - -#include -#include -#include -#include "libbpf.h" -#include "bpf_load.h" - -int main(int argc, char **argv) -{ - FILE *f; - char filename[256]; - char command[256]; - int ret; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } - - snprintf(command, 256, "mount %s tmpmnt/", argv[1]); - f = popen(command, "r"); - ret = pclose(f); - - return ret ? 
0 : 1; -} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 33cb00e46c49..fd9a17fa8a8b 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,8 +82,7 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; -static int (*bpf_override_return)(void *ctx, unsigned long rc) = - (void *) BPF_FUNC_override_return; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From 4675ff05de2d76d167336b368bd07f3fef6ed5a6 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:36:02 -0800 Subject: kmemcheck: rip it out Fix up makefiles, remove references, and git rm kmemcheck. Link: http://lkml.kernel.org/r/20171007030159.22241-4-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Steven Rostedt Cc: Vegard Nossum Cc: Pekka Enberg Cc: Michal Hocko Cc: Eric W. 
Biederman Cc: Alexander Potapenko Cc: Tim Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 7 - Documentation/dev-tools/index.rst | 1 - Documentation/dev-tools/kmemcheck.rst | 733 ------------------------ MAINTAINERS | 10 - arch/x86/Kconfig | 3 +- arch/x86/include/asm/kmemcheck.h | 42 -- arch/x86/include/asm/string_32.h | 9 - arch/x86/include/asm/string_64.h | 8 - arch/x86/kernel/cpu/intel.c | 15 - arch/x86/mm/Makefile | 2 - arch/x86/mm/init.c | 5 +- arch/x86/mm/kmemcheck/Makefile | 1 - arch/x86/mm/kmemcheck/error.c | 227 -------- arch/x86/mm/kmemcheck/error.h | 15 - arch/x86/mm/kmemcheck/kmemcheck.c | 658 --------------------- arch/x86/mm/kmemcheck/opcode.c | 106 ---- arch/x86/mm/kmemcheck/opcode.h | 9 - arch/x86/mm/kmemcheck/pte.c | 22 - arch/x86/mm/kmemcheck/pte.h | 10 - arch/x86/mm/kmemcheck/selftest.c | 70 --- arch/x86/mm/kmemcheck/selftest.h | 6 - arch/x86/mm/kmemcheck/shadow.c | 173 ------ arch/x86/mm/kmemcheck/shadow.h | 18 - include/linux/interrupt.h | 15 - include/linux/kmemcheck.h | 171 ------ kernel/softirq.c | 10 - kernel/sysctl.c | 10 - lib/Kconfig.debug | 6 +- lib/Kconfig.kmemcheck | 94 --- mm/Kconfig.debug | 1 - mm/Makefile | 2 - mm/kmemcheck.c | 125 ---- mm/slub.c | 5 +- scripts/kernel-doc | 2 - tools/include/linux/kmemcheck.h | 8 - 35 files changed, 7 insertions(+), 2592 deletions(-) delete mode 100644 Documentation/dev-tools/kmemcheck.rst delete mode 100644 arch/x86/mm/kmemcheck/Makefile delete mode 100644 arch/x86/mm/kmemcheck/kmemcheck.c delete mode 100644 arch/x86/mm/kmemcheck/shadow.c delete mode 100644 lib/Kconfig.kmemcheck (limited to 'tools/include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b74e13312fdc..00bb04972612 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1864,13 +1864,6 @@ Built with 
CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, the default is off. - kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode - Valid arguments: 0, 1, 2 - kmemcheck=0 (disabled) - kmemcheck=1 (enabled) - kmemcheck=2 (one-shot mode) - Default: 2 (one-shot mode) - kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP) diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst index a81787cd47d7..e313925fb0fa 100644 --- a/Documentation/dev-tools/index.rst +++ b/Documentation/dev-tools/index.rst @@ -21,7 +21,6 @@ whole; patches welcome! kasan ubsan kmemleak - kmemcheck gdb-kernel-debugging kgdb kselftest diff --git a/Documentation/dev-tools/kmemcheck.rst b/Documentation/dev-tools/kmemcheck.rst deleted file mode 100644 index 7f3d1985de74..000000000000 --- a/Documentation/dev-tools/kmemcheck.rst +++ /dev/null @@ -1,733 +0,0 @@ -Getting started with kmemcheck -============================== - -Vegard Nossum - - -Introduction ------------- - -kmemcheck is a debugging feature for the Linux Kernel. More specifically, it -is a dynamic checker that detects and warns about some uses of uninitialized -memory. - -Userspace programmers might be familiar with Valgrind's memcheck. The main -difference between memcheck and kmemcheck is that memcheck works for userspace -programs only, and kmemcheck works for the kernel only. The implementations -are of course vastly different. Because of this, kmemcheck is not as accurate -as memcheck, but it turns out to be good enough in practice to discover real -programmer errors that the compiler is not able to find through static -analysis. - -Enabling kmemcheck on a kernel will probably slow it down to the extent that -the machine will not be usable for normal workloads such as e.g. an -interactive desktop. kmemcheck will also cause the kernel to use about twice -as much memory as normal. For this reason, kmemcheck is strictly a debugging -feature. 
- - -Downloading ------------ - -As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel. - - -Configuring and compiling -------------------------- - -kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of -configuration variables must have specific settings in order for the kmemcheck -menu to even appear in "menuconfig". These are: - -- ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n`` - This option is located under "General setup" / "Optimize for size". - - Without this, gcc will use certain optimizations that usually lead to - false positive warnings from kmemcheck. An example of this is a 16-bit - field in a struct, where gcc may load 32 bits, then discard the upper - 16 bits. kmemcheck sees only the 32-bit load, and may trigger a - warning for the upper 16 bits (if they're uninitialized). - -- ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y`` - This option is located under "General setup" / "Choose SLAB - allocator". - -- ``CONFIG_FUNCTION_TRACER=n`` - This option is located under "Kernel hacking" / "Tracers" / "Kernel - Function Tracer" - - When function tracing is compiled in, gcc emits a call to another - function at the beginning of every function. This means that when the - page fault handler is called, the ftrace framework will be called - before kmemcheck has had a chance to handle the fault. If ftrace then - modifies memory that was tracked by kmemcheck, the result is an - endless recursive page fault. - -- ``CONFIG_DEBUG_PAGEALLOC=n`` - This option is located under "Kernel hacking" / "Memory Debugging" - / "Debug page memory allocations". - -In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also -located under "Kernel hacking". With this, you will be able to get line number -information from the kmemcheck warnings, which is extremely valuable in -debugging a problem. This option is not mandatory, however, because it slows -down the compilation process and produces a much bigger kernel image. 
- -Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory -Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows -a description of the kmemcheck configuration variables: - -- ``CONFIG_KMEMCHECK`` - This must be enabled in order to use kmemcheck at all... - -- ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT`` - This option controls the status of kmemcheck at boot-time. "Enabled" - will enable kmemcheck right from the start, "disabled" will boot the - kernel as normal (but with the kmemcheck code compiled in, so it can - be enabled at run-time after the kernel has booted), and "one-shot" is - a special mode which will turn kmemcheck off automatically after - detecting the first use of uninitialized memory. - - If you are using kmemcheck to actively debug a problem, then you - probably want to choose "enabled" here. - - The one-shot mode is mostly useful in automated test setups because it - can prevent floods of warnings and increase the chances of the machine - surviving in case something is really wrong. In other cases, the one- - shot mode could actually be counter-productive because it would turn - itself off at the very first error -- in the case of a false positive - too -- and this would come in the way of debugging the specific - problem you were interested in. - - If you would like to use your kernel as normal, but with a chance to - enable kmemcheck in case of some problem, it might be a good idea to - choose "disabled" here. When kmemcheck is disabled, most of the run- - time overhead is not incurred, and the kernel will be almost as fast - as normal. - -- ``CONFIG_KMEMCHECK_QUEUE_SIZE`` - Select the maximum number of error reports to store in an internal - (fixed-size) buffer. Since errors can occur virtually anywhere and in - any context, we need a temporary storage area which is guaranteed not - to generate any other page faults when accessed. 
The queue will be - emptied as soon as a tasklet may be scheduled. If the queue is full, - new error reports will be lost. - - The default value of 64 is probably fine. If some code produces more - than 64 errors within an irqs-off section, then the code is likely to - produce many, many more, too, and these additional reports seldom give - any more information (the first report is usually the most valuable - anyway). - - This number might have to be adjusted if you are not using serial - console or similar to capture the kernel log. If you are using the - "dmesg" command to save the log, then getting a lot of kmemcheck - warnings might overflow the kernel log itself, and the earlier reports - will get lost in that way instead. Try setting this to 10 or so on - such a setup. - -- ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT`` - Select the number of shadow bytes to save along with each entry of the - error-report queue. These bytes indicate what parts of an allocation - are initialized, uninitialized, etc. and will be displayed when an - error is detected to help the debugging of a particular problem. - - The number entered here is actually the logarithm of the number of - bytes that will be saved. So if you pick for example 5 here, kmemcheck - will save 2^5 = 32 bytes. - - The default value should be fine for debugging most problems. It also - fits nicely within 80 columns. - -- ``CONFIG_KMEMCHECK_PARTIAL_OK`` - This option (when enabled) works around certain GCC optimizations that - produce 32-bit reads from 16-bit variables where the upper 16 bits are - thrown away afterwards. - - The default value (enabled) is recommended. This may of course hide - some real errors, but disabling it would probably produce a lot of - false positives. - -- ``CONFIG_KMEMCHECK_BITOPS_OK`` - This option silences warnings that would be generated for bit-field - accesses where not all the bits are initialized at the same time. This - may also hide some real bugs. 
- - This option is probably obsolete, or it should be replaced with - the kmemcheck-/bitfield-annotations for the code in question. The - default value is therefore fine. - -Now compile the kernel as usual. - - -How to use ----------- - -Booting -~~~~~~~ - -First some information about the command-line options. There is only one -option specific to kmemcheck, and this is called "kmemcheck". It can be used -to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT`` -option. Its possible settings are: - -- ``kmemcheck=0`` (disabled) -- ``kmemcheck=1`` (enabled) -- ``kmemcheck=2`` (one-shot mode) - -If SLUB debugging has been enabled in the kernel, it may take precedence over -kmemcheck in such a way that the slab caches which are under SLUB debugging -will not be tracked by kmemcheck. In order to ensure that this doesn't happen -(even though it shouldn't by default), use SLUB's boot option ``slub_debug``, -like this: ``slub_debug=-`` - -In fact, this option may also be used for fine-grained control over SLUB vs. -kmemcheck. For example, if the command line includes -``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only -for the "dentry" slab cache, and with kmemcheck tracking all the other -caches. This is advanced usage, however, and is not generally recommended. - - -Run-time enable/disable -~~~~~~~~~~~~~~~~~~~~~~~ - -When the kernel has booted, it is possible to enable or disable kmemcheck at -run-time. WARNING: This feature is still experimental and may cause false -positive warnings to appear. Therefore, try not to use this. If you find that -it doesn't work properly (e.g. you see an unreasonable amount of warnings), I -will be happy to take bug reports. - -Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.:: - - $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck - -The numbers are the same as for the ``kmemcheck=`` command-line option. 
- - -Debugging -~~~~~~~~~ - -A typical report will look something like this:: - - WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) - 80000000000000000000000000000000000000000088ffff0000000000000000 - i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u - ^ - - Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A - RIP: 0010:[] [] __dequeue_signal+0xc8/0x190 - RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002 - RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 - RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84 - RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000 - R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e - R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8 - FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000 - CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 - CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0 - DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400 - [] dequeue_signal+0x8e/0x170 - [] get_signal_to_deliver+0x98/0x390 - [] do_notify_resume+0xad/0x7d0 - [] int_signal+0x12/0x17 - [] 0xffffffffffffffff - -The single most valuable information in this report is the RIP (or EIP on 32- -bit) value. This will help us pinpoint exactly which instruction caused -the warning. - -If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do -is give this address to the addr2line program, like this:: - - $ addr2line -e vmlinux -i ffffffff8104ede8 - arch/x86/include/asm/string_64.h:12 - include/asm-generic/siginfo.h:287 - kernel/signal.c:380 - kernel/signal.c:410 - -The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:** -This must be the vmlinux of the kernel that produced the warning in the -first place! If not, the line number information will almost certainly be -wrong.
- -The "``-i``" tells addr2line to also print the line numbers of inlined -functions. In this case, the flag was very important, because otherwise, -it would only have printed the first line, which is just a call to -``memcpy()``, which could be called from a thousand places in the kernel, and -is therefore not very useful. These inlined functions would not show up in -the stack trace above, simply because the kernel doesn't load the extra -debugging information. This technique can of course be used with ordinary -kernel oopses as well. - -In this case, it's the caller of ``memcpy()`` that is interesting, and it can be -found in ``include/asm-generic/siginfo.h``, line 287:: - - 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) - 282 { - 283 if (from->si_code < 0) - 284 memcpy(to, from, sizeof(*to)); - 285 else - 286 /* _sigchld is currently the largest know union member */ - 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); - 288 } - -Since this was a read (kmemcheck usually warns about reads only, though it can -warn about writes to unallocated or freed memory as well), it was probably the -"from" argument which contained some uninitialized bytes. Following the chain -of calls, we move upwards to see where "from" was allocated or initialized, -``kernel/signal.c``, line 380:: - - 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) - 360 { - ... - 367 list_for_each_entry(q, &list->list, list) { - 368 if (q->info.si_signo == sig) { - 369 if (first) - 370 goto still_pending; - 371 first = q; - ... - 377 if (first) { - 378 still_pending: - 379 list_del_init(&first->list); - 380 copy_siginfo(info, &first->info); - 381 __sigqueue_free(first); - ... - 392 } - 393 } - -Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The -variable ``first`` was found on a list -- passed in as the second argument to -``collect_signal()``. 
We continue our journey through the stack, to figure out -where the item on "list" was allocated or initialized. We move to line 410:: - - 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, - 396 siginfo_t *info) - 397 { - ... - 410 collect_signal(sig, pending, info); - ... - 414 } - -Now we need to follow the ``pending`` pointer, since that is being passed on to -``collect_signal()`` as ``list``. At this point, we've run out of lines from the -"addr2line" output. Not to worry, we just paste the next addresses from the -kmemcheck stack dump, i.e.:: - - [] dequeue_signal+0x8e/0x170 - [] get_signal_to_deliver+0x98/0x390 - [] do_notify_resume+0xad/0x7d0 - [] int_signal+0x12/0x17 - - $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \ - ffffffff8100b87d ffffffff8100c7b5 - kernel/signal.c:446 - kernel/signal.c:1806 - arch/x86/kernel/signal.c:805 - arch/x86/kernel/signal.c:871 - arch/x86/kernel/entry_64.S:694 - -Remember that since these addresses were found on the stack and not as the -RIP value, they actually point to the _next_ instruction (they are return -addresses). This becomes obvious when we look at the code for line 446:: - - 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) - 423 { - ... - 431 signr = __dequeue_signal(&tsk->signal->shared_pending, - 432 mask, info); - 433 /* - 434 * itimer signal ? - 435 * - 436 * itimers are process shared and we restart periodic - 437 * itimers in the signal delivery path to prevent DoS - 438 * attacks in the high resolution timer case. This is - 439 * compliant with the old way of self restarting - 440 * itimers, as the SIGALRM is a legacy signal and only - 441 * queued once. Changing the restart behaviour to - 442 * restart the timer in the signal dequeue path is - 443 * reducing the timer noise on heavy loaded !highres - 444 * systems too. - 445 */ - 446 if (unlikely(signr == SIGALRM)) { - ... 
- 489 } - -So instead of looking at 446, we should be looking at 431, which is the line -that executes just before 446. Here we see that what we are looking for is -``&tsk->signal->shared_pending``. - -Our next task is now to figure out which function that puts items on this -``shared_pending`` list. A crude, but efficient tool, is ``git grep``:: - - $ git grep -n 'shared_pending' kernel/ - ... - kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending; - kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending; - ... - -There were more results, but none of them were related to list operations, -and these were the only assignments. We inspect the line numbers more closely -and find that this is indeed where items are being added to the list:: - - 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - 817 int group) - 818 { - ... - 828 pending = group ? &t->signal->shared_pending : &t->pending; - ... - 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && - 852 (is_si_special(info) || - 853 info->si_code >= 0))); - 854 if (q) { - 855 list_add_tail(&q->list, &pending->list); - ... - 890 } - -and:: - - 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) - 1310 { - .... - 1339 pending = group ? &t->signal->shared_pending : &t->pending; - 1340 list_add_tail(&q->list, &pending->list); - .... - 1347 } - -In the first case, the list element we are looking for, ``q``, is being -returned from the function ``__sigqueue_alloc()``, which looks like an -allocation function. 
Let's take a look at it:: - - 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, - 188 int override_rlimit) - 189 { - 190 struct sigqueue *q = NULL; - 191 struct user_struct *user; - 192 - 193 /* - 194 * We won't get problems with the target's UID changing under us - 195 * because changing it requires RCU be used, and if t != current, the - 196 * caller must be holding the RCU readlock (by way of a spinlock) and - 197 * we use RCU protection here - 198 */ - 199 user = get_uid(__task_cred(t)->user); - 200 atomic_inc(&user->sigpending); - 201 if (override_rlimit || - 202 atomic_read(&user->sigpending) <= - 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) - 204 q = kmem_cache_alloc(sigqueue_cachep, flags); - 205 if (unlikely(q == NULL)) { - 206 atomic_dec(&user->sigpending); - 207 free_uid(user); - 208 } else { - 209 INIT_LIST_HEAD(&q->list); - 210 q->flags = 0; - 211 q->user = user; - 212 } - 213 - 214 return q; - 215 } - -We see that this function initializes ``q->list``, ``q->flags``, and -``q->user``. It seems that now is the time to look at the definition of -``struct sigqueue``, e.g.:: - - 14 struct sigqueue { - 15 struct list_head list; - 16 int flags; - 17 siginfo_t info; - 18 struct user_struct *user; - 19 }; - -And, you might remember, it was a ``memcpy()`` on ``&first->info`` that -caused the warning, so this makes perfect sense. It also seems reasonable -to assume that it is the caller of ``__sigqueue_alloc()`` that has the -responsibility of filling out (initializing) this member. - -But just which fields of the struct were uninitialized? Let's look at -kmemcheck's report again:: - - WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) - 80000000000000000000000000000000000000000088ffff0000000000000000 - i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u - ^ - -These first two lines are the memory dump of the memory object itself, and -the shadow bytemap, respectively. 
The memory object itself is in this case -``&first->info``. Just beware that the start of this dump is NOT the start -of the object itself! The position of the caret (^) corresponds with the -address of the read (ffff88003e4a2024). - -The shadow bytemap dump legend is as follows: - -- i: initialized -- u: uninitialized -- a: unallocated (memory has been allocated by the slab layer, but has not - yet been handed off to anybody) -- f: freed (memory has been allocated by the slab layer, but has been freed - by the previous owner) - -In order to figure out where (relative to the start of the object) the -uninitialized memory was located, we have to look at the disassembly. For -that, we'll need the RIP address again:: - - RIP: 0010:[] [] __dequeue_signal+0xc8/0x190 - - $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8: - ffffffff8104edc8: mov %r8,0x8(%r8) - ffffffff8104edcc: test %r10d,%r10d - ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168> - ffffffff8104edd5: mov %rax,%rdx - ffffffff8104edd8: mov $0xc,%ecx - ffffffff8104eddd: mov %r13,%rdi - ffffffff8104ede0: mov $0x30,%eax - ffffffff8104ede5: mov %rdx,%rsi - ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi) - ffffffff8104edea: test $0x2,%al - ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0> - ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi) - ffffffff8104edf0: test $0x1,%al - ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5> - ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi) - ffffffff8104edf5: mov %r8,%rdi - ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free> - -As expected, it's the "``rep movsl``" instruction from the ``memcpy()`` -that causes the warning. We know about ``REP MOVSL`` that it uses the register -``RCX`` to count the number of remaining iterations. 
By taking a look at the -register dump again (from the kmemcheck report), we can figure out how many -bytes were left to copy:: - - RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 - -By looking at the disassembly, we also see that ``%ecx`` is being loaded -with the value ``$0xc`` just before (ffffffff8104edd8), so we are very -lucky. Keep in mind that this is the number of iterations, not bytes. And -since this is a "long" operation, we need to multiply by 4 to get the -number of bytes. So this means that the uninitialized value was encountered -at 4 * (0xc - 0x9) = 12 bytes from the start of the object. - -We can now try to figure out which field of the "``struct siginfo``" that -was not initialized. This is the beginning of the struct:: - - 40 typedef struct siginfo { - 41 int si_signo; - 42 int si_errno; - 43 int si_code; - 44 - 45 union { - .. - 92 } _sifields; - 93 } siginfo_t; - -On 64-bit, the int is 4 bytes long, so it must the union member that has -not been initialized. We can verify this using gdb:: - - $ gdb vmlinux - ... - (gdb) p &((struct siginfo *) 0)->_sifields - $1 = (union {...} *) 0x10 - -Actually, it seems that the union member is located at offset 0x10 -- which -means that gcc has inserted 4 bytes of padding between the members ``si_code`` -and ``_sifields``. We can now get a fuller picture of the memory dump:: - - _----------------------------=> si_code - / _--------------------=> (padding) - | / _------------=> _sifields(._kill._pid) - | | / _----=> _sifields(._kill._uid) - | | | / - -------|-------|-------|-------| - 80000000000000000000000000000000000000000088ffff0000000000000000 - i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u - -This allows us to realize another important fact: ``si_code`` contains the -value 0x80. Remember that x86 is little endian, so the first 4 bytes -"80000000" are really the number 0x00000080. 
With a bit of research, we -find that this is actually the constant ``SI_KERNEL`` defined in -``include/asm-generic/siginfo.h``:: - - 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ - -This macro is used in exactly one place in the x86 kernel: In ``send_signal()`` -in ``kernel/signal.c``:: - - 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - 817 int group) - 818 { - ... - 828 pending = group ? &t->signal->shared_pending : &t->pending; - ... - 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && - 852 (is_si_special(info) || - 853 info->si_code >= 0))); - 854 if (q) { - 855 list_add_tail(&q->list, &pending->list); - 856 switch ((unsigned long) info) { - ... - 865 case (unsigned long) SEND_SIG_PRIV: - 866 q->info.si_signo = sig; - 867 q->info.si_errno = 0; - 868 q->info.si_code = SI_KERNEL; - 869 q->info.si_pid = 0; - 870 q->info.si_uid = 0; - 871 break; - ... - 890 } - -Not only does this match with the ``.si_code`` member, it also matches the place -we found earlier when looking for where siginfo_t objects are enqueued on the -``shared_pending`` list. - -So to sum up: It seems that it is the padding introduced by the compiler -between two struct fields that is uninitialized, and this gets reported when -we do a ``memcpy()`` on the struct. This means that we have identified a false -positive warning. - -Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls -when both the source and destination addresses are tracked. (Instead, we copy -the shadow bytemap as well). In this case, the destination address clearly -was not tracked. We can dig a little deeper into the stack trace from above:: - - arch/x86/kernel/signal.c:805 - arch/x86/kernel/signal.c:871 - arch/x86/kernel/entry_64.S:694 - -And we clearly see that the destination siginfo object is located on the -stack:: - - 782 static void do_signal(struct pt_regs *regs) - 783 { - 784 struct k_sigaction ka; - 785 siginfo_t info; - ... 
- 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL); - ... - 854 } - -And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the -destination argument. - -Now, even though we didn't find an actual error here, the example is still a -good one, because it shows how one would go about to find out what the report -was all about. - - -Annotating false positives -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are a few different ways to make annotations in the source code that -will keep kmemcheck from checking and reporting certain allocations. Here -they are: - -- ``__GFP_NOTRACK_FALSE_POSITIVE`` - This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()`` - (therefore also to other functions that end up calling one of - these) to indicate that the allocation should not be tracked - because it would lead to a false positive report. This is a "big - hammer" way of silencing kmemcheck; after all, even if the false - positive pertains to particular field in a struct, for example, we - will now lose the ability to find (real) errors in other parts of - the same struct. - - Example:: - - /* No warnings will ever trigger on accessing any part of x */ - x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE); - -- ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and - ``kmemcheck_annotate_bitfield(ptr, name)`` - The first two of these three macros can be used inside struct - definitions to signal, respectively, the beginning and end of a - bitfield. Additionally, this will assign the bitfield a name, which - is given as an argument to the macros. - - Having used these markers, one can later use - kmemcheck_annotate_bitfield() at the point of allocation, to indicate - which parts of the allocation is part of a bitfield. 
- - Example:: - - struct foo { - int x; - - kmemcheck_bitfield_begin(flags); - int flag_a:1; - int flag_b:1; - kmemcheck_bitfield_end(flags); - - int y; - }; - - struct foo *x = kmalloc(sizeof *x); - - /* No warnings will trigger on accessing the bitfield of x */ - kmemcheck_annotate_bitfield(x, flags); - - Note that ``kmemcheck_annotate_bitfield()`` can be used even before the - return value of ``kmalloc()`` is checked -- in other words, passing NULL - as the first argument is legal (and will do nothing). - - -Reporting errors ----------------- - -As we have seen, kmemcheck will produce false positive reports. Therefore, it -is not very wise to blindly post kmemcheck warnings to mailing lists and -maintainers. Instead, I encourage maintainers and developers to find errors -in their own code. If you get a warning, you can try to work around it, try -to figure out if it's a real error or not, or simply ignore it. Most -developers know their own code and will quickly and efficiently determine the -root cause of a kmemcheck report. This is therefore also the most efficient -way to work with kmemcheck. - -That said, we (the kmemcheck maintainers) will always be on the lookout for -false positives that we can annotate and silence. So whatever you find, -please drop us a note privately! Kernel configs and steps to reproduce (if -available) are of course a great help too. - -Happy hacking! - - -Technical description ---------------------- - -kmemcheck works by marking memory pages non-present. This means that whenever -somebody attempts to access the page, a page fault is generated. The page -fault handler notices that the page was in fact only hidden, and so it calls -on the kmemcheck code to make further investigations. - -When the investigations are completed, kmemcheck "shows" the page by marking -it present (as it would be under normal circumstances). This way, the -interrupted code can continue as usual. 
- -But after the instruction has been executed, we should hide the page again, so -that we can catch the next access too! Now kmemcheck makes use of a debugging -feature of the processor, namely single-stepping. When the processor has -finished the one instruction that generated the memory access, a debug -exception is raised. From here, we simply hide the page again and continue -execution, this time with the single-stepping feature turned off. - -kmemcheck requires some assistance from the memory allocator in order to work. -The memory allocator needs to - - 1. Tell kmemcheck about newly allocated pages and pages that are about to - be freed. This allows kmemcheck to set up and tear down the shadow memory - for the pages in question. The shadow memory stores the status of each - byte in the allocation proper, e.g. whether it is initialized or - uninitialized. - - 2. Tell kmemcheck which parts of memory should be marked uninitialized. - There are actually a few more states, such as "not yet allocated" and - "recently freed". - -If a slab cache is set up using the SLAB_NOTRACK flag, it will never return -memory that can take page faults because of kmemcheck. - -If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still -request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags. -This does not prevent the page faults from occurring, however, but marks the -object in question as being initialized so that no warnings will ever be -produced for this object. - -Currently, the SLAB and SLUB allocators are supported by kmemcheck. 
diff --git a/MAINTAINERS b/MAINTAINERS index 7e9c887ad951..ac814d3dd1c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7688,16 +7688,6 @@ F: include/linux/kdb.h F: include/linux/kgdb.h F: kernel/debug/ -KMEMCHECK -M: Vegard Nossum -M: Pekka Enberg -S: Maintained -F: Documentation/dev-tools/kmemcheck.rst -F: arch/x86/include/asm/kmemcheck.h -F: arch/x86/mm/kmemcheck/ -F: include/linux/kmemcheck.h -F: mm/kmemcheck.c - KMEMLEAK M: Catalin Marinas S: Maintained diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f08977d82ca0..cb678192da4a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -112,7 +112,6 @@ config X86 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_ARCH_KGDB - select HAVE_ARCH_KMEMCHECK select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT @@ -1430,7 +1429,7 @@ config ARCH_DMA_ADDR_T_64BIT config X86_DIRECT_GBPAGES def_bool y - depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK + depends on X86_64 && !DEBUG_PAGEALLOC ---help--- Certain kernel features effectively disable kernel linear 1 GB mappings (even if the CPU otherwise diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h index 945a0337fbcf..ea32a7d3cf1b 100644 --- a/arch/x86/include/asm/kmemcheck.h +++ b/arch/x86/include/asm/kmemcheck.h @@ -1,43 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_KMEMCHECK_H -#define ASM_X86_KMEMCHECK_H - -#include -#include - -#ifdef CONFIG_KMEMCHECK -bool kmemcheck_active(struct pt_regs *regs); - -void kmemcheck_show(struct pt_regs *regs); -void kmemcheck_hide(struct pt_regs *regs); - -bool kmemcheck_fault(struct pt_regs *regs, - unsigned long address, unsigned long error_code); -bool kmemcheck_trap(struct pt_regs *regs); -#else -static inline bool kmemcheck_active(struct pt_regs *regs) -{ - return false; -} - -static inline void kmemcheck_show(struct pt_regs *regs) -{ -} - -static 
inline void kmemcheck_hide(struct pt_regs *regs) -{ -} - -static inline bool kmemcheck_fault(struct pt_regs *regs, - unsigned long address, unsigned long error_code) -{ - return false; -} - -static inline bool kmemcheck_trap(struct pt_regs *regs) -{ - return false; -} -#endif /* CONFIG_KMEMCHECK */ - -#endif diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 076502241eae..55d392c6bd29 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -179,8 +179,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) * No 3D Now! */ -#ifndef CONFIG_KMEMCHECK - #if (__GNUC__ >= 4) #define memcpy(t, f, n) __builtin_memcpy(t, f, n) #else @@ -189,13 +187,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) #endif -#else -/* - * kmemcheck becomes very happy if we use the REP instructions unconditionally, - * because it means that we know both memory operands in advance. - */ -#define memcpy(t, f, n) __memcpy((t), (f), (n)) -#endif #endif #endif /* !CONFIG_FORTIFY_SOURCE */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 0b1b4445f4c5..533f74c300c2 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -33,7 +33,6 @@ extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); #ifndef CONFIG_FORTIFY_SOURCE -#ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ ({ \ @@ -46,13 +45,6 @@ extern void *__memcpy(void *to, const void *from, size_t len); __ret; \ }) #endif -#else -/* - * kmemcheck becomes very happy if we use the REP instructions unconditionally, - * because it means that we know both memory operands in advance. 
- */ -#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b720dacac051..b1af22073e28 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); -#ifdef CONFIG_KMEMCHECK - /* - * P4s have a "fast strings" feature which causes single- - * stepping REP instructions to only generate a #DB on - * cache-line boundaries. - * - * Ingo Molnar reported a Pentium D (model 6) and a Xeon - * (model 2) with the same problem. - */ - if (c->x86 == 15) - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) - pr_info("kmemcheck: Disabling fast string operations\n"); -#endif - /* * If fast string is not enabled in IA32_MISC_ENABLE for any reason, * clear the fast string and enhanced fast string CPU capabilities. diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 7ba7f3d7f477..8e13b8cc6bed 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -29,8 +29,6 @@ obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o -obj-$(CONFIG_KMEMCHECK) += kmemcheck/ - KASAN_SANITIZE_kasan_init_$(BITS).o := n obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ef94620ceb8a..6fdf91ef130a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -163,12 +163,11 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { /* - * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will - * use small pages. + * For pagealloc debugging, identity mapping will use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. 
*/ - if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK)) + if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) page_size_mask |= 1 << PG_LEVEL_2M; else direct_gbpages = 0; diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile deleted file mode 100644 index 520b3bce4095..000000000000 --- a/arch/x86/mm/kmemcheck/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 872ec4159a68..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -1,228 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -#include "error.h" -#include "shadow.h" - -enum kmemcheck_error_type { - KMEMCHECK_ERROR_INVALID_ACCESS, - KMEMCHECK_ERROR_BUG, -}; - -#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) - -struct kmemcheck_error { - enum kmemcheck_error_type type; - - union { - /* KMEMCHECK_ERROR_INVALID_ACCESS */ - struct { - /* Kind of access that caused the error */ - enum kmemcheck_shadow state; - /* Address and size of the erroneous read */ - unsigned long address; - unsigned int size; - }; - }; - - struct pt_regs regs; - struct stack_trace trace; - unsigned long trace_entries[32]; - - /* We compress it to a char. */ - unsigned char shadow_copy[SHADOW_COPY_SIZE]; - unsigned char memory_copy[SHADOW_COPY_SIZE]; -}; - -/* - * Create a ring queue of errors to output. We can't call printk() directly - * from the kmemcheck traps, since this may call the console drivers and - * result in a recursive fault. 
- */ -static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; -static unsigned int error_count; -static unsigned int error_rd; -static unsigned int error_wr; -static unsigned int error_missed_count; - -static struct kmemcheck_error *error_next_wr(void) -{ - struct kmemcheck_error *e; - - if (error_count == ARRAY_SIZE(error_fifo)) { - ++error_missed_count; - return NULL; - } - - e = &error_fifo[error_wr]; - if (++error_wr == ARRAY_SIZE(error_fifo)) - error_wr = 0; - ++error_count; - return e; -} - -static struct kmemcheck_error *error_next_rd(void) -{ - struct kmemcheck_error *e; - - if (error_count == 0) - return NULL; - - e = &error_fifo[error_rd]; - if (++error_rd == ARRAY_SIZE(error_fifo)) - error_rd = 0; - --error_count; - return e; -} - -void kmemcheck_error_recall(void) -{ - static const char *desc[] = { - [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", - [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", - [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", - [KMEMCHECK_SHADOW_FREED] = "freed", - }; - - static const char short_desc[] = { - [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', - [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', - [KMEMCHECK_SHADOW_INITIALIZED] = 'i', - [KMEMCHECK_SHADOW_FREED] = 'f', - }; - - struct kmemcheck_error *e; - unsigned int i; - - e = error_next_rd(); - if (!e) - return; - - switch (e->type) { - case KMEMCHECK_ERROR_INVALID_ACCESS: - printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", - 8 * e->size, e->state < ARRAY_SIZE(desc) ? 
- desc[e->state] : "(invalid shadow state)", - (void *) e->address); - - printk(KERN_WARNING); - for (i = 0; i < SHADOW_COPY_SIZE; ++i) - printk(KERN_CONT "%02x", e->memory_copy[i]); - printk(KERN_CONT "\n"); - - printk(KERN_WARNING); - for (i = 0; i < SHADOW_COPY_SIZE; ++i) { - if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) - printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); - else - printk(KERN_CONT " ?"); - } - printk(KERN_CONT "\n"); - printk(KERN_WARNING "%*c\n", 2 + 2 - * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); - break; - case KMEMCHECK_ERROR_BUG: - printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); - break; - } - - __show_regs(&e->regs, 1); - print_stack_trace(&e->trace, 0); -} - -static void do_wakeup(unsigned long data) -{ - while (error_count > 0) - kmemcheck_error_recall(); - - if (error_missed_count > 0) { - printk(KERN_WARNING "kmemcheck: Lost %d error reports because " - "the queue was too small\n", error_missed_count); - error_missed_count = 0; - } -} - -static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); - -/* - * Save the context of an error report. - */ -void kmemcheck_error_save(enum kmemcheck_shadow state, - unsigned long address, unsigned int size, struct pt_regs *regs) -{ - static unsigned long prev_ip; - - struct kmemcheck_error *e; - void *shadow_copy; - void *memory_copy; - - /* Don't report several adjacent errors from the same EIP. 
*/ - if (regs->ip == prev_ip) - return; - prev_ip = regs->ip; - - e = error_next_wr(); - if (!e) - return; - - e->type = KMEMCHECK_ERROR_INVALID_ACCESS; - - e->state = state; - e->address = address; - e->size = size; - - /* Save regs */ - memcpy(&e->regs, regs, sizeof(*regs)); - - /* Save stack trace */ - e->trace.nr_entries = 0; - e->trace.entries = e->trace_entries; - e->trace.max_entries = ARRAY_SIZE(e->trace_entries); - e->trace.skip = 0; - save_stack_trace_regs(regs, &e->trace); - - /* Round address down to nearest 16 bytes */ - shadow_copy = kmemcheck_shadow_lookup(address - & ~(SHADOW_COPY_SIZE - 1)); - BUG_ON(!shadow_copy); - - memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); - - kmemcheck_show_addr(address); - memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); - memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); - kmemcheck_hide_addr(address); - - tasklet_hi_schedule_first(&kmemcheck_tasklet); -} - -/* - * Save the context of a kmemcheck bug. - */ -void kmemcheck_error_save_bug(struct pt_regs *regs) -{ - struct kmemcheck_error *e; - - e = error_next_wr(); - if (!e) - return; - - e->type = KMEMCHECK_ERROR_BUG; - - memcpy(&e->regs, regs, sizeof(*regs)); - - e->trace.nr_entries = 0; - e->trace.entries = e->trace_entries; - e->trace.max_entries = ARRAY_SIZE(e->trace_entries); - e->trace.skip = 1; - save_stack_trace(&e->trace); - - tasklet_hi_schedule_first(&kmemcheck_tasklet); -} diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h index 39f80d7a874d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/error.h +++ b/arch/x86/mm/kmemcheck/error.h @@ -1,16 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H -#define ARCH__X86__MM__KMEMCHECK__ERROR_H - -#include - -#include "shadow.h" - -void kmemcheck_error_save(enum kmemcheck_shadow state, - unsigned long address, unsigned int size, struct pt_regs *regs); - -void kmemcheck_error_save_bug(struct pt_regs *regs); - -void 
kmemcheck_error_recall(void); - -#endif diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c deleted file mode 100644 index 4515bae36bbe..000000000000 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ /dev/null @@ -1,658 +0,0 @@ -/** - * kmemcheck - a heavyweight memory checker for the linux kernel - * Copyright (C) 2007, 2008 Vegard Nossum - * (With a lot of help from Ingo Molnar and Pekka Enberg.) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2) as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "error.h" -#include "opcode.h" -#include "pte.h" -#include "selftest.h" -#include "shadow.h" - - -#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT -# define KMEMCHECK_ENABLED 0 -#endif - -#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT -# define KMEMCHECK_ENABLED 1 -#endif - -#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT -# define KMEMCHECK_ENABLED 2 -#endif - -int kmemcheck_enabled = KMEMCHECK_ENABLED; - -int __init kmemcheck_init(void) -{ -#ifdef CONFIG_SMP - /* - * Limit SMP to use a single CPU. We rely on the fact that this code - * runs before SMP is set up. - */ - if (setup_max_cpus > 1) { - printk(KERN_INFO - "kmemcheck: Limiting number of CPUs to 1.\n"); - setup_max_cpus = 1; - } -#endif - - if (!kmemcheck_selftest()) { - printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); - kmemcheck_enabled = 0; - return -EINVAL; - } - - printk(KERN_INFO "kmemcheck: Initialized\n"); - return 0; -} - -early_initcall(kmemcheck_init); - -/* - * We need to parse the kmemcheck= option before any memory is allocated. 
- */ -static int __init param_kmemcheck(char *str) -{ - int val; - int ret; - - if (!str) - return -EINVAL; - - ret = kstrtoint(str, 0, &val); - if (ret) - return ret; - kmemcheck_enabled = val; - return 0; -} - -early_param("kmemcheck", param_kmemcheck); - -int kmemcheck_show_addr(unsigned long address) -{ - pte_t *pte; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return 0; - - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - __flush_tlb_one(address); - return 1; -} - -int kmemcheck_hide_addr(unsigned long address) -{ - pte_t *pte; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return 0; - - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - __flush_tlb_one(address); - return 1; -} - -struct kmemcheck_context { - bool busy; - int balance; - - /* - * There can be at most two memory operands to an instruction, but - * each address can cross a page boundary -- so we may need up to - * four addresses that must be hidden/revealed for each fault. - */ - unsigned long addr[4]; - unsigned long n_addrs; - unsigned long flags; - - /* Data size of the instruction that caused a fault. 
*/ - unsigned int size; -}; - -static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); - -bool kmemcheck_active(struct pt_regs *regs) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - return data->balance > 0; -} - -/* Save an address that needs to be shown/hidden */ -static void kmemcheck_save_addr(unsigned long addr) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); - data->addr[data->n_addrs++] = addr; -} - -static unsigned int kmemcheck_show_all(void) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - unsigned int i; - unsigned int n; - - n = 0; - for (i = 0; i < data->n_addrs; ++i) - n += kmemcheck_show_addr(data->addr[i]); - - return n; -} - -static unsigned int kmemcheck_hide_all(void) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - unsigned int i; - unsigned int n; - - n = 0; - for (i = 0; i < data->n_addrs; ++i) - n += kmemcheck_hide_addr(data->addr[i]); - - return n; -} - -/* - * Called from the #PF handler. - */ -void kmemcheck_show(struct pt_regs *regs) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - BUG_ON(!irqs_disabled()); - - if (unlikely(data->balance != 0)) { - kmemcheck_show_all(); - kmemcheck_error_save_bug(regs); - data->balance = 0; - return; - } - - /* - * None of the addresses actually belonged to kmemcheck. Note that - * this is not an error. - */ - if (kmemcheck_show_all() == 0) - return; - - ++data->balance; - - /* - * The IF needs to be cleared as well, so that the faulting - * instruction can run "uninterrupted". Otherwise, we might take - * an interrupt and start executing that before we've had a chance - * to hide the page again. 
- * - * NOTE: In the rare case of multiple faults, we must not override - * the original flags: - */ - if (!(regs->flags & X86_EFLAGS_TF)) - data->flags = regs->flags; - - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; -} - -/* - * Called from the #DB handler. - */ -void kmemcheck_hide(struct pt_regs *regs) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - int n; - - BUG_ON(!irqs_disabled()); - - if (unlikely(data->balance != 1)) { - kmemcheck_show_all(); - kmemcheck_error_save_bug(regs); - data->n_addrs = 0; - data->balance = 0; - - if (!(data->flags & X86_EFLAGS_TF)) - regs->flags &= ~X86_EFLAGS_TF; - if (data->flags & X86_EFLAGS_IF) - regs->flags |= X86_EFLAGS_IF; - return; - } - - if (kmemcheck_enabled) - n = kmemcheck_hide_all(); - else - n = kmemcheck_show_all(); - - if (n == 0) - return; - - --data->balance; - - data->n_addrs = 0; - - if (!(data->flags & X86_EFLAGS_TF)) - regs->flags &= ~X86_EFLAGS_TF; - if (data->flags & X86_EFLAGS_IF) - regs->flags |= X86_EFLAGS_IF; -} - -void kmemcheck_show_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) { - unsigned long address; - pte_t *pte; - unsigned int level; - - address = (unsigned long) page_address(&p[i]); - pte = lookup_address(address, &level); - BUG_ON(!pte); - BUG_ON(level != PG_LEVEL_4K); - - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); - __flush_tlb_one(address); - } -} - -bool kmemcheck_page_is_tracked(struct page *p) -{ - /* This will also check the "hidden" flag of the PTE. 
*/ - return kmemcheck_pte_lookup((unsigned long) page_address(p)); -} - -void kmemcheck_hide_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) { - unsigned long address; - pte_t *pte; - unsigned int level; - - address = (unsigned long) page_address(&p[i]); - pte = lookup_address(address, &level); - BUG_ON(!pte); - BUG_ON(level != PG_LEVEL_4K); - - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); - __flush_tlb_one(address); - } -} - -/* Access may NOT cross page boundary */ -static void kmemcheck_read_strict(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - void *shadow; - enum kmemcheck_shadow status; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return; - - kmemcheck_save_addr(addr); - status = kmemcheck_shadow_test(shadow, size); - if (status == KMEMCHECK_SHADOW_INITIALIZED) - return; - - if (kmemcheck_enabled) - kmemcheck_error_save(status, addr, size, regs); - - if (kmemcheck_enabled == 2) - kmemcheck_enabled = 0; - - /* Don't warn about it again. */ - kmemcheck_shadow_set(shadow, size); -} - -bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) -{ - enum kmemcheck_shadow status; - void *shadow; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return true; - - status = kmemcheck_shadow_test_all(shadow, size); - - return status == KMEMCHECK_SHADOW_INITIALIZED; -} - -/* Access may cross page boundary */ -static void kmemcheck_read(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - unsigned long page = addr & PAGE_MASK; - unsigned long next_addr = addr + size - 1; - unsigned long next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - kmemcheck_read_strict(regs, addr, size); - return; - } - - /* - * What we do is basically to split the access across the - * two pages and handle each part separately. 
Yes, this means - * that we may now see reads that are 3 + 5 bytes, for - * example (and if both are uninitialized, there will be two - * reports), but it makes the code a lot simpler. - */ - kmemcheck_read_strict(regs, addr, next_page - addr); - kmemcheck_read_strict(regs, next_page, next_addr - next_page); -} - -static void kmemcheck_write_strict(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - void *shadow; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return; - - kmemcheck_save_addr(addr); - kmemcheck_shadow_set(shadow, size); -} - -static void kmemcheck_write(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - unsigned long page = addr & PAGE_MASK; - unsigned long next_addr = addr + size - 1; - unsigned long next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - kmemcheck_write_strict(regs, addr, size); - return; - } - - /* See comment in kmemcheck_read(). */ - kmemcheck_write_strict(regs, addr, next_page - addr); - kmemcheck_write_strict(regs, next_page, next_addr - next_page); -} - -/* - * Copying is hard. We have two addresses, each of which may be split across - * a page (and each page will have different shadow addresses). 
- */ -static void kmemcheck_copy(struct pt_regs *regs, - unsigned long src_addr, unsigned long dst_addr, unsigned int size) -{ - uint8_t shadow[8]; - enum kmemcheck_shadow status; - - unsigned long page; - unsigned long next_addr; - unsigned long next_page; - - uint8_t *x; - unsigned int i; - unsigned int n; - - BUG_ON(size > sizeof(shadow)); - - page = src_addr & PAGE_MASK; - next_addr = src_addr + size - 1; - next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - /* Same page */ - x = kmemcheck_shadow_lookup(src_addr); - if (x) { - kmemcheck_save_addr(src_addr); - for (i = 0; i < size; ++i) - shadow[i] = x[i]; - } else { - for (i = 0; i < size; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } else { - n = next_page - src_addr; - BUG_ON(n > sizeof(shadow)); - - /* First page */ - x = kmemcheck_shadow_lookup(src_addr); - if (x) { - kmemcheck_save_addr(src_addr); - for (i = 0; i < n; ++i) - shadow[i] = x[i]; - } else { - /* Not tracked */ - for (i = 0; i < n; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - - /* Second page */ - x = kmemcheck_shadow_lookup(next_page); - if (x) { - kmemcheck_save_addr(next_page); - for (i = n; i < size; ++i) - shadow[i] = x[i - n]; - } else { - /* Not tracked */ - for (i = n; i < size; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - - page = dst_addr & PAGE_MASK; - next_addr = dst_addr + size - 1; - next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - /* Same page */ - x = kmemcheck_shadow_lookup(dst_addr); - if (x) { - kmemcheck_save_addr(dst_addr); - for (i = 0; i < size; ++i) { - x[i] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - } else { - n = next_page - dst_addr; - BUG_ON(n > sizeof(shadow)); - - /* First page */ - x = kmemcheck_shadow_lookup(dst_addr); - if (x) { - kmemcheck_save_addr(dst_addr); - for (i = 0; i < n; ++i) { - x[i] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - - /* Second page */ - x = 
kmemcheck_shadow_lookup(next_page); - if (x) { - kmemcheck_save_addr(next_page); - for (i = n; i < size; ++i) { - x[i - n] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - } - - status = kmemcheck_shadow_test(shadow, size); - if (status == KMEMCHECK_SHADOW_INITIALIZED) - return; - - if (kmemcheck_enabled) - kmemcheck_error_save(status, src_addr, size, regs); - - if (kmemcheck_enabled == 2) - kmemcheck_enabled = 0; -} - -enum kmemcheck_method { - KMEMCHECK_READ, - KMEMCHECK_WRITE, -}; - -static void kmemcheck_access(struct pt_regs *regs, - unsigned long fallback_address, enum kmemcheck_method fallback_method) -{ - const uint8_t *insn; - const uint8_t *insn_primary; - unsigned int size; - - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - /* Recursive fault -- ouch. */ - if (data->busy) { - kmemcheck_show_addr(fallback_address); - kmemcheck_error_save_bug(regs); - return; - } - - data->busy = true; - - insn = (const uint8_t *) regs->ip; - insn_primary = kmemcheck_opcode_get_primary(insn); - - kmemcheck_opcode_decode(insn, &size); - - switch (insn_primary[0]) { -#ifdef CONFIG_KMEMCHECK_BITOPS_OK - /* AND, OR, XOR */ - /* - * Unfortunately, these instructions have to be excluded from - * our regular checking since they access only some (and not - * all) bits. This clears out "bogus" bitfield-access warnings. - */ - case 0x80: - case 0x81: - case 0x82: - case 0x83: - switch ((insn_primary[1] >> 3) & 7) { - /* OR */ - case 1: - /* AND */ - case 4: - /* XOR */ - case 6: - kmemcheck_write(regs, fallback_address, size); - goto out; - - /* ADD */ - case 0: - /* ADC */ - case 2: - /* SBB */ - case 3: - /* SUB */ - case 5: - /* CMP */ - case 7: - break; - } - break; -#endif - - /* MOVS, MOVSB, MOVSW, MOVSD */ - case 0xa4: - case 0xa5: - /* - * These instructions are special because they take two - * addresses, but we only get one page fault. 
- */ - kmemcheck_copy(regs, regs->si, regs->di, size); - goto out; - - /* CMPS, CMPSB, CMPSW, CMPSD */ - case 0xa6: - case 0xa7: - kmemcheck_read(regs, regs->si, size); - kmemcheck_read(regs, regs->di, size); - goto out; - } - - /* - * If the opcode isn't special in any way, we use the data from the - * page fault handler to determine the address and type of memory - * access. - */ - switch (fallback_method) { - case KMEMCHECK_READ: - kmemcheck_read(regs, fallback_address, size); - goto out; - case KMEMCHECK_WRITE: - kmemcheck_write(regs, fallback_address, size); - goto out; - } - -out: - data->busy = false; -} - -bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - pte_t *pte; - - /* - * XXX: Is it safe to assume that memory accesses from virtual 86 - * mode or non-kernel code segments will _never_ access kernel - * memory (e.g. tracked pages)? For now, we need this to avoid - * invoking kmemcheck for PnP BIOS calls. - */ - if (regs->flags & X86_VM_MASK) - return false; - if (regs->cs != __KERNEL_CS) - return false; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return false; - - WARN_ON_ONCE(in_nmi()); - - if (error_code & 2) - kmemcheck_access(regs, address, KMEMCHECK_WRITE); - else - kmemcheck_access(regs, address, KMEMCHECK_READ); - - kmemcheck_show(regs); - return true; -} - -bool kmemcheck_trap(struct pt_regs *regs) -{ - if (!kmemcheck_active(regs)) - return false; - - /* We're done. 
*/ - kmemcheck_hide(regs); - return true; -} diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c index df8109ddf7fe..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/opcode.c +++ b/arch/x86/mm/kmemcheck/opcode.c @@ -1,107 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include - -#include "opcode.h" - -static bool opcode_is_prefix(uint8_t b) -{ - return - /* Group 1 */ - b == 0xf0 || b == 0xf2 || b == 0xf3 - /* Group 2 */ - || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 - || b == 0x64 || b == 0x65 - /* Group 3 */ - || b == 0x66 - /* Group 4 */ - || b == 0x67; -} - -#ifdef CONFIG_X86_64 -static bool opcode_is_rex_prefix(uint8_t b) -{ - return (b & 0xf0) == 0x40; -} -#else -static bool opcode_is_rex_prefix(uint8_t b) -{ - return false; -} -#endif - -#define REX_W (1 << 3) - -/* - * This is a VERY crude opcode decoder. We only need to find the size of the - * load/store that caused our #PF and this should work for all the opcodes - * that we care about. Moreover, the ones who invented this instruction set - * should be shot. - */ -void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) -{ - /* Default operand size */ - int operand_size_override = 4; - - /* prefixes */ - for (; opcode_is_prefix(*op); ++op) { - if (*op == 0x66) - operand_size_override = 2; - } - - /* REX prefix */ - if (opcode_is_rex_prefix(*op)) { - uint8_t rex = *op; - - ++op; - if (rex & REX_W) { - switch (*op) { - case 0x63: - *size = 4; - return; - case 0x0f: - ++op; - - switch (*op) { - case 0xb6: - case 0xbe: - *size = 1; - return; - case 0xb7: - case 0xbf: - *size = 2; - return; - } - - break; - } - - *size = 8; - return; - } - } - - /* escape opcode */ - if (*op == 0x0f) { - ++op; - - /* - * This is move with zero-extend and sign-extend, respectively; - * we don't have to think about 0xb6/0xbe, because this is - * already handled in the conditional below. - */ - if (*op == 0xb7 || *op == 0xbf) - operand_size_override = 2; - } - - *size = (*op & 1) ? 
operand_size_override : 1; -} - -const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) -{ - /* skip prefixes */ - while (opcode_is_prefix(*op)) - ++op; - if (opcode_is_rex_prefix(*op)) - ++op; - return op; -} diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h index 51a1ce94c24a..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/opcode.h +++ b/arch/x86/mm/kmemcheck/opcode.h @@ -1,10 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H -#define ARCH__X86__MM__KMEMCHECK__OPCODE_H - -#include - -void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); -const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); - -#endif diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c index 8a03be90272a..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/pte.c +++ b/arch/x86/mm/kmemcheck/pte.c @@ -1,23 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include - -#include - -#include "pte.h" - -pte_t *kmemcheck_pte_lookup(unsigned long address) -{ - pte_t *pte; - unsigned int level; - - pte = lookup_address(address, &level); - if (!pte) - return NULL; - if (level != PG_LEVEL_4K) - return NULL; - if (!pte_hidden(*pte)) - return NULL; - - return pte; -} - diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h index b595612382c2..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/pte.h +++ b/arch/x86/mm/kmemcheck/pte.h @@ -1,11 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H -#define ARCH__X86__MM__KMEMCHECK__PTE_H - -#include - -#include - -pte_t *kmemcheck_pte_lookup(unsigned long address); - -#endif diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c index 7ce0be1f99eb..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/selftest.c +++ b/arch/x86/mm/kmemcheck/selftest.c @@ -1,71 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include "opcode.h" -#include "selftest.h" - -struct selftest_opcode { - 
unsigned int expected_size; - const uint8_t *insn; - const char *desc; -}; - -static const struct selftest_opcode selftest_opcodes[] = { - /* REP MOVS */ - {1, "\xf3\xa4", "rep movsb , "}, - {4, "\xf3\xa5", "rep movsl , "}, - - /* MOVZX / MOVZXD */ - {1, "\x66\x0f\xb6\x51\xf8", "movzwq , "}, - {1, "\x0f\xb6\x51\xf8", "movzwq , "}, - - /* MOVSX / MOVSXD */ - {1, "\x66\x0f\xbe\x51\xf8", "movswq , "}, - {1, "\x0f\xbe\x51\xf8", "movswq , "}, - -#ifdef CONFIG_X86_64 - /* MOVZX / MOVZXD */ - {1, "\x49\x0f\xb6\x51\xf8", "movzbq , "}, - {2, "\x49\x0f\xb7\x51\xf8", "movzbq , "}, - - /* MOVSX / MOVSXD */ - {1, "\x49\x0f\xbe\x51\xf8", "movsbq , "}, - {2, "\x49\x0f\xbf\x51\xf8", "movsbq , "}, - {4, "\x49\x63\x51\xf8", "movslq , "}, -#endif -}; - -static bool selftest_opcode_one(const struct selftest_opcode *op) -{ - unsigned size; - - kmemcheck_opcode_decode(op->insn, &size); - - if (size == op->expected_size) - return true; - - printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", - op->desc, op->expected_size, size); - return false; -} - -static bool selftest_opcodes_all(void) -{ - bool pass = true; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) - pass = pass && selftest_opcode_one(&selftest_opcodes[i]); - - return pass; -} - -bool kmemcheck_selftest(void) -{ - bool pass = true; - - pass = pass && selftest_opcodes_all(); - - return pass; -} diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h index 8d759aae453d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/selftest.h +++ b/arch/x86/mm/kmemcheck/selftest.h @@ -1,7 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H -#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H - -bool kmemcheck_selftest(void); - -#endif diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c deleted file mode 100644 index c2638a7d2c10..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.c +++ /dev/null @@ -1,173 +0,0 @@ -#include 
-#include -#include - -#include -#include - -#include "pte.h" -#include "shadow.h" - -/* - * Return the shadow address for the given address. Returns NULL if the - * address is not tracked. - * - * We need to be extremely careful not to follow any invalid pointers, - * because this function can be called for *any* possible address. - */ -void *kmemcheck_shadow_lookup(unsigned long address) -{ - pte_t *pte; - struct page *page; - - if (!virt_addr_valid(address)) - return NULL; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return NULL; - - page = virt_to_page(address); - if (!page->shadow) - return NULL; - return page->shadow + (address & (PAGE_SIZE - 1)); -} - -static void mark_shadow(void *address, unsigned int n, - enum kmemcheck_shadow status) -{ - unsigned long addr = (unsigned long) address; - unsigned long last_addr = addr + n - 1; - unsigned long page = addr & PAGE_MASK; - unsigned long last_page = last_addr & PAGE_MASK; - unsigned int first_n; - void *shadow; - - /* If the memory range crosses a page boundary, stop there. */ - if (page == last_page) - first_n = n; - else - first_n = page + PAGE_SIZE - addr; - - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, first_n); - - addr += first_n; - n -= first_n; - - /* Do full-page memset()s. */ - while (n >= PAGE_SIZE) { - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, PAGE_SIZE); - - addr += PAGE_SIZE; - n -= PAGE_SIZE; - } - - /* Do the remaining page, if any. 
*/ - if (n > 0) { - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, n); - } -} - -void kmemcheck_mark_unallocated(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); -} - -void kmemcheck_mark_uninitialized(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); -} - -/* - * Fill the shadow memory of the given address such that the memory at that - * address is marked as being initialized. - */ -void kmemcheck_mark_initialized(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); -} -EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); - -void kmemcheck_mark_freed(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); -} - -void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); -} - -void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); -} - -void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); -} - -enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) -{ -#ifdef CONFIG_KMEMCHECK_PARTIAL_OK - uint8_t *x; - unsigned int i; - - x = shadow; - - /* - * Make sure _some_ bytes are initialized. Gcc frequently generates - * code to access neighboring bytes. - */ - for (i = 0; i < size; ++i) { - if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) - return x[i]; - } - - return x[0]; -#else - return kmemcheck_shadow_test_all(shadow, size); -#endif -} - -enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) -{ - uint8_t *x; - unsigned int i; - - x = shadow; - - /* All bytes must be initialized. 
*/ - for (i = 0; i < size; ++i) { - if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) - return x[i]; - } - - return x[0]; -} - -void kmemcheck_shadow_set(void *shadow, unsigned int size) -{ - uint8_t *x; - unsigned int i; - - x = shadow; - for (i = 0; i < size; ++i) - x[i] = KMEMCHECK_SHADOW_INITIALIZED; -} diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h index 49768dc18664..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/shadow.h +++ b/arch/x86/mm/kmemcheck/shadow.h @@ -1,19 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H -#define ARCH__X86__MM__KMEMCHECK__SHADOW_H - -enum kmemcheck_shadow { - KMEMCHECK_SHADOW_UNALLOCATED, - KMEMCHECK_SHADOW_UNINITIALIZED, - KMEMCHECK_SHADOW_INITIALIZED, - KMEMCHECK_SHADOW_FREED, -}; - -void *kmemcheck_shadow_lookup(unsigned long address); - -enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); -enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, - unsigned int size); -void kmemcheck_shadow_set(void *shadow, unsigned int size); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index baeb872283d9..69c238210325 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -594,21 +594,6 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) __tasklet_hi_schedule(t); } -extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); - -/* - * This version avoids touching any other tasklets. Needed for kmemcheck - * in order not to take any page faults while enqueueing this tasklet; - * consider VERY carefully whether you really need this or - * tasklet_hi_schedule()... 
- */ -static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) -{ - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - __tasklet_hi_schedule_first(t); -} - - static inline void tasklet_disable_nosync(struct tasklet_struct *t) { atomic_inc(&t->count); diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 7b1d7bead7d9..ea32a7d3cf1b 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -1,172 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_KMEMCHECK_H -#define LINUX_KMEMCHECK_H - -#include -#include - -#ifdef CONFIG_KMEMCHECK -extern int kmemcheck_enabled; - -/* The slab-related functions. */ -void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); -void kmemcheck_free_shadow(struct page *page, int order); -void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size); -void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); - -void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, - gfp_t gfpflags); - -void kmemcheck_show_pages(struct page *p, unsigned int n); -void kmemcheck_hide_pages(struct page *p, unsigned int n); - -bool kmemcheck_page_is_tracked(struct page *p); - -void kmemcheck_mark_unallocated(void *address, unsigned int n); -void kmemcheck_mark_uninitialized(void *address, unsigned int n); -void kmemcheck_mark_initialized(void *address, unsigned int n); -void kmemcheck_mark_freed(void *address, unsigned int n); - -void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); -void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); -void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); - -int kmemcheck_show_addr(unsigned long address); -int kmemcheck_hide_addr(unsigned long address); - -bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - 
* struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; - -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ - } while (0) - -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ - -#else -#define kmemcheck_enabled 0 - -static inline void -kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) -{ -} - -static inline void -kmemcheck_free_shadow(struct page *page, int order) -{ -} - -static inline void -kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size) -{ -} - -static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, - size_t size) -{ -} - -static inline void kmemcheck_pagealloc_alloc(struct page *p, - unsigned int order, gfp_t gfpflags) -{ -} - -static inline bool kmemcheck_page_is_tracked(struct page *p) -{ - return false; -} - -static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_initialized(void *address, unsigned int n) -{ -} - 
-static inline void kmemcheck_mark_freed(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_unallocated_pages(struct page *p, - unsigned int n) -{ -} - -static inline void kmemcheck_mark_uninitialized_pages(struct page *p, - unsigned int n) -{ -} - -static inline void kmemcheck_mark_initialized_pages(struct page *p, - unsigned int n) -{ -} - -static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) -{ - return true; -} - -#define kmemcheck_bitfield_begin(name) -#define kmemcheck_bitfield_end(name) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - } while (0) - -#define kmemcheck_annotate_variable(var) \ - do { \ - } while (0) - -#endif /* CONFIG_KMEMCHECK */ - -#endif /* LINUX_KMEMCHECK_H */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 662f7b1b7a78..2f5e87f1bae2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -486,16 +486,6 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); -void __tasklet_hi_schedule_first(struct tasklet_struct *t) -{ - lockdep_assert_irqs_disabled(); - - t->next = __this_cpu_read(tasklet_hi_vec.head); - __this_cpu_write(tasklet_hi_vec.head, t); - __raise_softirq_irqoff(HI_SOFTIRQ); -} -EXPORT_SYMBOL(__tasklet_hi_schedule_first); - static __latent_entropy void tasklet_action(struct softirq_action *a) { struct tasklet_struct *list; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9576bd582d4a..7638e2f7fff8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1173,15 +1172,6 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_thousand, }, -#endif -#ifdef CONFIG_KMEMCHECK - { - .procname = "kmemcheck", - .data = &kmemcheck_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif { .procname = "panic_on_warn", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 
07ce7449765a..5402e3954659 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -504,7 +504,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT config DEBUG_SLAB bool "Debug slab memory allocations" - depends on DEBUG_KERNEL && SLAB && !KMEMCHECK + depends on DEBUG_KERNEL && SLAB help Say Y here to have the kernel do limited verification on memory allocation as well as poisoning memory on free to catch use of freed @@ -516,7 +516,7 @@ config DEBUG_SLAB_LEAK config SLUB_DEBUG_ON bool "SLUB debugging on by default" - depends on SLUB && SLUB_DEBUG && !KMEMCHECK + depends on SLUB && SLUB_DEBUG default n help Boot with debugging on by default. SLUB boots by default with @@ -730,8 +730,6 @@ config DEBUG_STACKOVERFLOW If in doubt, say "N". -source "lib/Kconfig.kmemcheck" - source "lib/Kconfig.kasan" endmenu # "Memory Debugging" diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck deleted file mode 100644 index 846e039a86b4..000000000000 --- a/lib/Kconfig.kmemcheck +++ /dev/null @@ -1,94 +0,0 @@ -config HAVE_ARCH_KMEMCHECK - bool - -if HAVE_ARCH_KMEMCHECK - -menuconfig KMEMCHECK - bool "kmemcheck: trap use of uninitialized memory" - depends on DEBUG_KERNEL - depends on !X86_USE_3DNOW - depends on SLUB || SLAB - depends on !CC_OPTIMIZE_FOR_SIZE - depends on !FUNCTION_TRACER - select FRAME_POINTER - select STACKTRACE - default n - help - This option enables tracing of dynamically allocated kernel memory - to see if memory is used before it has been given an initial value. - Be aware that this requires half of your memory for bookkeeping and - will insert extra code at *every* read and write to tracked memory - thus slow down the kernel code (but user code is unaffected). - - The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable - or enable kmemcheck at boot-time. If the kernel is started with - kmemcheck=0, the large memory and CPU overhead is not incurred. 
- -choice - prompt "kmemcheck: default mode at boot" - depends on KMEMCHECK - default KMEMCHECK_ONESHOT_BY_DEFAULT - help - This option controls the default behaviour of kmemcheck when the - kernel boots and no kmemcheck= parameter is given. - -config KMEMCHECK_DISABLED_BY_DEFAULT - bool "disabled" - depends on KMEMCHECK - -config KMEMCHECK_ENABLED_BY_DEFAULT - bool "enabled" - depends on KMEMCHECK - -config KMEMCHECK_ONESHOT_BY_DEFAULT - bool "one-shot" - depends on KMEMCHECK - help - In one-shot mode, only the first error detected is reported before - kmemcheck is disabled. - -endchoice - -config KMEMCHECK_QUEUE_SIZE - int "kmemcheck: error queue size" - depends on KMEMCHECK - default 64 - help - Select the maximum number of errors to store in the queue. Since - errors can occur virtually anywhere and in any context, we need a - temporary storage area which is guarantueed not to generate any - other faults. The queue will be emptied as soon as a tasklet may - be scheduled. If the queue is full, new error reports will be - lost. - -config KMEMCHECK_SHADOW_COPY_SHIFT - int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)" - depends on KMEMCHECK - range 2 8 - default 5 - help - Select the number of shadow bytes to save along with each entry of - the queue. These bytes indicate what parts of an allocation are - initialized, uninitialized, etc. and will be displayed when an - error is detected to help the debugging of a particular problem. - -config KMEMCHECK_PARTIAL_OK - bool "kmemcheck: allow partially uninitialized memory" - depends on KMEMCHECK - default y - help - This option works around certain GCC optimizations that produce - 32-bit reads from 16-bit variables where the upper 16 bits are - thrown away afterwards. This may of course also hide some real - bugs. 
- -config KMEMCHECK_BITOPS_OK - bool "kmemcheck: allow bit-field manipulation" - depends on KMEMCHECK - default n - help - This option silences warnings that would be generated for bit-field - accesses where not all the bits are initialized at the same time. - This may also hide some real bugs. - -endif diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 5b0adf1435de..e5e606ee5f71 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -11,7 +11,6 @@ config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC - depends on !KMEMCHECK select PAGE_EXTENSION select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- diff --git a/mm/Makefile b/mm/Makefile index 4659b93cba43..e7ebd176fb93 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -17,7 +17,6 @@ KCOV_INSTRUMENT_slub.o := n KCOV_INSTRUMENT_page_alloc.o := n KCOV_INSTRUMENT_debug-pagealloc.o := n KCOV_INSTRUMENT_kmemleak.o := n -KCOV_INSTRUMENT_kmemcheck.o := n KCOV_INSTRUMENT_memcontrol.o := n KCOV_INSTRUMENT_mmzone.o := n KCOV_INSTRUMENT_vmstat.o := n @@ -70,7 +69,6 @@ obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_PAGE_POISONING) += page_poison.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o -obj-$(CONFIG_KMEMCHECK) += kmemcheck.o obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index b3a4d61d341c..cec594032515 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -1,126 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include "slab.h" -#include - -void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) -{ - struct page *shadow; - int pages; - int i; - - pages = 1 << order; - - /* - * With kmemcheck enabled, we need to allocate a memory area for the - * shadow bits as well. 
- */ - shadow = alloc_pages_node(node, flags, order); - if (!shadow) { - if (printk_ratelimit()) - pr_err("kmemcheck: failed to allocate shadow bitmap\n"); - return; - } - - for(i = 0; i < pages; ++i) - page[i].shadow = page_address(&shadow[i]); - - /* - * Mark it as non-present for the MMU so that our accesses to - * this memory will trigger a page fault and let us analyze - * the memory accesses. - */ - kmemcheck_hide_pages(page, pages); -} - -void kmemcheck_free_shadow(struct page *page, int order) -{ - struct page *shadow; - int pages; - int i; - - if (!kmemcheck_page_is_tracked(page)) - return; - - pages = 1 << order; - - kmemcheck_show_pages(page, pages); - - shadow = virt_to_page(page[0].shadow); - - for(i = 0; i < pages; ++i) - page[i].shadow = NULL; - - __free_pages(shadow, order); -} - -void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size) -{ - if (unlikely(!object)) /* Skip object if allocation failed */ - return; - - /* - * Has already been memset(), which initializes the shadow for us - * as well. - */ - if (gfpflags & __GFP_ZERO) - return; - - /* No need to initialize the shadow of a non-tracked slab. */ - if (s->flags & SLAB_NOTRACK) - return; - - if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { - /* - * Allow notracked objects to be allocated from - * tracked caches. Note however that these objects - * will still get page faults on access, they just - * won't ever be flagged as uninitialized. If page - * faults are not acceptable, the slab cache itself - * should be marked NOTRACK. - */ - kmemcheck_mark_initialized(object, size); - } else if (!s->ctor) { - /* - * New objects should be marked uninitialized before - * they're returned to the called. - */ - kmemcheck_mark_uninitialized(object, size); - } -} - -void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) -{ - /* TODO: RCU freeing is unsupported for now; hide false positives. 
*/ - if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) - kmemcheck_mark_freed(object, size); -} - -void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, - gfp_t gfpflags) -{ - int pages; - - if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) - return; - - pages = 1 << order; - - /* - * NOTE: We choose to track GFP_ZERO pages too; in fact, they - * can become uninitialized by copying uninitialized memory - * into them. - */ - - /* XXX: Can use zone->node for node? */ - kmemcheck_alloc_shadow(page, order, gfpflags, -1); - - if (gfpflags & __GFP_ZERO) - kmemcheck_mark_initialized_pages(page, pages); - else - kmemcheck_mark_uninitialized_pages(page, pages); -} diff --git a/mm/slub.c b/mm/slub.c index c2c41e178acf..cfd56e5a35fb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1371,7 +1371,7 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) * So in order to make the debug calls that expect irqs to be * disabled we need to disable interrupts temporarily. */ -#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) +#ifdef CONFIG_LOCKDEP { unsigned long flags; @@ -1399,8 +1399,7 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s, * Compiler cannot detect this function can be removed if slab_free_hook() * evaluates to nothing. Thus, catch all relevant config debug options here. 
*/ -#if defined(CONFIG_KMEMCHECK) || \ - defined(CONFIG_LOCKDEP) || \ +#if defined(CONFIG_LOCKDEP) || \ defined(CONFIG_DEBUG_KMEMLEAK) || \ defined(CONFIG_DEBUG_OBJECTS_FREE) || \ defined(CONFIG_KASAN) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 67d051edd615..7bd52b8f63d4 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -2182,8 +2182,6 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; $nested =~ s/\/\*.*?\*\///gos; - # strip kmemcheck_bitfield_{begin,end}.*; - $members =~ s/kmemcheck_bitfield_.*?;//gos; # strip attributes $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; $members =~ s/__aligned\s*\([^;]*\)//gos; diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h index 2bccd2c7b897..ea32a7d3cf1b 100644 --- a/tools/include/linux/kmemcheck.h +++ b/tools/include/linux/kmemcheck.h @@ -1,9 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_ -#define _LIBLOCKDEP_LINUX_KMEMCHECK_H_ - -static inline void kmemcheck_mark_initialized(void *address, unsigned int n) -{ -} - -#endif -- cgit v1.2.3 From 1f6f4cb7ba219b00a3fa9afe8049fa16444d8b52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:53 -0800 Subject: bpf: offload: rename the ifindex field bpf_target_prog seems long and clunky, rename it to prog_ifindex. We don't want to call this field just ifindex, because maps may need a similar field in the future and bpf_attr members for programs and maps are unnamed. 
Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 +- kernel/bpf/offload.c | 2 +- kernel/bpf/syscall.c | 4 ++-- tools/include/uapi/linux/bpf.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e880ae6434ee..3f626df42516 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -262,7 +262,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; - __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ + __u32 prog_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index ac187f9ee182..a778e5df7e26 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -29,7 +29,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) init_waitqueue_head(&offload->verifier_done); rtnl_lock(); - offload->netdev = __dev_get_by_index(net, attr->prog_target_ifindex); + offload->netdev = __dev_get_by_index(net, attr->prog_ifindex); if (!offload->netdev) { rtnl_unlock(); kfree(offload); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 09badc37e864..8e9d065bb7cd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1118,7 +1118,7 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex +#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex static int bpf_prog_load(union bpf_attr *attr) { @@ -1181,7 +1181,7 @@ static int bpf_prog_load(union bpf_attr *attr) atomic_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 
1 : 0; - if (attr->prog_target_ifindex) { + if (attr->prog_ifindex) { err = bpf_prog_offload_init(prog, attr); if (err) goto free_prog; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e880ae6434ee..3f626df42516 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -262,7 +262,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; - __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ + __u32 prog_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 51aa423959b0ab62169c98b90566a0628ba096b8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:58 -0800 Subject: bpftool: revert printing program device bound info This reverts commit 928631e05495 ("bpftool: print program device bound info"). We will remove this API and redo it right in -next. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 31 ------------------------------- tools/include/uapi/linux/bpf.h | 6 ------ 2 files changed, 37 deletions(-) (limited to 'tools/include') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f45c44ef9bec..ad619b96c276 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -230,21 +229,6 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); - if (info->status & BPF_PROG_STATUS_DEV_BOUND) { - jsonw_name(json_wtr, "dev"); - if (info->ifindex) { - char name[IF_NAMESIZE]; - - if (!if_indextoname(info->ifindex, name)) - jsonw_printf(json_wtr, "\"ifindex:%d\"", - info->ifindex); - else - jsonw_printf(json_wtr, "\"%s\"", name); - } else { - jsonw_printf(json_wtr, "\"unknown\""); 
- } - } - if (info->load_time) { char buf[32]; @@ -302,21 +286,6 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); - printf(" "); - - if (info->status & BPF_PROG_STATUS_DEV_BOUND) { - printf("dev "); - if (info->ifindex) { - char name[IF_NAMESIZE]; - - if (!if_indextoname(info->ifindex, name)) - printf("ifindex:%d ", info->ifindex); - else - printf("%s ", name); - } else { - printf("unknown "); - } - } printf("\n"); if (info->load_time) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3f626df42516..4c223ab30293 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -897,10 +897,6 @@ enum sk_action { #define BPF_TAG_SIZE 8 -enum bpf_prog_status { - BPF_PROG_STATUS_DEV_BOUND = (1 << 0), -}; - struct bpf_prog_info { __u32 type; __u32 id; @@ -914,8 +910,6 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; - __u32 ifindex; - __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From 85369131895643c6510416fdcb215a855d39afb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Nov 2017 13:41:35 -0300 Subject: tools headers: Synchronize kernel ABI headers wrt SPDX tags Two more, that were just in perf/core and thus weren't covered by Ingo's latest headers synch, kcmp.h and prctl.h, silencing this: Warning: Kernel ABI header at 'tools/include/uapi/linux/kcmp.h' differs from latest version at 'include/uapi/linux/kcmp.h' Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2a0r7iybyqpkftllyy5t9hfk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kcmp.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 2 files changed, 2 
insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h index 481e103da78e..ef1305010925 100644 --- a/tools/include/uapi/linux/kcmp.h +++ b/tools/include/uapi/linux/kcmp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_KCMP_H #define _UAPI_LINUX_KCMP_H diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index a8d0759a9e40..b640071421f7 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _LINUX_PRCTL_H #define _LINUX_PRCTL_H -- cgit v1.2.3 From 0f1aabeb4932e48ce067cb8c88322277dd7af371 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 11:35:52 -0300 Subject: tools headers: Synchronize perf_event.h header To get the changes in the 085b30625e39 ("perf/core: Add PERF_AUX_FLAG_COLLISION to report colliding samples") commit, that will be eventually used by perf to handle the ARM SPE architecture. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-178ohv0oy0csq3kzfdk8ky4n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 362493a2f950..b9a4953018ed 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -942,6 +942,7 @@ enum perf_callchain_context { #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ #define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) -- cgit v1.2.3 From 8ce6d5eb01cba83db9077c88a533bfeff39c679c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 11:43:00 -0300 Subject: tools headers uapi: Synchronize drm/drm.h To pick up the new ioctls added in these csets: 3064abfa932b ("drm: Add CRTC_GET_SEQUENCE and CRTC_QUEUE_SEQUENCE ioctls [v3]") 62884cd386b8 ("drm: Add four ioctls for managing drm mode object leases [v7]") That will be automatically decoded (the ioctl cmd parameter, the structs will be supported when we start using eBPF for that, which is in the works). 
This silences this warning when building tools/perf: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' Cc: Adrian Hunter Cc: Dave Airlie Cc: David Ahern Cc: Jiri Olsa Cc: Keith Packard Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-bivwf1pkfmi1ugpswbsxd9e9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 97677cd6964d..6fdff5945c8a 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -737,6 +737,28 @@ struct drm_syncobj_array { __u32 pad; }; +/* Query current scanout sequence number */ +struct drm_crtc_get_sequence { + __u32 crtc_id; /* requested crtc_id */ + __u32 active; /* return: crtc output is active */ + __u64 sequence; /* return: most recent vblank sequence */ + __s64 sequence_ns; /* return: most recent time of first pixel out */ +}; + +/* Queue event to be delivered at specified sequence. Time stamp marks + * when the first pixel of the refresh cycle leaves the display engine + * for the display + */ +#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ +#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ + +struct drm_crtc_queue_sequence { + __u32 crtc_id; + __u32 flags; + __u64 sequence; /* on input, target sequence. 
on output, actual sequence */ + __u64 user_data; /* user data passed to event */ +}; + #if defined(__cplusplus) } #endif @@ -819,6 +841,9 @@ extern "C" { #define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) +#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) +#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) + #define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) #define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) @@ -863,6 +888,11 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) #define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) +#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) +#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) +#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) +#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. @@ -893,6 +923,7 @@ struct drm_event { #define DRM_EVENT_VBLANK 0x01 #define DRM_EVENT_FLIP_COMPLETE 0x02 +#define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { struct drm_event base; @@ -903,6 +934,16 @@ struct drm_event_vblank { __u32 crtc_id; /* 0 on older kernels that do not support this */ }; +/* Event delivered at sequence. 
Time stamp marks when the first pixel + * of the refresh cycle leaves the display engine for the display + */ +struct drm_event_crtc_sequence { + struct drm_event base; + __u64 user_data; + __s64 time_ns; + __u64 sequence; +}; + /* typedef area */ #ifndef __KERNEL__ typedef struct drm_clip_rect drm_clip_rect_t; -- cgit v1.2.3 From 485be0cb0c71da167cb7f27c20130dc04fec33ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 11:50:07 -0300 Subject: tools headers: Synchronize drm/i915_drm.h To pick up the changes from these csets: bf64e0b00e1f ("drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask") ac14fbd460d0 ("drm/i915/scheduler: Support user-defined priorities") 822a4b673284 ("drm/i915: Don't use BIT() in UAPI section") 3fd3a6ffe279 ("drm/i915: Simplify i915_reg_read_ioctl") None of them affects how the tools are built, this is done just to silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' Cc: Adrian Hunter Cc: Chris Wilson Cc: David Ahern Cc: Jiri Olsa Cc: Joonas Lahtinen Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-d2gor8brpcowe7bcxovjhqwm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 9816590d3ad2..ac3c6503ca27 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -397,10 +397,20 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_MIN_EU_IN_POOL 39 #define I915_PARAM_MMAP_GTT_VERSION 40 -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution * priorities and the driver will attempt to execute batches in 
priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. */ #define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) + #define I915_PARAM_HUC_STATUS 42 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of @@ -1309,14 +1319,16 @@ struct drm_i915_reg_read { * be specified */ __u64 offset; +#define I915_REG_READ_8B_WA (1ul << 0) + __u64 val; /* Return value */ }; /* Known registers: * * Render engine timestamp - 0x2358 + 64bit - gen7+ * - Note this register returns an invalid value if using the default - * single instruction 8byte read, in order to workaround that use - * offset (0x2538 | 1) instead. + * single instruction 8byte read, in order to workaround that pass + * flag I915_REG_READ_8B_WA in offset field. * */ @@ -1359,6 +1371,10 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 #define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ __u64 value; }; @@ -1510,9 +1526,14 @@ struct drm_i915_perf_oa_config { __u32 n_boolean_regs; __u32 n_flex_regs; - __u64 __user mux_regs_ptr; - __u64 __user boolean_regs_ptr; - __u64 __user flex_regs_ptr; + /* + * These fields are pointers to tuples of u32 values (register + * address, value). For example the expected length of the buffer + * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). 
+ */ + __u64 mux_regs_ptr; + __u64 boolean_regs_ptr; + __u64 flex_regs_ptr; }; #if defined(__cplusplus) -- cgit v1.2.3 From 374fbe56068c36126fc6903aaaa78f1ae8a95f91 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 12:04:21 -0300 Subject: tools headers: Synchronize KVM arch ABI headers To pick up changes from these csets: da9a1446d248 ("KVM: s390: provide a capability for AIS state migration") 5c5196da4e96 ("KVM: arm/arm64: Support EL1 phys timer register access in set/get reg") None of which affects building tools/perf/. Cc: Adrian Hunter Cc: Christian Borntraeger Cc: Christoffer Dall Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-dd72s6izo4qdzt1isowlz8ji@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 7 +++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 7 +++++++ tools/include/uapi/linux/kvm.h | 1 + 3 files changed, 15 insertions(+) (limited to 'tools/include') diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 1f57bbe82b6f..6edd177bb1c7 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -152,6 +152,12 @@ struct kvm_arch_memory_slot { (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) #define ARM_CP15_REG64(...) 
__ARM_CP15_REG64(__VA_ARGS__) +/* PL1 Physical Timer Registers */ +#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1) +#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14) +#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14) + +/* Virtual Timer Registers */ #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) #define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) #define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) @@ -216,6 +222,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 +#define KVM_DEV_ARM_ITS_CTRL_RESET 4 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 51149ec75fe4..9abbf3044654 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -196,6 +196,12 @@ struct kvm_arch_memory_slot { #define ARM64_SYS_REG(...) 
(__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) +/* Physical Timer EL0 Registers */ +#define KVM_REG_ARM_PTIMER_CTL ARM64_SYS_REG(3, 3, 14, 2, 1) +#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) +#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) + +/* EL0 Virtual Timer Registers */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) @@ -228,6 +234,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 +#define KVM_DEV_ARM_ITS_CTRL_RESET 4 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7e99999d6236..282d7613fce8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -931,6 +931,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SMT_POSSIBLE 147 #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 +#define KVM_CAP_S390_AIS_MIGRATION 150 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From d9744f940923ea341a289d1920a55e3a3de7fc9a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 12:11:02 -0300 Subject: tools headers: Synchronize prctl.h ABI header To pick up changes from: 2d2123bc7c7f ("arm64/sve: Add prctl controls for userspace vector length management") 7582e22038a2 ("arm64/sve: Backend logic for setting the vector length") That showed a limitation of the regexp used in tools/perf/trace/beauty/prctl_option.sh, that matches only PR_{SET,GET}_, but should match a few more, like PR_MPX_*, PR_CAP_* and the one added by the above commit, PR_SVE_SET_*. 
This silences this warning when building tools/perf: Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' Support for those extra prctl options should be left for the next merge window tho. Cc: Adrian Hunter Cc: Dave Martin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-r52dsyuzy04qzqyfcifjs35t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index b640071421f7..af5f8c2df87a 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -198,4 +198,13 @@ struct prctl_mm_map { # define PR_CAP_AMBIENT_LOWER 3 # define PR_CAP_AMBIENT_CLEAR_ALL 4 +/* arm64 Scalable Vector Extension controls */ +/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */ +#define PR_SVE_SET_VL 50 /* set task vector length */ +# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SVE_GET_VL 51 /* get task vector length */ +/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */ +# define PR_SVE_VL_LEN_MASK 0xffff +# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 1b3b5219abfd6a214e99018747e9fe98514b43ca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 12:18:23 -0300 Subject: tools headers: Synchronize mman.h ABI header To add support for the MAP_SYNC flag introduced in: b6fb293f2497 ("mm: Define MAP_SYNC and VM_SYNC flags") Update tools/perf/trace/beauty/mmap.c to support that flag. 
This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman.h' differs from latest version at 'include/uapi/asm-generic/mman.h' Cc: Adrian Hunter Cc: Dan Williams Cc: David Ahern Cc: Jan Kara Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-14zyk3iywrj37c7g1eagmzbo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/mman.h | 1 + tools/perf/trace/beauty/mmap.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 2dffcbf705b3..653687d9771b 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -13,6 +13,7 @@ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */ /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 9e1668b2c5d7..417e3ecfe9d7 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -62,6 +62,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(POPULATE); P_MMAP_FLAG(STACK); P_MMAP_FLAG(UNINITIALIZED); +#ifdef MAP_SYNC + P_MMAP_FLAG(SYNC); +#endif #undef P_MMAP_FLAG if (flags) -- cgit v1.2.3 From 618e165b2a8e10765dd2a4f9866d118a474f0faf Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:48 +0100 Subject: selftests/bpf: sync kernel headers and introduce arch support in Makefile Synchronize the uapi kernel header files which solves the broken uapi export of pt_regs. Because of arch-specific uapi headers, extended the include path in the Makefile. 
With this change, the test_verifier program compiles and runs successfully on s390. Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Daniel Borkmann Cc: Shuah Khan Signed-off-by: Daniel Borkmann --- tools/arch/arm64/include/uapi/asm/bpf_perf_event.h | 9 + tools/arch/s390/include/uapi/asm/bpf_perf_event.h | 9 + tools/arch/s390/include/uapi/asm/ptrace.h | 457 +++++++++++++++++++++ tools/include/uapi/asm-generic/bpf_perf_event.h | 9 + tools/include/uapi/linux/bpf_perf_event.h | 6 +- tools/testing/selftests/bpf/Makefile | 14 +- 6 files changed, 500 insertions(+), 4 deletions(-) create mode 100644 tools/arch/arm64/include/uapi/asm/bpf_perf_event.h create mode 100644 tools/arch/s390/include/uapi/asm/bpf_perf_event.h create mode 100644 tools/arch/s390/include/uapi/asm/ptrace.h create mode 100644 tools/include/uapi/asm-generic/bpf_perf_event.h (limited to 'tools/include') diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..cefe7c7cd4f6 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h new 
file mode 100644 index 000000000000..543dd70e12c8 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/ptrace.h @@ -0,0 +1,457 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + */ + +#ifndef _UAPI_S390_PTRACE_H +#define _UAPI_S390_PTRACE_H + +/* + * Offsets in the user_regs_struct. They are used for the ptrace + * system call and in entry.S + */ +#ifndef __s390x__ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x04 +#define PT_GPR0 0x08 +#define PT_GPR1 0x0C +#define PT_GPR2 0x10 +#define PT_GPR3 0x14 +#define PT_GPR4 0x18 +#define PT_GPR5 0x1C +#define PT_GPR6 0x20 +#define PT_GPR7 0x24 +#define PT_GPR8 0x28 +#define PT_GPR9 0x2C +#define PT_GPR10 0x30 +#define PT_GPR11 0x34 +#define PT_GPR12 0x38 +#define PT_GPR13 0x3C +#define PT_GPR14 0x40 +#define PT_GPR15 0x44 +#define PT_ACR0 0x48 +#define PT_ACR1 0x4C +#define PT_ACR2 0x50 +#define PT_ACR3 0x54 +#define PT_ACR4 0x58 +#define PT_ACR5 0x5C +#define PT_ACR6 0x60 +#define PT_ACR7 0x64 +#define PT_ACR8 0x68 +#define PT_ACR9 0x6C +#define PT_ACR10 0x70 +#define PT_ACR11 0x74 +#define PT_ACR12 0x78 +#define PT_ACR13 0x7C +#define PT_ACR14 0x80 +#define PT_ACR15 0x84 +#define PT_ORIGGPR2 0x88 +#define PT_FPC 0x90 +/* + * A nasty fact of life that the ptrace api + * only supports passing of longs. 
+ */ +#define PT_FPR0_HI 0x98 +#define PT_FPR0_LO 0x9C +#define PT_FPR1_HI 0xA0 +#define PT_FPR1_LO 0xA4 +#define PT_FPR2_HI 0xA8 +#define PT_FPR2_LO 0xAC +#define PT_FPR3_HI 0xB0 +#define PT_FPR3_LO 0xB4 +#define PT_FPR4_HI 0xB8 +#define PT_FPR4_LO 0xBC +#define PT_FPR5_HI 0xC0 +#define PT_FPR5_LO 0xC4 +#define PT_FPR6_HI 0xC8 +#define PT_FPR6_LO 0xCC +#define PT_FPR7_HI 0xD0 +#define PT_FPR7_LO 0xD4 +#define PT_FPR8_HI 0xD8 +#define PT_FPR8_LO 0XDC +#define PT_FPR9_HI 0xE0 +#define PT_FPR9_LO 0xE4 +#define PT_FPR10_HI 0xE8 +#define PT_FPR10_LO 0xEC +#define PT_FPR11_HI 0xF0 +#define PT_FPR11_LO 0xF4 +#define PT_FPR12_HI 0xF8 +#define PT_FPR12_LO 0xFC +#define PT_FPR13_HI 0x100 +#define PT_FPR13_LO 0x104 +#define PT_FPR14_HI 0x108 +#define PT_FPR14_LO 0x10C +#define PT_FPR15_HI 0x110 +#define PT_FPR15_LO 0x114 +#define PT_CR_9 0x118 +#define PT_CR_10 0x11C +#define PT_CR_11 0x120 +#define PT_IEEE_IP 0x13C +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x140-1 + +#define GPR_SIZE 4 +#define CR_SIZE 4 + +#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ + +#else /* __s390x__ */ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x08 +#define PT_GPR0 0x10 +#define PT_GPR1 0x18 +#define PT_GPR2 0x20 +#define PT_GPR3 0x28 +#define PT_GPR4 0x30 +#define PT_GPR5 0x38 +#define PT_GPR6 0x40 +#define PT_GPR7 0x48 +#define PT_GPR8 0x50 +#define PT_GPR9 0x58 +#define PT_GPR10 0x60 +#define PT_GPR11 0x68 +#define PT_GPR12 0x70 +#define PT_GPR13 0x78 +#define PT_GPR14 0x80 +#define PT_GPR15 0x88 +#define PT_ACR0 0x90 +#define PT_ACR1 0x94 +#define PT_ACR2 0x98 +#define PT_ACR3 0x9C +#define PT_ACR4 0xA0 +#define PT_ACR5 0xA4 +#define PT_ACR6 0xA8 +#define PT_ACR7 0xAC +#define PT_ACR8 0xB0 +#define PT_ACR9 0xB4 +#define PT_ACR10 0xB8 +#define PT_ACR11 0xBC +#define PT_ACR12 0xC0 +#define PT_ACR13 0xC4 +#define PT_ACR14 0xC8 +#define PT_ACR15 0xCC +#define PT_ORIGGPR2 0xD0 +#define PT_FPC 0xD8 +#define PT_FPR0 0xE0 +#define PT_FPR1 0xE8 +#define PT_FPR2 0xF0 
+#define PT_FPR3 0xF8 +#define PT_FPR4 0x100 +#define PT_FPR5 0x108 +#define PT_FPR6 0x110 +#define PT_FPR7 0x118 +#define PT_FPR8 0x120 +#define PT_FPR9 0x128 +#define PT_FPR10 0x130 +#define PT_FPR11 0x138 +#define PT_FPR12 0x140 +#define PT_FPR13 0x148 +#define PT_FPR14 0x150 +#define PT_FPR15 0x158 +#define PT_CR_9 0x160 +#define PT_CR_10 0x168 +#define PT_CR_11 0x170 +#define PT_IEEE_IP 0x1A8 +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x1B0-1 + +#define GPR_SIZE 8 +#define CR_SIZE 8 + +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ + +#endif /* __s390x__ */ + +#define NUM_GPRS 16 +#define NUM_FPRS 16 +#define NUM_CRS 16 +#define NUM_ACRS 16 + +#define NUM_CR_WORDS 3 + +#define FPR_SIZE 8 +#define FPC_SIZE 4 +#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ +#define ACR_SIZE 4 + + +#define PTRACE_OLDSETOPTIONS 21 + +#ifndef __ASSEMBLY__ +#include +#include + +typedef union { + float f; + double d; + __u64 ui; + struct + { + __u32 hi; + __u32 lo; + } fp; +} freg_t; + +typedef struct { + __u32 fpc; + __u32 pad; + freg_t fprs[NUM_FPRS]; +} s390_fp_regs; + +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 + +/* this typedef defines how a Program Status Word looks like */ +typedef struct { + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) psw_t; + +#ifndef __s390x__ + +#define PSW_MASK_PER 0x40000000UL +#define PSW_MASK_DAT 0x04000000UL +#define PSW_MASK_IO 0x02000000UL +#define PSW_MASK_EXT 0x01000000UL +#define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ +#define PSW_MASK_MCHECK 0x00040000UL +#define PSW_MASK_WAIT 0x00020000UL +#define PSW_MASK_PSTATE 0x00010000UL +#define PSW_MASK_ASC 0x0000C000UL +#define PSW_MASK_CC 0x00003000UL +#define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_RI 0x00000000UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + 
+#define PSW_MASK_USER 0x0000FF00UL + +#define PSW_ADDR_AMODE 0x80000000UL +#define PSW_ADDR_INSN 0x7FFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) + +#define PSW_ASC_PRIMARY 0x00000000UL +#define PSW_ASC_ACCREG 0x00004000UL +#define PSW_ASC_SECONDARY 0x00008000UL +#define PSW_ASC_HOME 0x0000C000UL + +#else /* __s390x__ */ + +#define PSW_MASK_PER 0x4000000000000000UL +#define PSW_MASK_DAT 0x0400000000000000UL +#define PSW_MASK_IO 0x0200000000000000UL +#define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL +#define PSW_MASK_KEY 0x00F0000000000000UL +#define PSW_MASK_MCHECK 0x0004000000000000UL +#define PSW_MASK_WAIT 0x0002000000000000UL +#define PSW_MASK_PSTATE 0x0001000000000000UL +#define PSW_MASK_ASC 0x0000C00000000000UL +#define PSW_MASK_CC 0x0000300000000000UL +#define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_RI 0x0000008000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x0000FF0180000000UL + +#define PSW_ADDR_AMODE 0x0000000000000000UL +#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) + +#define PSW_ASC_PRIMARY 0x0000000000000000UL +#define PSW_ASC_ACCREG 0x0000400000000000UL +#define PSW_ASC_SECONDARY 0x0000800000000000UL +#define PSW_ASC_HOME 0x0000C00000000000UL + +#endif /* __s390x__ */ + + +/* + * The s390_regs structure is used to define the elf_gregset_t. + */ +typedef struct { + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; +} s390_regs; + +/* + * The user_pt_regs structure exports the beginning of + * the in-kernel pt_regs structure to user space. + */ +typedef struct { + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; +} user_pt_regs; + +/* + * Now for the user space program event recording (trace) definitions. 
+ * The following structures are used only for the ptrace interface, don't + * touch or even look at it if you don't want to modify the user-space + * ptrace interface. In particular stay away from it for in-kernel PER. + */ +typedef struct { + unsigned long cr[NUM_CR_WORDS]; +} per_cr_words; + +#define PER_EM_MASK 0xE8000000UL + +typedef struct { +#ifdef __s390x__ + unsigned : 32; +#endif /* __s390x__ */ + unsigned em_branching : 1; + unsigned em_instruction_fetch : 1; + /* + * Switching on storage alteration automatically fixes + * the storage alteration event bit in the users std. + */ + unsigned em_storage_alteration : 1; + unsigned em_gpr_alt_unused : 1; + unsigned em_store_real_address : 1; + unsigned : 3; + unsigned branch_addr_ctl : 1; + unsigned : 1; + unsigned storage_alt_space_ctl : 1; + unsigned : 21; + unsigned long starting_addr; + unsigned long ending_addr; +} per_cr_bits; + +typedef struct { + unsigned short perc_atmid; + unsigned long address; + unsigned char access_id; +} per_lowcore_words; + +typedef struct { + unsigned perc_branching : 1; + unsigned perc_instruction_fetch : 1; + unsigned perc_storage_alteration : 1; + unsigned perc_gpr_alt_unused : 1; + unsigned perc_store_real_address : 1; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; + unsigned long address; + unsigned : 4; + unsigned access_id : 4; +} per_lowcore_bits; + +typedef struct { + union { + per_cr_words words; + per_cr_bits bits; + } control_regs; + /* + * The single_step and instruction_fetch bits are obsolete, + * the kernel always sets them to zero. To enable single + * stepping use ptrace(PTRACE_SINGLESTEP) instead. 
+ */ + unsigned single_step : 1; + unsigned instruction_fetch : 1; + unsigned : 30; + /* + * These addresses are copied into cr10 & cr11 if single + * stepping is switched off + */ + unsigned long starting_addr; + unsigned long ending_addr; + union { + per_lowcore_words words; + per_lowcore_bits bits; + } lowcore; +} per_struct; + +typedef struct { + unsigned int len; + unsigned long kernel_addr; + unsigned long process_addr; +} ptrace_area; + +/* + * S/390 specific non posix ptrace requests. I chose unusual values so + * they are unlikely to clash with future ptrace definitions. + */ +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKTEXT_AREA 0x5002 +#define PTRACE_PEEKDATA_AREA 0x5003 +#define PTRACE_POKETEXT_AREA 0x5004 +#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 +#define PTRACE_ENABLE_TE 0x5009 +#define PTRACE_DISABLE_TE 0x5010 +#define PTRACE_TE_ABORT_RAND 0x5011 + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the number assigned in . + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + +/* + * PT_PROT definition is loosely based on hppa bsd definition in + * gdb/hppab-nat.c + */ +#define PTRACE_PROT 21 + +typedef enum { + ptprot_set_access_watchpoint, + ptprot_set_write_watchpoint, + ptprot_disable_watchpoint +} ptprot_flags; + +typedef struct { + unsigned long lowaddr; + unsigned long hiaddr; + ptprot_flags prot; +} ptprot_area; + +/* Sequence of bytes for breakpoint illegal instruction. */ +#define S390_BREAKPOINT {0x0,0x1} +#define S390_BREAKPOINT_U16 ((__u16)0x0001) +#define S390_SYSCALL_OPCODE ((__u16)0x0a00) +#define S390_SYSCALL_SIZE 2 + +/* + * The user_regs_struct defines the way the user registers are + * store on the stack for signal handling. 
+ */ +struct user_regs_struct { + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + per_struct per_info; + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_S390_PTRACE_H */ diff --git a/tools/include/uapi/asm-generic/bpf_perf_event.h b/tools/include/uapi/asm-generic/bpf_perf_event.h new file mode 100644 index 000000000000..53815d2cd047 --- /dev/null +++ b/tools/include/uapi/asm-generic/bpf_perf_event.h @@ -0,0 +1,9 @@ +#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ + +#include + +/* Export kernel pt_regs structure */ +typedef struct pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */ diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h index 067427259820..8f95303f9d80 100644 --- a/tools/include/uapi/linux/bpf_perf_event.h +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or @@ -7,11 +8,10 @@ #ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ #define _UAPI__LINUX_BPF_PERF_EVENT_H__ -#include -#include +#include struct bpf_perf_event_data { - struct pt_regs regs; + bpf_user_pt_regs_t regs; __u64 sample_period; }; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 333a48655ee0..21a2d76b67dc 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,7 +1,19 @@ # SPDX-License-Identifier: GPL-2.0 + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := 
$(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif +include $(srctree)/tools/scripts/Makefile.arch + +$(call detected_var,SRCARCH) + LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf APIDIR := ../../../include/uapi +ASMDIR:= ../../../arch/$(ARCH)/include/uapi GENDIR := ../../../../include/generated GENHDR := $(GENDIR)/autoconf.h @@ -9,7 +21,7 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(ASMDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ -- cgit v1.2.3 From f335195adf043168ee69d78ea72ac3e30f0c57ce Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Dec 2017 11:27:57 +0100 Subject: kmemcheck: rip it out for real Commit 4675ff05de2d ("kmemcheck: rip it out") has removed the code but for some reason SPDX header stayed in place. This looks like a rebase mistake in the mmotm tree or the merge mistake. Let's drop those leftovers as well. 
Signed-off-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kmemcheck.h | 1 - arch/x86/mm/kmemcheck/error.c | 1 - arch/x86/mm/kmemcheck/error.h | 1 - arch/x86/mm/kmemcheck/opcode.c | 1 - arch/x86/mm/kmemcheck/opcode.h | 1 - arch/x86/mm/kmemcheck/pte.c | 1 - arch/x86/mm/kmemcheck/pte.h | 1 - arch/x86/mm/kmemcheck/selftest.c | 1 - arch/x86/mm/kmemcheck/selftest.h | 1 - arch/x86/mm/kmemcheck/shadow.h | 1 - include/linux/kmemcheck.h | 1 - mm/kmemcheck.c | 1 - tools/include/linux/kmemcheck.h | 1 - 13 files changed, 13 deletions(-) delete mode 100644 arch/x86/include/asm/kmemcheck.h delete mode 100644 arch/x86/mm/kmemcheck/error.c delete mode 100644 arch/x86/mm/kmemcheck/error.h delete mode 100644 arch/x86/mm/kmemcheck/opcode.c delete mode 100644 arch/x86/mm/kmemcheck/opcode.h delete mode 100644 arch/x86/mm/kmemcheck/pte.c delete mode 100644 arch/x86/mm/kmemcheck/pte.h delete mode 100644 arch/x86/mm/kmemcheck/selftest.c delete mode 100644 arch/x86/mm/kmemcheck/selftest.h delete mode 100644 arch/x86/mm/kmemcheck/shadow.h delete mode 100644 include/linux/kmemcheck.h delete mode 100644 mm/kmemcheck.c delete mode 100644 tools/include/linux/kmemcheck.h (limited to 'tools/include') diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/include/asm/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/error.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/error.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/opcode.c 
b/arch/x86/mm/kmemcheck/opcode.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/pte.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/pte.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c deleted file mode 100644 index cec594032515..000000000000 --- a/mm/kmemcheck.c +++ /dev/null @@ -1 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0 diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/tools/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -- cgit v1.2.3 From 2a22f692bbe0a7933acbd50045479ffc0fdf11f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Nov 2017 10:38:22 +0000 Subject: tools/include: Remove ACCESS_ONCE() There are no longer any userspace uses of ACCESS_ONCE(), so we can remove the definition from our userspace <linux/compiler.h>, which is only used by tools in the kernel directory (i.e. it isn't a uapi header). This patch removes the ACCESS_ONCE() definition, and updates comments which referred to it. At the same time, some inconsistent and redundant whitespace is removed from comments. Tested-by: Paul E. McKenney Signed-off-by: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: Joe Perches Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: apw@canonical.com Link: http://lkml.kernel.org/r/20171127103824.36526-3-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- tools/include/linux/compiler.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 07fd03c74a77..04e32f965ad7 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -84,8 +84,6 @@ #define uninitialized_var(x) x = *(&(x)) -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) - #include /* @@ -135,20 +133,19 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s /* * Prevent the compiler from merging or refetching reads or writes. The * compiler is also forbidden from reordering successive instances of - * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the - * compiler is aware of some particular ordering. 
One way to make the - * compiler aware of ordering is to put the two invocations of READ_ONCE, - * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some + * particular ordering. One way to make the compiler aware of ordering is to + * put the two invocations of READ_ONCE or WRITE_ONCE in different C + * statements. * - * In contrast to ACCESS_ONCE these two macros will also work on aggregate - * data types like structs or unions. If the size of the accessed data - * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a - * compile-time warning. + * These two macros will also work on aggregate data types like structs or + * unions. If the size of the accessed data type exceeds the word size of + * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will + * fall back to memcpy and print a compile-time warning. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise * mutilate accesses that either do not require ordering or that interact * with an explicit memory barrier or atomic instruction that provides the * required ordering. -- cgit v1.2.3 From 720f228e8d3128b7ab1d39f51fdd8da07a7640c9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Dec 2017 02:25:32 +0100 Subject: bpf: fix broken BPF selftest build At least on x86_64, the kernel's BPF selftests seemed to have stopped to build due to 618e165b2a8e ("selftests/bpf: sync kernel headers and introduce arch support in Makefile"): [...] 
In file included from test_verifier.c:29:0: ../../../include/uapi/linux/bpf_perf_event.h:11:32: fatal error: asm/bpf_perf_event.h: No such file or directory #include <asm/bpf_perf_event.h> ^ compilation terminated. [...] While pulling in tools/arch/*/include/uapi/asm/bpf_perf_event.h seems to work fine, there's no automated fall-back logic right now that would do the same out of tools/include/uapi/asm-generic/bpf_perf_event.h. The usual convention today is to add an include/[uapi/]asm/ equivalent that would pull in the correct arch header or generic one as fall-back, all ifdef'ed based on compiler target definition. It's similarly done also in other cases such as tools/include/asm/barrier.h, thus adapt the same here. Fixes: 618e165b2a8e ("selftests/bpf: sync kernel headers and introduce arch support in Makefile") Signed-off-by: Daniel Borkmann Cc: Hendrik Brueckner Cc: Arnaldo Carvalho de Melo Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/include/uapi/asm/bpf_perf_event.h | 7 +++++++ tools/testing/selftests/bpf/Makefile | 13 +------------ 2 files changed, 8 insertions(+), 12 deletions(-) create mode 100644 tools/include/uapi/asm/bpf_perf_event.h (limited to 'tools/include') diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..13a58531e6fa --- /dev/null +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,7 @@ +#if defined(__aarch64__) +#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" +#elif defined(__s390__) +#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" +#else +#include +#endif diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 21a2d76b67dc..792af7c3b74f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,19 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir 
$(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -endif -include $(srctree)/tools/scripts/Makefile.arch - -$(call detected_var,SRCARCH) - LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf APIDIR := ../../../include/uapi -ASMDIR:= ../../../arch/$(ARCH)/include/uapi GENDIR := ../../../../include/generated GENHDR := $(GENDIR)/autoconf.h @@ -21,7 +10,7 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(ASMDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ -- cgit v1.2.3 From 92ccc262e485781ff4c0fb3b7c77a619282df49a Mon Sep 17 00:00:00 2001 From: Mengting Zhang Date: Tue, 12 Dec 2017 18:16:57 +0000 Subject: tools/lib/lockdep: Add missing declaration of 'pr_cont()' Commit: 681fbec881de ("lockdep: Use consistent printing primitives") has moved lockdep away from using printk() for printing. The commit added usage of pr_cont() which wasn't wrapped in the userspace headers, causing the following warning for the liblockdep build: ../../../kernel/locking/lockdep.c:3544:2: warning: implicit declaration of function 'pr_cont' [-Wimplicit-function-declaration] Adding an empty declaration of 'pr_cont' fixes the problem. 
Signed-off-by: Mengting Zhang Signed-off-by: Sasha Levin Reviewed-by: Alexander Sverdlin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Link: http://lkml.kernel.org/r/20171212181644.11913-2-alexander.levin@verizon.com Signed-off-by: Ingo Molnar --- tools/include/linux/lockdep.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h index 940c1b075659..6b0c36a58fcb 100644 --- a/tools/include/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -48,6 +48,7 @@ static inline int debug_locks_off(void) #define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define pr_warn pr_err +#define pr_cont pr_err #define list_del_rcu list_del -- cgit v1.2.3 From 643e345c95f0b4a4082c60755e06e3e635658da6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Dec 2017 13:47:51 +0100 Subject: tools/headers: Synchronize kernel <-> tooling headers Two kernel headers got modified recently, which are used by tooling as well: tools/include/uapi/linux/kvm.h arch/x86/include/asm/cpufeatures.h None of those changes have an effect on tooling, so do a plain copy. 
Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Namhyung Kim Cc: Jiri Olsa Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/arch/x86/include/asm/cpufeatures.h | 1 + tools/include/uapi/linux/kvm.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index c0b0e9e8aa66..800104c8a3ed 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -266,6 +266,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 282d7613fce8..496e59a2738b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -630,9 +630,9 @@ struct kvm_s390_irq { struct kvm_s390_irq_state { __u64 buf; - __u32 flags; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 reserved[4]; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; /* for KVM_SET_GUEST_DEBUG */ -- cgit v1.2.3