diff options
| author | Daniel Borkmann <daniel@iogearbox.net> | 2018-08-11 01:58:47 +0200 |
|---|---|---|
| committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-08-11 01:58:48 +0200 |
| commit | 9d6f417714c3aaf67b23ffdc1d2b036cce3ecc1c (patch) | |
| tree | 4bc949c2a15c2c0d1cdbb390740f508fe7644623 /include/linux | |
| parent | 74b247f4c36315e5c08580700a68e0eb3b72de03 (diff) | |
| parent | 91134d849a0e8fbc70b8607d280e0d325dcaf7bb (diff) | |
Merge branch 'bpf-reuseport-map'
Martin KaFai Lau says:
====================
This series introduces a new map type "BPF_MAP_TYPE_REUSEPORT_SOCKARRAY"
and a new prog type BPF_PROG_TYPE_SK_REUSEPORT.
Here is a snippet from a commit message:
"To unleash the full potential of a bpf prog, it is essential for the
userspace to be capable of directly setting up a bpf map which can then
be consumed by the bpf prog to make decision. In this case, decide which
SO_REUSEPORT sk to serve the incoming request.
By adding BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, the userspace has total control
and visibility on where a SO_REUSEPORT sk should be located in a bpf map.
The later patch will introduce BPF_PROG_TYPE_SK_REUSEPORT such that
the bpf prog can directly select a sk from the bpf map. That will
raise the programmability of the bpf prog attached to a reuseport
group (a group of sk serving the same IP:PORT).
For example, in UDP, the bpf prog can peek into the payload (e.g.
through the "data" pointer introduced in the later patch) to learn
the application level's connection information and then decide which sk
to pick from a bpf map. The userspace can tightly couple the sk's location
in a bpf map with the application logic in generating the UDP payload's
connection information. This connection info contact/API stays within the
userspace.
Also, when used with map-in-map, the userspace can switch the
old-server-process's inner map to a new-server-process's inner map
in one call "bpf_map_update_elem(outer_map, &index, &new_reuseport_array)".
The bpf prog will then direct incoming requests to the new process instead
of the old process. The old process can finish draining the pending
requests (e.g. by "accept()") before closing the old-fds. [Note that
deleting a fd from a bpf map does not necessary mean the fd is closed]"
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/bpf.h | 28 | ||||
| -rw-r--r-- | include/linux/bpf_types.h | 6 | ||||
| -rw-r--r-- | include/linux/filter.h | 16 |
3 files changed, 50 insertions, 0 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cd8790d2c6ed..db11662faea6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -524,6 +524,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) } struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); +int array_map_alloc_check(union bpf_attr *attr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) @@ -769,6 +770,33 @@ static inline void __xsk_map_flush(struct bpf_map *map) } #endif +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); +#else +static inline void bpf_sk_reuseport_detach(struct sock *sk) +{ +} + +#ifdef CONFIG_BPF_SYSCALL +static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, + void *key, void *value) +{ + return -EOPNOTSUPP; +} + +static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, + void *key, void *value, + u64 map_flags) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_BPF_SYSCALL */ +#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ + /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index add08be53b6f..cd26c090e7c0 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) #ifdef CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #endif +#ifdef CONFIG_INET +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) @@ -60,4 +63,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) #endif +#ifdef CONFIG_INET +BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) +#endif #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 2b072dab32c0..5d565c50bcb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -32,6 +32,7 @@ struct seccomp_data; struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; +struct sock_reuseport; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function @@ -752,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); +void sk_reuseport_prog_free(struct bpf_prog *prog); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); @@ -833,6 +835,20 @@ void bpf_warn_invalid_xdp_action(u32 act); struct sock *do_sk_redirect_map(struct sk_buff *skb); struct sock *do_msg_redirect_map(struct sk_msg_buff *md); +#ifdef CONFIG_INET +struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, + struct bpf_prog *prog, struct sk_buff *skb, + u32 hash); +#else +static inline struct sock * +bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, + struct bpf_prog *prog, struct sk_buff *skb, + u32 hash) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; |
