diff options
| author | David S. Miller <davem@davemloft.net> | 2016-12-02 13:46:21 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2016-12-02 13:46:21 -0500 |
| commit | b5b5eca9aa4166779e184685dcd838f4d0775e76 (patch) | |
| tree | 546165a4619501f722cb551411164e1b88e693b3 /include | |
| parent | 7f7bf1606fa8fa0e3aecdeac0ba8005f2a0fbdef (diff) | |
| parent | 554ae6e792ef38020b80b4d5127c51d510c0918f (diff) | |
Merge branch 'bpf-support-for-sockets'
David Ahern says:
====================
net: Add bpf support for sockets
The recently added VRF support in Linux leverages the bind-to-device
API for programs to specify an L3 domain for a socket. While
SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable
program has support for it. Even for those programs that do support it,
the API requires processes to be started as root (CAP_NET_RAW) which
is not desirable from a general security perspective.
This patch set leverages Daniel Mack's work to attach bpf programs to
a cgroup to provide a capability to set sk_bound_dev_if for all
AF_INET{6} sockets opened by a process in a cgroup when the sockets
are allocated.
For example:
1. configure vrf (e.g., using ifupdown2)
    auto eth0
    iface eth0 inet dhcp
        vrf mgmt
    auto mgmt
    iface mgmt
        vrf-table auto
2. configure cgroup
    mount -t cgroup2 none /tmp/cgroupv2
    mkdir /tmp/cgroupv2/mgmt
    test_cgrp2_sock /tmp/cgroupv2/mgmt 15
3. set shell into cgroup (e.g., can be done at login using pam)
    echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs
At this point all commands run in the shell (e.g., apt) have sockets
automatically bound to the VRF (see output of ss -ap 'dev == <vrf>'),
including processes not running as root.
This capability enables running any program in a VRF context and is key
to deploying Management VRF, a fundamental configuration for networking
gear, with any Linux OS installation.
This patchset also exports the socket family, type and protocol as
read-only allowing bpf filters to deny a process in a cgroup the ability
to open specific types of AF_INET or AF_INET6 sockets.
v7
- comments from Alexei
v6
- add export of socket family, type and protocol
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/bpf-cgroup.h | 60 | ||||
| -rw-r--r-- | include/net/sock.h | 15 | ||||
| -rw-r--r-- | include/uapi/linux/bpf.h | 9 |
3 files changed, 61 insertions, 23 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0cf1adfadd2d..7b6e5d168c95 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -36,31 +36,44 @@ void cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); -int __cgroup_bpf_run_filter(struct sock *sk, - struct sk_buff *skb, - enum bpf_attach_type type); - -/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */ -#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter(sk, skb, \ - BPF_CGROUP_INET_INGRESS); \ - \ - __ret; \ +int __cgroup_bpf_run_filter_skb(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type); + +int __cgroup_bpf_run_filter_sk(struct sock *sk, + enum bpf_attach_type type); + +/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ + BPF_CGROUP_INET_INGRESS); \ + \ + __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ - typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ - __ret = __cgroup_bpf_run_filter(__sk, skb, \ - BPF_CGROUP_INET_EGRESS); \ - } \ - __ret; \ +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ + BPF_CGROUP_INET_EGRESS); \ + } \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk) { \ + __ret = __cgroup_bpf_run_filter_sk(sk, \ + BPF_CGROUP_INET_SOCK_CREATE); \ + } \ + __ret; \ }) #else @@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct 
cgroup *cgrp, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/net/sock.h b/include/net/sock.h index 442cbb118a07..69afda6bea15 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -389,6 +389,21 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ + unsigned int __sk_flags_offset[0]; +#ifdef __BIG_ENDIAN_BITFIELD +#define SK_FL_PROTO_SHIFT 16 +#define SK_FL_PROTO_MASK 0x00ff0000 + +#define SK_FL_TYPE_SHIFT 0 +#define SK_FL_TYPE_MASK 0x0000ffff +#else +#define SK_FL_PROTO_SHIFT 8 +#define SK_FL_PROTO_MASK 0x0000ff00 + +#define SK_FL_TYPE_SHIFT 16 +#define SK_FL_TYPE_MASK 0xffff0000 +#endif + kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 2, sk_no_check_tx : 1, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 22ac82792687..6123d9b8e828 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -101,6 +101,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_PERF_EVENT, BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, @@ -109,6 +110,7 @@ enum bpf_prog_type { enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, __MAX_BPF_ATTACH_TYPE }; @@ -567,6 +569,13 @@ enum bpf_ret_code { /* >127 are reserved for prog type specific return codes */ }; +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; +}; + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result |
