author    David S. Miller <davem@davemloft.net>    2016-12-02 13:46:21 -0500
committer David S. Miller <davem@davemloft.net>    2016-12-02 13:46:21 -0500
commit    b5b5eca9aa4166779e184685dcd838f4d0775e76 (patch)
tree      546165a4619501f722cb551411164e1b88e693b3 /include
parent    7f7bf1606fa8fa0e3aecdeac0ba8005f2a0fbdef (diff)
parent    554ae6e792ef38020b80b4d5127c51d510c0918f (diff)
Merge branch 'bpf-support-for-sockets'
David Ahern says:

====================
net: Add bpf support for sockets

The recently added VRF support in Linux leverages the bind-to-device
API for programs to specify an L3 domain for a socket. While
SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable
program has support for it. Even for those programs that do support
it, the API requires processes to be started as root (CAP_NET_RAW),
which is not desirable from a general security perspective.

This patch set leverages Daniel Mack's work to attach bpf programs to
a cgroup, providing a capability to set sk_bound_dev_if for all
AF_INET{6} sockets opened by a process in a cgroup when the sockets
are allocated. For example:

1. configure vrf (e.g., using ifupdown2)

       auto eth0
       iface eth0 inet dhcp
           vrf mgmt

       auto mgmt
       iface mgmt
           vrf-table auto

2. configure cgroup

       mount -t cgroup2 none /tmp/cgroupv2
       mkdir /tmp/cgroupv2/mgmt
       test_cgrp2_sock /tmp/cgroupv2/mgmt 15

3. set shell into cgroup (e.g., can be done at login using pam)

       echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs

At this point all commands run in the shell (e.g., apt) have their
sockets automatically bound to the VRF (see the output of
ss -ap 'dev == <vrf>'), including processes not running as root.

This capability enables running any program in a VRF context and is
key to deploying Management VRF, a fundamental configuration for
networking gear, with any Linux OS installation.

This patchset also exports the socket family, type and protocol as
read-only fields, allowing bpf filters to deny a process in a cgroup
the ability to open specific types of AF_INET or AF_INET6 sockets.

v7 - comments from Alexei
v6 - add export of socket family, type and protocol
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
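To make the flow concrete, below is a sketch of a program of the new
BPF_PROG_TYPE_CGROUP_SOCK type that binds new sockets to a VRF device and
denies raw sockets. It is illustrative only: the SEC() macro follows the
kernel samples convention, the function name is made up, and ifindex 15
echoes the test_cgrp2_sock invocation above. struct bpf_sock is the
context defined in the uapi hunk below.

/* Sketch of a BPF_PROG_TYPE_CGROUP_SOCK program. It runs when a task
 * in the attached cgroup creates an AF_INET/AF_INET6 socket; returning
 * 1 allows the socket, returning 0 fails the creation with EPERM. */
#include <uapi/linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

SEC("cgroup/sock")
int mgmt_vrf_sock(struct bpf_sock *sk)
{
	/* family, type and protocol are read-only; bound_dev_if is writable */
	if (sk->type == 3 /* SOCK_RAW */)
		return 0;		/* deny raw sockets in this cgroup */

	sk->bound_dev_if = 15;		/* illustrative ifindex of the mgmt VRF */
	return 1;			/* allow the socket */
}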
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bpf-cgroup.h  60
-rw-r--r--  include/net/sock.h          15
-rw-r--r--  include/uapi/linux/bpf.h     9
3 files changed, 61 insertions(+), 23 deletions(-)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0cf1adfadd2d..7b6e5d168c95 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -36,31 +36,44 @@ void cgroup_bpf_update(struct cgroup *cgrp,
struct bpf_prog *prog,
enum bpf_attach_type type);
-int __cgroup_bpf_run_filter(struct sock *sk,
- struct sk_buff *skb,
- enum bpf_attach_type type);
-
-/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
-#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \
-({ \
- int __ret = 0; \
- if (cgroup_bpf_enabled) \
- __ret = __cgroup_bpf_run_filter(sk, skb, \
- BPF_CGROUP_INET_INGRESS); \
- \
- __ret; \
+int __cgroup_bpf_run_filter_skb(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type);
+
+/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \
+ BPF_CGROUP_INET_INGRESS); \
+ \
+ __ret; \
})
-#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \
-({ \
- int __ret = 0; \
- if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
- typeof(sk) __sk = sk_to_full_sk(sk); \
- if (sk_fullsock(__sk)) \
- __ret = __cgroup_bpf_run_filter(__sk, skb, \
- BPF_CGROUP_INET_EGRESS); \
- } \
- __ret; \
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
+ typeof(sk) __sk = sk_to_full_sk(sk); \
+ if (sk_fullsock(__sk)) \
+ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
+ BPF_CGROUP_INET_EGRESS); \
+ } \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && sk) { \
+ __ret = __cgroup_bpf_run_filter_sk(sk, \
+ BPF_CGROUP_INET_SOCK_CREATE); \
+ } \
+ __ret; \
})
#else
@@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */
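The new BPF_CGROUP_RUN_PROG_INET_SOCK wrapper follows the same
cgroup_bpf_enabled guard pattern as the skb variants but takes only the
socket, since it runs before any traffic exists. A sketch of the intended
call-site pattern (the function name here is hypothetical; in the series
the equivalent call is made from the inet socket-creation paths, which
are outside this include/ diff):

/* Hypothetical call site: run the cgroup sock filter on a freshly
 * created socket and release it if the program denies creation. */
static int run_sock_create_hook(struct sock *sk)
{
	int err;

	err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);	/* 0 = allow, -EPERM = deny */
	if (err) {
		sk_common_release(sk);			/* undo the allocation */
		return err;
	}
	return 0;
}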
diff --git a/include/net/sock.h b/include/net/sock.h
index 442cbb118a07..69afda6bea15 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -389,6 +389,21 @@ struct sock {
* Because of non atomicity rules, all
* changes are protected by socket lock.
*/
+ unsigned int __sk_flags_offset[0];
+#ifdef __BIG_ENDIAN_BITFIELD
+#define SK_FL_PROTO_SHIFT 16
+#define SK_FL_PROTO_MASK 0x00ff0000
+
+#define SK_FL_TYPE_SHIFT 0
+#define SK_FL_TYPE_MASK 0x0000ffff
+#else
+#define SK_FL_PROTO_SHIFT 8
+#define SK_FL_PROTO_MASK 0x0000ff00
+
+#define SK_FL_TYPE_SHIFT 16
+#define SK_FL_TYPE_MASK 0xffff0000
+#endif
+
kmemcheck_bitfield_begin(flags);
unsigned int sk_padding : 2,
sk_no_check_tx : 1,
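The zero-length __sk_flags_offset array pins down the offset of the word
holding the sk_type/sk_protocol bitfields, and the SK_FL_* shift/mask
pairs describe where each field lands for either bitfield endianness. A
plain-C sketch of the masked load these macros enable (helper names are
illustrative; in the series the BPF context-access conversion emits the
equivalent masked load when a program reads sk->type or sk->protocol):

/* Illustrative helpers: read the bitfield word once, then mask and
 * shift out the read-only fields using the new SK_FL_* macros. */
static inline unsigned int sk_flags_word(const struct sock *sk)
{
	return *(const unsigned int *)sk->__sk_flags_offset;
}

static inline unsigned int sk_type_read(const struct sock *sk)
{
	return (sk_flags_word(sk) & SK_FL_TYPE_MASK) >> SK_FL_TYPE_SHIFT;
}

static inline unsigned int sk_protocol_read(const struct sock *sk)
{
	return (sk_flags_word(sk) & SK_FL_PROTO_MASK) >> SK_FL_PROTO_SHIFT;
}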
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 22ac82792687..6123d9b8e828 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -101,6 +101,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
BPF_PROG_TYPE_LWT_IN,
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
@@ -109,6 +110,7 @@ enum bpf_prog_type {
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
__MAX_BPF_ATTACH_TYPE
};
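The new BPF_CGROUP_INET_SOCK_CREATE attach type is what userspace passes
to BPF_PROG_ATTACH to hook a loaded BPF_PROG_TYPE_CGROUP_SOCK program
onto a cgroup v2 directory, as the test_cgrp2_sock step in the cover
letter does. A minimal sketch (error handling omitted; prog_fd is assumed
to come from a prior BPF_PROG_LOAD):

/* Sketch: attach a cgroup-sock program to a cgroup v2 directory fd.
 * cgroup_fd = open("/tmp/cgroupv2/mgmt", O_DIRECTORY | O_RDONLY). */
#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static int attach_sock_prog(int cgroup_fd, int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = cgroup_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = BPF_CGROUP_INET_SOCK_CREATE;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}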
@@ -567,6 +569,13 @@ enum bpf_ret_code {
/* >127 are reserved for prog type specific return codes */
};
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+};
+
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result