From 23729ff23186424e54b4d6678fcd526cdacef4d3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:56 -0700 Subject: bpf: add BPF_CGROUP_SOCK_OPS callback that is executed on every RTT Performance impact should be minimal because it's under a new BPF_SOCK_OPS_RTT_CB_FLAG flag that has to be explicitly enabled. Suggested-by: Eric Dumazet Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cffea1826a1f..9cdd0aaeba06 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1770,6 +1770,7 @@ union bpf_attr { * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) * * Therefore, this function can be used to clear a callback flag by * setting the appropriate bit to zero. e.g. to disable the RTO @@ -3314,7 +3315,8 @@ struct bpf_sock_ops { #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently +#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently * supported cb flags */ @@ -3369,6 +3371,8 @@ enum { BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after * socket transition to LISTEN state. */ + BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- cgit v1.2.3 From 0357746d1e40a8226f68a42c8d7222a12d7c451f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:58 -0700 Subject: bpf: add dsack_dups/delivered{, _ce} to bpf_tcp_sock Add more fields to bpf_tcp_sock that might be useful for debugging congestion control issues. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 5 +++++ net/core/filter.c | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9cdd0aaeba06..bfb0b1a76684 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3073,6 +3073,11 @@ struct bpf_tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ + __u32 delivered; /* Total data packets delivered incl. rexmits */ + __u32 delivered_ce; /* Like the above but only ECE marked packets */ }; struct bpf_sock_tuple { diff --git a/net/core/filter.c b/net/core/filter.c index ad908526545d..3da4b6c38b46 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5544,7 +5544,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, delivered_ce)) return false; if (off % size != 0) @@ -5652,6 +5652,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, case offsetof(struct bpf_tcp_sock, bytes_acked): BPF_TCP_SOCK_GET_COMMON(bytes_acked); break; + case offsetof(struct bpf_tcp_sock, dsack_dups): + BPF_TCP_SOCK_GET_COMMON(dsack_dups); + break; + case offsetof(struct bpf_tcp_sock, delivered): + BPF_TCP_SOCK_GET_COMMON(delivered); + break; + case offsetof(struct bpf_tcp_sock, delivered_ce): + BPF_TCP_SOCK_GET_COMMON(delivered_ce); + break; } return insn - insn_buf; -- cgit v1.2.3 From c2cb5e82a720c05b707701c75dfeb356fe184787 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:59 -0700 Subject: bpf: add icsk_retransmits to bpf_tcp_sock Add some inet_connection_sock fields to bpf_tcp_sock that might be useful for debugging congestion control issues. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bfb0b1a76684..ead27aebf491 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3078,6 +3078,7 @@ struct bpf_tcp_sock { */ __u32 delivered; /* Total data packets delivered incl. rexmits */ __u32 delivered_ce; /* Like the above but only ECE marked packets */ + __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ }; struct bpf_sock_tuple { diff --git a/net/core/filter.c b/net/core/filter.c index 3da4b6c38b46..089aaea0ccc6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5544,7 +5544,8 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, delivered_ce)) + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, + icsk_retransmits)) return false; if (off % size != 0) @@ -5575,6 +5576,20 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, offsetof(struct tcp_sock, FIELD)); \ } while (0) +#define BPF_INET_SOCK_GET_COMMON(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \ + FIELD) > \ + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct inet_connection_sock, \ + FIELD), \ + si->dst_reg, si->src_reg, \ + offsetof( \ + struct inet_connection_sock, \ + FIELD)); \ + } while (0) + if (insn > insn_buf) return insn - insn_buf; @@ -5661,6 +5676,9 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, case offsetof(struct bpf_tcp_sock, delivered_ce): BPF_TCP_SOCK_GET_COMMON(delivered_ce); break; + case offsetof(struct bpf_tcp_sock, icsk_retransmits): + BPF_INET_SOCK_GET_COMMON(icsk_retransmits); + break; } return insn - insn_buf; -- cgit v1.2.3