From 7449197d600d30d038b1ce6285ab4747096eac7f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:56:28 -0800 Subject: bpf: Keep the (rcv) timestamp behavior for the existing tc-bpf@ingress The current tc-bpf@ingress reads and writes the __sk_buff->tstamp as a (rcv) timestamp which currently could either be 0 (not available) or ktime_get_real(). This patch is to backward compatible with the (rcv) timestamp expectation at ingress. If the skb->tstamp has the delivery_time, the bpf insn rewrite will read 0 for tc-bpf running at ingress as it is not available. When writing at ingress, it will also clear the skb->mono_delivery_time bit. /* BPF_READ: a = __sk_buff->tstamp */ if (!skb->tc_at_ingress || !skb->mono_delivery_time) a = skb->tstamp; else a = 0 /* BPF_WRITE: __sk_buff->tstamp = a */ if (skb->tc_at_ingress) skb->mono_delivery_time = 0; skb->tstamp = a; [ A note on the BPF_CGROUP_INET_INGRESS which can also access skb->tstamp. At that point, the skb is delivered locally and skb_clear_delivery_time() has already been done, so the skb->tstamp will only have the (rcv) timestamp. ] If the tc-bpf@egress writes 0 to skb->tstamp, the skb->mono_delivery_time has to be cleared also. It could be done together during convert_ctx_access(). However, the latter patch will also expose the skb->mono_delivery_time bit as __sk_buff->delivery_time_type. Changing the delivery_time_type in the background may surprise the user, e.g. the 2nd read on __sk_buff->delivery_time_type may need a READ_ONCE() to avoid compiler optimization. Thus, in expecting the needs in the latter patch, this patch does a check on !skb->tstamp after running the tc-bpf and clears the skb->mono_delivery_time bit if needed. The earlier discussion on v4 [0]. The bpf insn rewrite requires the skb's mono_delivery_time bit and tc_at_ingress bit. They are moved up in sk_buff so that bpf rewrite can be done at a fixed offset. tc_skip_classify is moved together with tc_at_ingress. To get one bit for mono_delivery_time, csum_not_inet is moved down and this bit is currently used by sctp. [0]: https://lore.kernel.org/bpf/20220217015043.khqwqklx45c4m4se@kafai-mbp.dhcp.thefacebook.com/ Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b5b926a81f2..5445860e1ba6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -941,8 +941,12 @@ struct sk_buff { __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ __u8 csum_complete_sw:1; __u8 csum_level:2; - __u8 csum_not_inet:1; __u8 dst_pending_confirm:1; + __u8 mono_delivery_time:1; +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; + __u8 tc_at_ingress:1; +#endif #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -953,10 +957,6 @@ struct sk_buff { #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; -#endif -#ifdef CONFIG_NET_CLS_ACT - __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; #endif __u8 redirected:1; #ifdef CONFIG_NET_REDIRECT @@ -969,7 +969,7 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; - __u8 mono_delivery_time:1; + __u8 csum_not_inet:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -1047,10 +1047,16 @@ struct sk_buff { /* if you move pkt_vlan_present around you also must adapt these constants */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_VLAN_PRESENT_BIT 7 +#define TC_AT_INGRESS_MASK (1 << 0) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else #define PKT_VLAN_PRESENT_BIT 0 +#define TC_AT_INGRESS_MASK (1 << 7) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif #define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define TC_AT_INGRESS_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define SKB_MONO_DELIVERY_TIME_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) #ifdef __KERNEL__ /* -- cgit v1.2.3