diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-09-15 16:26:40 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-09-15 16:26:40 -0700 |
| commit | 943a4fd7e1f382ac35cb630b0c04f695ef12ab2b (patch) | |
| tree | 499fc09258e8c43c5826071fb2218ce35c38e2d8 /include | |
| parent | 0915cb22452723407ca9606b7e5cc3fe6ce767d5 (diff) | |
| parent | 30f5ca00624397d81c99515bdd43286ade93d7c8 (diff) | |
Merge branch 'accecn-protocol-patch-series'
TCP preparations for AccECN support
Just code reshuffling, no functional changes.
Link: https://patch.msgid.link/20250911110642.87529-1-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/tcp.h | 4 | ||||
| -rw-r--r-- | include/net/tcp.h | 54 | ||||
| -rw-r--r-- | include/net/tcp_ecn.h | 116 |
3 files changed, 145 insertions, 29 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 57e478bfaef2..d103cc0e7a35 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -285,6 +285,8 @@ struct tcp_sock { * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ + u8 nonagle : 4,/* Disable Nagle algorithm? */ + rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -303,8 +305,6 @@ struct tcp_sock { * Options received (usually on last packet, some only on SYN packets). */ struct tcp_options_received rx_opt; - u8 nonagle : 4,/* Disable Nagle algorithm? */ - rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 277914c4d067..e25340459ce4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -821,33 +821,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) -{ - /* mptcp hooks are only on the slow path */ - if (sk_is_mptcp((struct sock *)tp)) - return; - - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); -} - -static inline void tcp_fast_path_on(struct tcp_sock *tp) -{ - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); -} - -static inline void tcp_fast_path_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); -} - u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ @@ -1807,6 +1780,33 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, 
return true; } +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +{ + /* mptcp hooks are only on the slow path */ + if (sk_is_mptcp((struct sock *)tp)) + return; + + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +} + +static inline void tcp_fast_path_on(struct tcp_sock *tp) +{ + __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); +} + +static inline void tcp_fast_path_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && + tp->rcv_wnd && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +} + bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, int mib_idx, u32 *last_oow_ack_time); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h new file mode 100644 index 000000000000..b3430557676b --- /dev/null +++ b/include/net/tcp_ecn.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _TCP_ECN_H +#define _TCP_ECN_H + +#include <linux/tcp.h> +#include <linux/skbuff.h> + +#include <net/inet_connection_sock.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <net/inet_ecn.h> + +static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) +{ + if (tcp_ecn_mode_rfc3168(tp)) + tp->ecn_flags |= TCP_ECN_QUEUE_CWR; +} + +static inline void tcp_ecn_accept_cwr(struct sock *sk, + const struct sk_buff *skb) +{ + if (tcp_hdr(skb)->cwr) { + tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + + /* If the sender is telling us it has entered CWR, then its + * cwnd may be very low (even just 1 packet), so we should ACK + * immediately. 
+ */ + if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } +} + +static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) +{ + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; +} + +static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp, + const struct tcphdr *th) +{ + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); +} + +static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, + const struct tcphdr *th) +{ + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); +} + +static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, + const struct tcphdr *th) +{ + if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp)) + return true; + return false; +} + +/* Packet ECN state for a SYN-ACK */ +static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; + if (tcp_ecn_disabled(tp)) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; + else if (tcp_ca_needs_ecn(sk) || + tcp_bpf_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); +} + +/* Packet ECN state for a SYN. 
*/ +static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || + tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + + if (!use_ecn) { + const struct dst_entry *dst = __sk_dst_get(sk); + + if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) + use_ecn = true; + } + + tp->ecn_flags = 0; + + if (use_ecn) { + if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) + INET_ECN_xmit(sk); + + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + } +} + +static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) +{ + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) + /* tp->ecn_flags are cleared at a later point in time when + * SYN ACK is ultimately being received. + */ + TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); +} + +static inline void +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) +{ + if (inet_rsk(req)->ecn_ok) + th->ece = 1; +} + +#endif /* _TCP_ECN_H */ |
