summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-09-15 16:26:40 -0700
committerJakub Kicinski <kuba@kernel.org>2025-09-15 16:26:40 -0700
commit943a4fd7e1f382ac35cb630b0c04f695ef12ab2b (patch)
tree499fc09258e8c43c5826071fb2218ce35c38e2d8 /include
parent0915cb22452723407ca9606b7e5cc3fe6ce767d5 (diff)
parent30f5ca00624397d81c99515bdd43286ade93d7c8 (diff)
Merge branch 'accecn-protocol-patch-series'
TCP preparations for AccECN support Just code reshuffling, no functional changes. Link: https://patch.msgid.link/20250911110642.87529-1-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/tcp.h4
-rw-r--r--include/net/tcp.h54
-rw-r--r--include/net/tcp_ecn.h116
3 files changed, 145 insertions, 29 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 57e478bfaef2..d103cc0e7a35 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -285,6 +285,8 @@ struct tcp_sock {
* Header prediction flags
* 0x5?10 << 16 + snd_wnd in net byte order
*/
+ u8 nonagle : 4,/* Disable Nagle algorithm? */
+ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
__be32 pred_flags;
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
u64 tcp_mstamp; /* most recent packet received/sent */
@@ -303,8 +305,6 @@ struct tcp_sock {
* Options received (usually on last packet, some only on SYN packets).
*/
struct tcp_options_received rx_opt;
- u8 nonagle : 4,/* Disable Nagle algorithm? */
- rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
__cacheline_group_end(tcp_sock_write_txrx);
/* RX read-write hotpath cache lines */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 277914c4d067..e25340459ce4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -821,33 +821,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
}
-static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
-{
- /* mptcp hooks are only on the slow path */
- if (sk_is_mptcp((struct sock *)tp))
- return;
-
- tp->pred_flags = htonl((tp->tcp_header_len << 26) |
- ntohl(TCP_FLAG_ACK) |
- snd_wnd);
-}
-
-static inline void tcp_fast_path_on(struct tcp_sock *tp)
-{
- __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
-}
-
-static inline void tcp_fast_path_check(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
- tp->rcv_wnd &&
- atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
- !tp->urg_data)
- tcp_fast_path_on(tp);
-}
-
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
@@ -1807,6 +1780,33 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
return true;
}
+static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
+{
+ /* mptcp hooks are only on the slow path */
+ if (sk_is_mptcp((struct sock *)tp))
+ return;
+
+ tp->pred_flags = htonl((tp->tcp_header_len << 26) |
+ ntohl(TCP_FLAG_ACK) |
+ snd_wnd);
+}
+
+static inline void tcp_fast_path_on(struct tcp_sock *tp)
+{
+ __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
+}
+
+static inline void tcp_fast_path_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
+ tp->rcv_wnd &&
+ atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+ !tp->urg_data)
+ tcp_fast_path_on(tp);
+}
+
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
int mib_idx, u32 *last_oow_ack_time);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
new file mode 100644
index 000000000000..b3430557676b
--- /dev/null
+++ b/include/net/tcp_ecn.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _TCP_ECN_H
+#define _TCP_ECN_H
+
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/inet_ecn.h>
+
+static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
+{
+ if (tcp_ecn_mode_rfc3168(tp))
+ tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
+}
+
+static inline void tcp_ecn_accept_cwr(struct sock *sk,
+ const struct sk_buff *skb)
+{
+ if (tcp_hdr(skb)->cwr) {
+ tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+
+ /* If the sender is telling us it has entered CWR, then its
+ * cwnd may be very low (even just 1 packet), so we should ACK
+ * immediately.
+ */
+ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ }
+}
+
+static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
+{
+ tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
+}
+
+static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+}
+
+static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+}
+
+static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
+ return true;
+ return false;
+}
+
+/* Packet ECN state for a SYN-ACK */
+static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
+ if (tcp_ecn_disabled(tp))
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
+ else if (tcp_ca_needs_ecn(sk) ||
+ tcp_bpf_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
+}
+
+/* Packet ECN state for a SYN. */
+static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
+ bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
+ tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+
+ if (!use_ecn) {
+ const struct dst_entry *dst = __sk_dst_get(sk);
+
+ if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
+ use_ecn = true;
+ }
+
+ tp->ecn_flags = 0;
+
+ if (use_ecn) {
+ if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
+ INET_ECN_xmit(sk);
+
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
+ }
+}
+
+static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+{
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
+ /* tp->ecn_flags are cleared at a later point in time when
+ * SYN ACK is ultimatively being received.
+ */
+ TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
+}
+
+static inline void
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
+{
+ if (inet_rsk(req)->ecn_ok)
+ th->ece = 1;
+}
+
+#endif /* _LINUX_TCP_ECN_H */