author	Jakub Kicinski <kuba@kernel.org>	2025-09-15 16:26:40 -0700
committer	Jakub Kicinski <kuba@kernel.org>	2025-09-15 16:26:40 -0700
commit	943a4fd7e1f382ac35cb630b0c04f695ef12ab2b (patch)
tree	499fc09258e8c43c5826071fb2218ce35c38e2d8
parent	0915cb22452723407ca9606b7e5cc3fe6ce767d5 (diff)
parent	30f5ca00624397d81c99515bdd43286ade93d7c8 (diff)
Merge branch 'accecn-protocol-patch-series'
TCP preparations for AccECN support. Just code reshuffling, no functional
changes.

Link: https://patch.msgid.link/20250911110642.87529-1-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--  include/linux/tcp.h   |   4
-rw-r--r--  include/net/tcp.h     |  54
-rw-r--r--  include/net/tcp_ecn.h | 116
-rw-r--r--  net/ipv4/tcp.c        |   4
-rw-r--r--  net/ipv4/tcp_input.c  |  45
-rw-r--r--  net/ipv4/tcp_output.c |  55
6 files changed, 149 insertions, 129 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 57e478bfaef2..d103cc0e7a35 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -285,6 +285,8 @@ struct tcp_sock {
* Header prediction flags
* 0x5?10 << 16 + snd_wnd in net byte order
*/
+ u8 nonagle : 4,/* Disable Nagle algorithm? */
+ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
__be32 pred_flags;
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
u64 tcp_mstamp; /* most recent packet received/sent */
@@ -303,8 +305,6 @@ struct tcp_sock {
* Options received (usually on last packet, some only on SYN packets).
*/
struct tcp_options_received rx_opt;
- u8 nonagle : 4,/* Disable Nagle algorithm? */
- rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
__cacheline_group_end(tcp_sock_write_txrx);
/* RX read-write hotpath cache lines */
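Note: the hunks above move the nonagle/rate_app_limited byte from the tail of
the tcp_sock_write_txrx group to just before the 4-byte pred_flags field.
Member order determines how much padding the compiler inserts, which is why
the expected group sizes in tcp_struct_check() change further down. A minimal
userspace sketch of the effect, with illustrative stand-in types rather than
the real tcp_sock:

#include <stdio.h>
#include <stdint.h>

/* Stand-in layouts only -- not the real tcp_sock. A lone u8 placed after
 * an 8-byte member forces tail padding; moved in front of a 4-byte member
 * it occupies an existing alignment hole instead.
 */
struct u8_last {
	uint32_t pred_flags;
	uint64_t tcp_clock_cache;
	uint8_t  nonagle;
};

struct u8_first {
	uint8_t  nonagle;
	uint32_t pred_flags;
	uint64_t tcp_clock_cache;
};

int main(void)
{
	printf("u8 last:  %zu bytes\n", sizeof(struct u8_last));  /* 24 on a typical 64-bit ABI */
	printf("u8 first: %zu bytes\n", sizeof(struct u8_first)); /* 16 on a typical 64-bit ABI */
	return 0;
}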
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 277914c4d067..e25340459ce4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -821,33 +821,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
}
-static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
-{
- /* mptcp hooks are only on the slow path */
- if (sk_is_mptcp((struct sock *)tp))
- return;
-
- tp->pred_flags = htonl((tp->tcp_header_len << 26) |
- ntohl(TCP_FLAG_ACK) |
- snd_wnd);
-}
-
-static inline void tcp_fast_path_on(struct tcp_sock *tp)
-{
- __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
-}
-
-static inline void tcp_fast_path_check(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
- tp->rcv_wnd &&
- atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
- !tp->urg_data)
- tcp_fast_path_on(tp);
-}
-
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
@@ -1807,6 +1780,33 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
return true;
}
+static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
+{
+ /* mptcp hooks are only on the slow path */
+ if (sk_is_mptcp((struct sock *)tp))
+ return;
+
+ tp->pred_flags = htonl((tp->tcp_header_len << 26) |
+ ntohl(TCP_FLAG_ACK) |
+ snd_wnd);
+}
+
+static inline void tcp_fast_path_on(struct tcp_sock *tp)
+{
+ __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
+}
+
+static inline void tcp_fast_path_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
+ tp->rcv_wnd &&
+ atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+ !tp->urg_data)
+ tcp_fast_path_on(tp);
+}
+
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
int mib_idx, u32 *last_oow_ack_time);
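Note: the relocated helpers build the header-prediction word described by the
"0x5?10 << 16 + snd_wnd" comment in struct tcp_sock: the header length lands
in the top nibble (len << 26 equals doff << 28), then the ACK flag, then the
already-descaled send window. A standalone userspace sketch of the same
arithmetic as __tcp_fast_path_on(), with example values:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t tcp_header_len = 20;         /* bare 20-byte header, doff = 5 */
	uint32_t tcp_flag_ack   = 0x00100000; /* TCP_FLAG_ACK in host order */
	uint32_t snd_wnd        = 0xfff0;     /* example window, post snd_wscale shift */

	/* Same expression as __tcp_fast_path_on() */
	uint32_t pred_flags = htonl((tcp_header_len << 26) |
				    tcp_flag_ack |
				    snd_wnd);

	printf("pred_flags = 0x%08x\n", ntohl(pred_flags)); /* prints 0x5010fff0 */
	return 0;
}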
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
new file mode 100644
index 000000000000..b3430557676b
--- /dev/null
+++ b/include/net/tcp_ecn.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _TCP_ECN_H
+#define _TCP_ECN_H
+
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/inet_ecn.h>
+
+static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
+{
+ if (tcp_ecn_mode_rfc3168(tp))
+ tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
+}
+
+static inline void tcp_ecn_accept_cwr(struct sock *sk,
+ const struct sk_buff *skb)
+{
+ if (tcp_hdr(skb)->cwr) {
+ tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+
+ /* If the sender is telling us it has entered CWR, then its
+ * cwnd may be very low (even just 1 packet), so we should ACK
+ * immediately.
+ */
+ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ }
+}
+
+static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
+{
+ tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
+}
+
+static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+}
+
+static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+}
+
+static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
+ return true;
+ return false;
+}
+
+/* Packet ECN state for a SYN-ACK */
+static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
+ if (tcp_ecn_disabled(tp))
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
+ else if (tcp_ca_needs_ecn(sk) ||
+ tcp_bpf_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
+}
+
+/* Packet ECN state for a SYN. */
+static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
+ bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
+ tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+
+ if (!use_ecn) {
+ const struct dst_entry *dst = __sk_dst_get(sk);
+
+ if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
+ use_ecn = true;
+ }
+
+ tp->ecn_flags = 0;
+
+ if (use_ecn) {
+ if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
+ INET_ECN_xmit(sk);
+
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
+ }
+}
+
+static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+{
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
+ /* tp->ecn_flags are cleared at a later point in time when
+ * SYN ACK is ultimatively being received.
+ */
+ TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
+}
+
+static inline void
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
+{
+ if (inet_rsk(req)->ecn_ok)
+ th->ece = 1;
+}
+
+#endif /* _TCP_ECN_H */
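Note: for reference, the RFC 3168 handshake rules encoded by the
tcp_ecn_rcv_syn()/tcp_ecn_rcv_synack() helpers above: an ECN-setup SYN
carries ECE+CWR, a valid ECN-setup SYN-ACK carries ECE without CWR, and
anything else falls back to TCP_ECN_DISABLED. A userspace mock of just those
predicates (plain bools standing in for the tcphdr bits, not kernel code):

#include <stdbool.h>
#include <stdio.h>

static bool synack_keeps_ecn(bool ece, bool cwr)
{
	/* tcp_ecn_rcv_synack() disables ECN when (!th->ece || th->cwr) */
	return ece && !cwr;
}

static bool syn_keeps_ecn(bool ece, bool cwr)
{
	/* tcp_ecn_rcv_syn() requires both ECE and CWR on an ECN-setup SYN */
	return ece && cwr;
}

int main(void)
{
	printf("SYN-ACK ece=1 cwr=0 -> ECN %s\n",
	       synack_keeps_ecn(true, false) ? "on" : "off"); /* on  */
	printf("SYN     ece=1 cwr=1 -> ECN %s\n",
	       syn_keeps_ecn(true, true) ? "on" : "off");     /* on  */
	printf("SYN     ece=1 cwr=0 -> ECN %s\n",
	       syn_keeps_ecn(true, false) ? "on" : "off");    /* off */
	return 0;
}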
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7f9c671b1ee0..1f643faa8b93 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5145,7 +5145,7 @@ static void __init tcp_struct_check(void)
/* 32bit arches with 8byte alignment on u64 fields might need padding
* before tcp_clock_cache.
*/
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 92 + 4);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 91 + 4);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
@@ -5162,7 +5162,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 96);
}
void __init tcp_init(void)
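Note: the two size updates above track the nonagle move;
CACHELINE_ASSERT_GROUP_SIZE turns any unannounced layout change into a build
failure, so reshuffles like this one must adjust the expected sizes in the
same patch. A simplified userspace equivalent using C11 _Static_assert
(illustrative struct, not the kernel macro):

#include <stdint.h>

struct demo_group {
	uint8_t  nonagle;
	uint32_t pred_flags;
	uint64_t tcp_clock_cache;
};

/* Breaks the build if a field is added, removed, or reordered without
 * updating the expected byte count -- the same discipline tcp_struct_check()
 * enforces for the tcp_sock cacheline groups.
 */
_Static_assert(sizeof(struct demo_group) == 16,
	       "demo_group layout changed; update the expected size");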
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f1be65af1a77..b2793e749cfd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,7 @@
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
+#include <net/tcp_ecn.h>
#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
@@ -339,31 +340,6 @@ static bool tcp_in_quickack_mode(struct sock *sk)
(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}
-static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
-{
- if (tcp_ecn_mode_rfc3168(tp))
- tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
-}
-
-static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
-{
- if (tcp_hdr(skb)->cwr) {
- tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-
- /* If the sender is telling us it has entered CWR, then its
- * cwnd may be very low (even just 1 packet), so we should ACK
- * immediately.
- */
- if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
- }
-}
-
-static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
-{
- tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
-}
-
static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -399,25 +375,6 @@ static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb)
}
}
-static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
-{
- if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr))
- tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
-}
-
-static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
-{
- if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
- tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
-}
-
-static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
-{
- if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
- return true;
- return false;
-}
-
static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
{
tp->delivered_ce += ecn_count;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e180364b8dda..be8ceefa5332 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <net/tcp.h>
+#include <net/tcp_ecn.h>
#include <net/mptcp.h>
#include <net/proto_memory.h>
@@ -319,60 +320,6 @@ static u16 tcp_select_window(struct sock *sk)
return new_win;
}
-/* Packet ECN state for a SYN-ACK */
-static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
-
- TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
- if (tcp_ecn_disabled(tp))
- TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
- else if (tcp_ca_needs_ecn(sk) ||
- tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
-}
-
-/* Packet ECN state for a SYN. */
-static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
- bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
- tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
-
- if (!use_ecn) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
- if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
- use_ecn = true;
- }
-
- tp->ecn_flags = 0;
-
- if (use_ecn) {
- TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
- tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
- if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
- }
-}
-
-static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
-{
- if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
- /* tp->ecn_flags are cleared at a later point in time when
- * SYN ACK is ultimatively being received.
- */
- TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
-}
-
-static void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
-{
- if (inet_rsk(req)->ecn_ok)
- th->ece = 1;
-}
-
/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
* be sent.
*/