summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.rst4
-rw-r--r--include/linux/tcp.h3
-rw-r--r--include/net/tcp_ecn.h2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/tcp_output.c7
6 files changed, 26 insertions, 5 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index bc9a01606daf..28c7e4f5ecf9 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -482,7 +482,9 @@ tcp_ecn_option - INTEGER
1 Send AccECN option sparingly according to the minimum option
rules outlined in draft-ietf-tcpm-accurate-ecn.
2 Send AccECN option on every packet whenever it fits into TCP
- option space.
+ option space except when AccECN fallback is triggered.
+ 3 Send AccECN option on every packet whenever it fits into TCP
+ option space even when AccECN fallback is triggered.
= ============================================================
Default: 2
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fbc514d582e7..f72eef31fa23 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -291,7 +291,8 @@ struct tcp_sock {
u8 nonagle : 4,/* Disable Nagle algorithm? */
rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */
- unused2:4;
+ accecn_opt_sent_w_dsack:1,/* Sent ACCECN opt in previous ACK w/ D-SACK */
+ unused2:3;
u8 accecn_minlen:2,/* Minimum length of AccECN option sent */
est_ecnfield:2,/* ECN field for AccECN delivered estimates */
accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 49e0b865fe02..e01653bbf181 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -29,6 +29,7 @@ enum tcp_accecn_option {
TCP_ACCECN_OPTION_DISABLED = 0,
TCP_ACCECN_OPTION_MINIMUM = 1,
TCP_ACCECN_OPTION_FULL = 2,
+ TCP_ACCECN_OPTION_PERSIST = 3,
};
/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
@@ -406,6 +407,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
tp->received_ce_pending = 0;
__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
__tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
+ tp->accecn_opt_sent_w_dsack = 0;
tp->accecn_minlen = 0;
tp->accecn_opt_demand = 0;
tp->est_ecnfield = 0;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a1a50a5c80dc..385b5b986d23 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -749,7 +749,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_TWO,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "tcp_ecn_option_beacon",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 988d161e9918..89526f0f2301 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5046,8 +5046,11 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}
-static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
+static void tcp_rcv_spurious_retrans(struct sock *sk,
+ const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
/* When the ACK path fails or drops most ACKs, the sender would
* timeout and spuriously retransmit the same segment repeatedly.
* If it seems our ACKs are not reaching the other side,
@@ -5067,6 +5070,14 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
/* Save last flowlabel after a spurious retrans. */
tcp_save_lrcv_flowlabel(sk, skb);
#endif
+ /* Check DSACK info to detect that the previous ACK carrying the
+ * AccECN option was lost after the second retransmision, and then
+ * stop sending AccECN option in all subsequent ACKs.
+ */
+ if (tcp_ecn_mode_accecn(tp) &&
+ tp->accecn_opt_sent_w_dsack &&
+ TCP_SKB_CB(skb)->seq == tp->duplicate_sack[0].start_seq)
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_SEND);
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2b356fdbf2ca..f44d60d13b9f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -715,9 +715,12 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
if (tp) {
tp->accecn_minlen = 0;
tp->accecn_opt_tstamp = tp->tcp_mstamp;
+ tp->accecn_opt_sent_w_dsack = tp->rx_opt.dsack;
if (tp->accecn_opt_demand)
tp->accecn_opt_demand--;
}
+ } else if (tp) {
+ tp->accecn_opt_sent_w_dsack = 0;
}
if (unlikely(OPTION_SACK_ADVERTISE & options)) {
@@ -1189,7 +1192,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
if (tcp_ecn_mode_accecn(tp)) {
int ecn_opt = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option);
- if (ecn_opt && tp->saw_accecn_opt && !tcp_accecn_opt_fail_send(tp) &&
+ if (ecn_opt && tp->saw_accecn_opt &&
+ (ecn_opt >= TCP_ACCECN_OPTION_PERSIST ||
+ !tcp_accecn_opt_fail_send(tp)) &&
(ecn_opt >= TCP_ACCECN_OPTION_FULL || tp->accecn_opt_demand ||
tcp_accecn_option_beacon_check(sk))) {
opts->use_synack_ecn_bytes = 0;