diff options
| -rw-r--r-- | Documentation/networking/ip-sysctl.rst | 4 | ||||
| -rw-r--r-- | include/linux/tcp.h | 3 | ||||
| -rw-r--r-- | include/net/tcp_ecn.h | 2 | ||||
| -rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 13 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 7 |
6 files changed, 26 insertions, 5 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bc9a01606daf..28c7e4f5ecf9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -482,7 +482,9 @@ tcp_ecn_option - INTEGER 1 Send AccECN option sparingly according to the minimum option rules outlined in draft-ietf-tcpm-accurate-ecn. 2 Send AccECN option on every packet whenever it fits into TCP - option space. + option space except when AccECN fallback is triggered. + 3 Send AccECN option on every packet whenever it fits into TCP + option space even when AccECN fallback is triggered. = ============================================================ Default: 2 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fbc514d582e7..f72eef31fa23 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -291,7 +291,8 @@ struct tcp_sock { u8 nonagle : 4,/* Disable Nagle algorithm? */ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ - unused2:4; + accecn_opt_sent_w_dsack:1,/* Sent ACCECN opt in previous ACK w/ D-SACK */ + unused2:3; u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ est_ecnfield:2,/* ECN field for AccECN delivered estimates */ accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */ diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 49e0b865fe02..e01653bbf181 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -29,6 +29,7 @@ enum tcp_accecn_option { TCP_ACCECN_OPTION_DISABLED = 0, TCP_ACCECN_OPTION_MINIMUM = 1, TCP_ACCECN_OPTION_FULL = 2, + TCP_ACCECN_OPTION_PERSIST = 3, }; /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ @@ -406,6 +407,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) tp->received_ce_pending = 0; __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); + 
tp->accecn_opt_sent_w_dsack = 0; tp->accecn_minlen = 0; tp->accecn_opt_demand = 0; tp->est_ecnfield = 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a1a50a5c80dc..385b5b986d23 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -749,7 +749,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, + .extra2 = SYSCTL_THREE, }, { .procname = "tcp_ecn_option_beacon", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 988d161e9918..89526f0f2301 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5046,8 +5046,11 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } -static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) +static void tcp_rcv_spurious_retrans(struct sock *sk, + const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + /* When the ACK path fails or drops most ACKs, the sender would * timeout and spuriously retransmit the same segment repeatedly. * If it seems our ACKs are not reaching the other side, @@ -5067,6 +5070,14 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) /* Save last flowlabel after a spurious retrans. */ tcp_save_lrcv_flowlabel(sk, skb); #endif + /* Check DSACK info to detect that the previous ACK carrying the + * AccECN option was lost after the second retransmission, and then + * stop sending AccECN option in all subsequent ACKs. 
+ */ + if (tcp_ecn_mode_accecn(tp) && + tp->accecn_opt_sent_w_dsack && + TCP_SKB_CB(skb)->seq == tp->duplicate_sack[0].start_seq) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_SEND); } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b356fdbf2ca..f44d60d13b9f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -715,9 +715,12 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, if (tp) { tp->accecn_minlen = 0; tp->accecn_opt_tstamp = tp->tcp_mstamp; + tp->accecn_opt_sent_w_dsack = tp->rx_opt.dsack; if (tp->accecn_opt_demand) tp->accecn_opt_demand--; } + } else if (tp) { + tp->accecn_opt_sent_w_dsack = 0; } if (unlikely(OPTION_SACK_ADVERTISE & options)) { @@ -1189,7 +1192,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (tcp_ecn_mode_accecn(tp)) { int ecn_opt = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option); - if (ecn_opt && tp->saw_accecn_opt && !tcp_accecn_opt_fail_send(tp) && + if (ecn_opt && tp->saw_accecn_opt && + (ecn_opt >= TCP_ACCECN_OPTION_PERSIST || + !tcp_accecn_opt_fail_send(tp)) && (ecn_opt >= TCP_ACCECN_OPTION_FULL || tp->accecn_opt_demand || tcp_accecn_option_beacon_check(sk))) { opts->use_synack_ecn_bytes = 0; |
