Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  |  72
1 file changed, 47 insertions(+), 25 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e4a979b75cc6..198f8a0d37be 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -896,6 +896,7 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval)
const struct net *net = sock_net(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 rcvwin, rcvbuf, cap, oldval;
+ u32 rtt_threshold, rtt_us;
u64 grow;
oldval = tp->rcvq_space.space;
@@ -908,10 +909,19 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval)
/* DRS is always one RTT late. */
rcvwin = newval << 1;
- /* slow start: allow the sender to double its rate. */
- grow = (u64)rcvwin * (newval - oldval);
- do_div(grow, oldval);
- rcvwin += grow << 1;
+ rtt_us = tp->rcv_rtt_est.rtt_us >> 3;
+ rtt_threshold = READ_ONCE(net->ipv4.sysctl_tcp_rcvbuf_low_rtt);
+ if (rtt_us < rtt_threshold) {
+ /* For small RTT, we set @grow to rcvwin * rtt_us/rtt_threshold.
+ * It might take a few additional ms to reach 'line rate',
+ * but will avoid sk_rcvbuf inflation and poor cache use.
+ */
+ grow = div_u64((u64)rcvwin * rtt_us, rtt_threshold);
+ } else {
+ /* slow start: allow the sender to double its rate. */
+ grow = div_u64(((u64)rcvwin << 1) * (newval - oldval), oldval);
+ }
+ rcvwin += grow;
if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt;
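
For illustration, a standalone userspace sketch of the growth arithmetic in this hunk (not kernel code; the helper name and the sample values are made up, and the 1 ms threshold stands in for whatever sysctl_tcp_rcvbuf_low_rtt is configured to):

#include <stdint.h>
#include <stdio.h>

/* Illustrative mirror of the two growth branches above.
 * All sizes are in bytes, all times in microseconds.
 */
static uint64_t rcvwin_after_grow(uint64_t newval, uint64_t oldval,
				  uint64_t rtt_us, uint64_t rtt_threshold)
{
	uint64_t rcvwin = newval << 1;	/* DRS is always one RTT late */
	uint64_t grow;

	if (rtt_us < rtt_threshold)
		grow = rcvwin * rtt_us / rtt_threshold;			/* scaled-down growth */
	else
		grow = (rcvwin << 1) * (newval - oldval) / oldval;	/* slow start */

	return rcvwin + grow;
}

int main(void)
{
	/* 100 us RTT under a hypothetical 1 ms threshold: the window grows by
	 * only rcvwin/10, whereas the slow-start branch (5 ms RTT here, with a
	 * 50% per-RTT increase in copied bytes) doubles it.
	 */
	printf("low rtt:    %llu\n", (unsigned long long)
	       rcvwin_after_grow(150000, 100000, 100, 1000));
	printf("slow start: %llu\n", (unsigned long long)
	       rcvwin_after_grow(150000, 100000, 5000, 1000));
	return 0;
}
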
@@ -937,9 +947,15 @@ void tcp_rcv_space_adjust(struct sock *sk)
trace_tcp_rcv_space_adjust(sk);
- tcp_mstamp_refresh(tp);
+ if (unlikely(!tp->rcv_rtt_est.rtt_us))
+ return;
+
+ /* We do not refresh tp->tcp_mstamp here.
+ * Some platforms have expensive ktime_get() implementations.
+ * Using the last cached value is enough for DRS.
+ */
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
- if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
+ if (time < (tp->rcv_rtt_est.rtt_us >> 3))
return;
/* Number of bytes copied to user in last RTT */
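
A minimal sketch of the gating logic above (not kernel code; the helper name is illustrative): the adjustment runs at most once per receive RTT, bails out early when no RTT sample exists yet, and compares against a timestamp cached earlier on the receive path rather than reading the clock again.

#include <stdbool.h>
#include <stdint.h>

/* rcv_rtt_x8 mimics tp->rcv_rtt_est.rtt_us, which is kept scaled by 8,
 * so ">> 3" recovers the RTT in microseconds. now_us stands for the
 * timestamp cached earlier (tp->tcp_mstamp); no fresh ktime_get() is
 * needed for this check.
 */
bool drs_should_adjust(uint64_t now_us, uint64_t last_adjust_us,
		       uint32_t rcv_rtt_x8)
{
	if (!rcv_rtt_x8)		/* no receive RTT sample yet */
		return false;

	return now_us - last_adjust_us >= (rcv_rtt_x8 >> 3);
}
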
@@ -1102,7 +1118,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->srtt_us = max(1U, srtt);
}
-static void tcp_update_pacing_rate(struct sock *sk)
+void tcp_update_pacing_rate(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
u64 rate;
@@ -1139,7 +1155,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
/* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
-static void tcp_set_rto(struct sock *sk)
+void tcp_set_rto(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
/* Old crap is replaced with new one. 8)
@@ -5887,7 +5903,9 @@ static inline void tcp_data_snd_check(struct sock *sk)
static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
struct tcp_sock *tp = tcp_sk(sk);
- unsigned long rtt, delay;
+ struct net *net = sock_net(sk);
+ unsigned long rtt;
+ u64 delay;
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
@@ -5906,7 +5924,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
* Defer the ack until tcp_release_cb().
*/
if (sock_owned_by_user_nocheck(sk) &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) {
+ READ_ONCE(net->ipv4.sysctl_tcp_backlog_ack_defer)) {
set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags);
return;
}
@@ -5921,7 +5939,7 @@ send_now:
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+ tp->compressed_ack >= READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5936,18 +5954,26 @@ send_now:
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
- /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
+ /* compress ack timer : comp_sack_rtt_percent of rtt,
+ * but no more than tcp_comp_sack_delay_ns.
+ */
rtt = tp->rcv_rtt_est.rtt_us;
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long,
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
- rtt * (NSEC_PER_USEC >> 3)/20);
+ /* delay = (rtt >> 3) * NSEC_PER_USEC * comp_sack_rtt_percent / 100
+ * ->
+ * delay = rtt * 1.25 * comp_sack_rtt_percent
+ */
+ delay = (u64)(rtt + (rtt >> 2)) *
+ READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_rtt_percent);
+
+ delay = min(delay, READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_delay_ns));
+
sock_hold(sk);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+ READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_slack_ns),
HRTIMER_MODE_REL_PINNED_SOFT);
}
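
A worked example of the delay arithmetic above (not kernel code; the helper name, the RTT, the percentage and the cap are made-up values). Since rcv_rtt_est.rtt_us and srtt_us are kept scaled by 8, (rtt >> 3) * NSEC_PER_USEC * percent / 100 reduces to rtt * 1.25 * percent, i.e. (rtt + (rtt >> 2)) * percent:

#include <stdint.h>
#include <stdio.h>

/* delay_ns = (rtt_x8 / 8) us * 1000 ns/us * percent / 100
 *          = rtt_x8 * 1.25 * percent
 *          = (rtt_x8 + rtt_x8 / 4) * percent, capped at max_delay_ns
 */
static uint64_t comp_sack_delay_ns(uint32_t rtt_x8, uint32_t percent,
				   uint64_t max_delay_ns)
{
	uint64_t delay = (uint64_t)(rtt_x8 + (rtt_x8 >> 2)) * percent;

	return delay < max_delay_ns ? delay : max_delay_ns;
}

int main(void)
{
	/* sample values: 10 ms RTT (80000 once scaled by 8), 6% of RTT,
	 * 1 ms cap -> 100000 * 6 = 600000 ns = 600 us, below the cap.
	 */
	printf("%llu ns\n",
	       (unsigned long long)comp_sack_delay_ns(80000, 6, 1000000));
	return 0;
}
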
@@ -7525,15 +7551,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
sock_put(fastopen_sk);
} else {
tcp_rsk(req)->tfo_listener = false;
- if (!want_cookie) {
- req->timeout = tcp_timeout_init((struct sock *)req);
- if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
- req->timeout))) {
- reqsk_free(req);
- dst_release(dst);
- return 0;
- }
-
+ if (!want_cookie &&
+ unlikely(!inet_csk_reqsk_queue_hash_add(sk, req))) {
+ reqsk_free(req);
+ dst_release(dst);
+ return 0;
}
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :