Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c | 112
1 file changed, 51 insertions, 61 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d07e34f8e309..8e08b409c71e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -162,6 +162,15 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 /* Account for an ACK we sent. */
 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (unlikely(tp->compressed_ack)) {
+		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+			      tp->compressed_ack);
+		tp->compressed_ack = 0;
+		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+			__sock_put(sk);
+	}
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -229,11 +238,9 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
 		}
 	}
 
-	if (mss > (1 << *rcv_wscale)) {
-		if (!init_rcv_wnd) /* Use default unless specified otherwise */
-			init_rcv_wnd = tcp_default_init_rwnd(mss);
-		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-	}
+	if (!init_rcv_wnd) /* Use default unless specified otherwise */
+		init_rcv_wnd = tcp_default_init_rwnd(mss);
+	*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
 
 	/* Set the clamp no higher than max representable value */
 	(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
@@ -585,14 +592,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
 	struct tcp_fastopen_request *fastopen = tp->fastopen_req;
 
+	*md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-	*md5 = tp->af_specific->md5_lookup(sk, sk);
-	if (*md5) {
-		opts->options |= OPTION_MD5;
-		remaining -= TCPOLEN_MD5SIG_ALIGNED;
+	if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+		*md5 = tp->af_specific->md5_lookup(sk, sk);
+		if (*md5) {
+			opts->options |= OPTION_MD5;
+			remaining -= TCPOLEN_MD5SIG_ALIGNED;
+		}
 	}
-#else
-	*md5 = NULL;
 #endif
 
 	/* We always get an MSS option.  The option bytes which will be seen in
@@ -720,14 +728,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 
 	opts->options = 0;
 
+	*md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-	*md5 = tp->af_specific->md5_lookup(sk, sk);
-	if (unlikely(*md5)) {
-		opts->options |= OPTION_MD5;
-		size += TCPOLEN_MD5SIG_ALIGNED;
+	if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+		*md5 = tp->af_specific->md5_lookup(sk, sk);
+		if (*md5) {
+			opts->options |= OPTION_MD5;
+			size += TCPOLEN_MD5SIG_ALIGNED;
+		}
 	}
-#else
-	*md5 = NULL;
 #endif
 
 	if (likely(tp->rx_opt.tstamp_ok)) {
@@ -772,7 +781,7 @@ struct tsq_tasklet {
 };
 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
 
-static void tcp_tsq_handler(struct sock *sk)
+static void tcp_tsq_write(struct sock *sk)
 {
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
@@ -789,6 +798,16 @@ static void tcp_tsq_handler(struct sock *sk)
 			       0, GFP_ATOMIC);
 	}
 }
+
+static void tcp_tsq_handler(struct sock *sk)
+{
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk))
+		tcp_tsq_write(sk);
+	else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+		sock_hold(sk);
+	bh_unlock_sock(sk);
+}
 /*
  * One tasklet per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
@@ -816,16 +835,7 @@ static void tcp_tasklet_func(unsigned long data)
 		smp_mb__before_atomic();
 		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
 
-		if (!sk->sk_lock.owned &&
-		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
-			bh_lock_sock(sk);
-			if (!sock_owned_by_user(sk)) {
-				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
-				tcp_tsq_handler(sk);
-			}
-			bh_unlock_sock(sk);
-		}
-
+		tcp_tsq_handler(sk);
 		sk_free(sk);
 	}
 }
@@ -853,9 +863,10 @@ void tcp_release_cb(struct sock *sk)
 		nflags = flags & ~TCP_DEFERRED_ALL;
 	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-	if (flags & TCPF_TSQ_DEFERRED)
-		tcp_tsq_handler(sk);
-
+	if (flags & TCPF_TSQ_DEFERRED) {
+		tcp_tsq_write(sk);
+		__sock_put(sk);
+	}
 	/* Here begins the tricky part :
 	 * We are called from release_sock() with :
 	 * 1) BH disabled
@@ -929,7 +940,7 @@ void tcp_wfree(struct sk_buff *skb)
 		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
 			goto out;
 
-		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
 		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
 		if (nval != oval)
 			continue;
@@ -948,37 +959,17 @@ out:
 	sk_free(sk);
 }
 
-/* Note: Called under hard irq.
- * We can not call TCP stack right away.
+/* Note: Called under soft irq.
+ * We can call TCP stack right away, unless socket is owned by user.
  */
 enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 {
 	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
 	struct sock *sk = (struct sock *)tp;
-	unsigned long nval, oval;
 
-	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
-		struct tsq_tasklet *tsq;
-		bool empty;
-
-		if (oval & TSQF_QUEUED)
-			break;
-
-		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
-		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
-		if (nval != oval)
-			continue;
+	tcp_tsq_handler(sk);
+	sock_put(sk);
 
-		if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
-			break;
-		/* queue this socket to tasklet queue */
-		tsq = this_cpu_ptr(&tsq_tasklet);
-		empty = list_empty(&tsq->head);
-		list_add(&tp->tsq_node, &tsq->head);
-		if (empty)
-			tasklet_schedule(&tsq->tasklet);
-		break;
-	}
 	return HRTIMER_NORESTART;
 }
 
@@ -1011,7 +1002,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 	do_div(len_ns, rate);
 	hrtimer_start(&tcp_sk(sk)->pacing_timer,
 		      ktime_add_ns(ktime_get(), len_ns),
-		      HRTIMER_MODE_ABS_PINNED);
+		      HRTIMER_MODE_ABS_PINNED_SOFT);
+	sock_hold(sk);
 }
 
 static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1078,7 +1070,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	/* if no packet is in qdisc/device queue, then allow XPS to select
 	 * another queue. We can be called from tcp_tsq_handler()
-	 * which holds one reference to sk_wmem_alloc.
+	 * which holds one reference to sk.
 	 *
 	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
	 * One way to get this would be to set skb->truesize = 2 on them.
@@ -2185,7 +2177,7 @@ static int tcp_mtu_probe(struct sock *sk)
 static bool tcp_pacing_check(const struct sock *sk)
 {
 	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_active(&tcp_sk(sk)->pacing_timer);
+	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
 }
 
 /* TCP Small Queues :
@@ -2365,8 +2357,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 					  skb, limit, mss_now, gfp)))
 			break;
 
-		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
-			clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
 
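
The tcp_event_ack_sent() hunk above flushes any pending compressed-ACK count into the LINUX_MIB_TCPACKCOMPRESSED counter before a real ACK goes out. Below is a minimal user-space sketch (not part of this diff) for watching that counter, assuming a kernel new enough to expose it as "TCPAckCompressed" in the TcpExt section of /proc/net/netstat; the pair-of-lines file layout is standard, but the counter itself only exists on kernels carrying this series.

/* ackcomp.c (hypothetical example): print the TCPAckCompressed counter. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <string.h>

int main(void)
{
	char names[4096], vals[4096];
	FILE *f = fopen("/proc/net/netstat", "r");

	if (!f) {
		perror("/proc/net/netstat");
		return 1;
	}
	/* The file is pairs of lines: "TcpExt: <names...>" followed by
	 * "TcpExt: <values...>". Walk both lines token by token in lockstep.
	 */
	while (fgets(names, sizeof(names), f) && fgets(vals, sizeof(vals), f)) {
		char *sn, *sv;
		char *n = strtok_r(names, " \n", &sn);
		char *v = strtok_r(vals, " \n", &sv);

		if (!n || !v || strcmp(n, "TcpExt:"))
			continue;
		while ((n = strtok_r(NULL, " \n", &sn)) &&
		       (v = strtok_r(NULL, " \n", &sv))) {
			if (!strcmp(n, "TCPAckCompressed")) {
				printf("TCPAckCompressed: %s\n", v);
				fclose(f);
				return 0;
			}
		}
	}
	fclose(f);
	fprintf(stderr, "TCPAckCompressed not found (kernel without ACK compression?)\n");
	return 1;
}

Build with "cc -o ackcomp ackcomp.c" and run it periodically on a loaded receiver; a growing value indicates the ACK compression path accounted for in tcp_event_ack_sent() is firing.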
