Diffstat (limited to 'net/ipv4/tcp.c')
net/ipv4/tcp.c | 100
1 file changed, 66 insertions(+), 34 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ad76556800f2..7949d16506a4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,11 +270,14 @@
#include <net/icmp.h>
#include <net/inet_common.h>
+#include <net/inet_ecn.h>
#include <net/tcp.h>
+#include <net/tcp_ecn.h>
#include <net/mptcp.h>
#include <net/proto_memory.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/psp.h>
#include <net/sock.h>
#include <net/rstreason.h>
@@ -412,6 +415,22 @@ static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
return rate64;
}
+#ifdef CONFIG_TCP_MD5SIG
+void tcp_md5_destruct_sock(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->md5sig_info) {
+ tcp_clear_md5_list(sk);
+ kfree(rcu_replace_pointer(tp->md5sig_info, NULL, 1));
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
+ tcp_md5_release_sigpool();
+ }
+}
+EXPORT_IPV6_MOD_GPL(tcp_md5_destruct_sock);
+#endif
+
/* Address-family independent initialization for a tcp_sock.
*
* NOTE: A lot of things set to zero explicitly by call to
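The new tcp_md5_destruct_sock() consolidates MD5 teardown in one place: it clears the key list, frees md5sig_info, drops the tcp_md5_needed static branch, and releases the signing pool. A minimal sketch of how a per-family destroy path might invoke the helper; the call-site wiring is an assumption, since this hunk only adds the helper itself:

	/* Sketch only: example_destroy_sock() is hypothetical; the real
	 * call sites (e.g. the IPv4/IPv6 destroy paths) are not shown
	 * in this hunk.
	 */
	static void example_destroy_sock(struct sock *sk)
	{
	#ifdef CONFIG_TCP_MD5SIG
		tcp_md5_destruct_sock(sk);	/* keys, static branch, sigpool */
	#endif
	}
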
@@ -687,6 +706,7 @@ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
tcb->seq = tcb->end_seq = tp->write_seq;
tcb->tcp_flags = TCPHDR_ACK;
__skb_header_release(skb);
+ psp_enqueue_set_decrypted(sk, skb);
tcp_add_write_queue_tail(sk, skb);
sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
@@ -2818,9 +2838,9 @@ found_ok_skb:
err = tcp_recvmsg_dmabuf(sk, skb, offset, msg,
used);
- if (err <= 0) {
+ if (err < 0) {
if (!copied)
- copied = -EFAULT;
+ copied = err;
break;
}
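This stops collapsing every dmabuf receive error into -EFAULT: a zero return is no longer treated as failure, and when nothing has been copied yet the first real error code reaches the caller. Reduced to a self-contained sketch of the corrected pattern (copy_chunk() is a hypothetical stand-in for tcp_recvmsg_dmabuf()):

	/* Only a negative return is an error; the original errno is
	 * preserved instead of being flattened to -EFAULT.
	 */
	static int copy_loop(int (*copy_chunk)(void), int nchunks)
	{
		int copied = 0;

		while (nchunks--) {
			int err = copy_chunk();

			if (err < 0) {			/* 0 is not a failure */
				if (!copied)
					copied = err;	/* keep the real errno */
				break;
			}
			copied += err;
		}
		return copied;
	}
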
@@ -3099,8 +3119,8 @@ bool tcp_check_oom(const struct sock *sk, int shift)
void __tcp_close(struct sock *sk, long timeout)
{
+ bool data_was_unread = false;
struct sk_buff *skb;
- int data_was_unread = 0;
int state;
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
@@ -3118,13 +3138,14 @@ void __tcp_close(struct sock *sk, long timeout)
* descriptor close, not protocol-sourced closes, because the
* reader process may not have drained the data yet!
*/
- while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- len--;
- data_was_unread += len;
- __kfree_skb(skb);
+ end_seq--;
+ if (after(end_seq, tcp_sk(sk)->copied_seq))
+ data_was_unread = true;
+ tcp_eat_recv_skb(sk, skb);
}
/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
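The rewritten loop decides "data was unread" by sequence arithmetic instead of payload length: a segment counts only if its end sequence, minus the FIN (which consumes one sequence number), lies beyond copied_seq. after() is the kernel's wraparound-safe comparison; its definition from include/net/tcp.h, restated as standalone C:

	#include <stdint.h>
	#include <stdbool.h>

	/* seq1 is "before" seq2 iff the signed 32-bit difference is
	 * negative; this stays correct across sequence-space wraparound.
	 */
	static inline bool before(uint32_t seq1, uint32_t seq2)
	{
		return (int32_t)(seq1 - seq2) < 0;
	}
	#define after(seq2, seq1)	before(seq1, seq2)

	/* Example: 0x00000005 is after 0xfffffff0 because the space
	 * wrapped; a plain '>' comparison would get this wrong.
	 */
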
@@ -3195,7 +3216,7 @@ adjudge_to_death:
/* remove backlog if any, without releasing ownership. */
__release_sock(sk);
- this_cpu_inc(tcp_orphan_count);
+ tcp_orphan_count_inc();
/* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
@@ -3377,7 +3398,7 @@ int tcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(tp->write_seq, seq);
icsk->icsk_backoff = 0;
- icsk->icsk_probes_out = 0;
+ WRITE_ONCE(icsk->icsk_probes_out, 0);
icsk->icsk_probes_tstamp = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN);
@@ -3390,6 +3411,11 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
+ tp->accecn_fail_mode = 0;
+ tp->saw_accecn_opt = TCP_ACCECN_OPT_NOT_SEEN;
+ tcp_accecn_init_counters(tp);
+ tp->prev_ecnfield = 0;
+ tp->accecn_opt_tstamp = 0;
if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
@@ -3765,7 +3791,7 @@ int tcp_sock_set_maxseg(struct sock *sk, int val)
if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW))
return -EINVAL;
- tcp_sk(sk)->rx_opt.user_mss = val;
+ WRITE_ONCE(tcp_sk(sk)->rx_opt.user_mss, val);
return 0;
}
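user_mss can now be stored without the socket lock (TCP_MAXSEG becomes a lockless setsockopt in the next hunk), so both sides of the access are annotated: the WRITE_ONCE() here pairs with the READ_ONCE() added in do_tcp_getsockopt() below. A minimal illustration of the pairing, stripped of the TCP specifics:

	/* The writer and the lockless reader both annotate the access so
	 * KCSAN accepts the intentional data race and the compiler can
	 * neither tear nor fuse the load/store.
	 */
	static int shared_val;	/* accessed without a common lock */

	static void writer(int v)
	{
		WRITE_ONCE(shared_val, v);
	}

	static int reader(void)
	{
		return READ_ONCE(shared_val);
	}
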
@@ -3895,15 +3921,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max);
return 0;
}
+ case TCP_MAXSEG:
+ return tcp_sock_set_maxseg(sk, val);
}
sockopt_lock_sock(sk);
switch (optname) {
- case TCP_MAXSEG:
- err = tcp_sock_set_maxseg(sk, val);
- break;
-
case TCP_NODELAY:
__tcp_sock_set_nodelay(sk, val);
break;
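With TCP_MAXSEG dispatched before sockopt_lock_sock(), the option no longer takes the socket lock at all. Nothing changes for userspace; the usual call keeps working:

	/* Userspace view: clamping the MSS via TCP_MAXSEG.  The kernel-side
	 * locking change is invisible here; per tcp_sock_set_maxseg(),
	 * nonzero values below TCP_MIN_MSS (88) or above MAX_TCP_WINDOW
	 * (32767) are rejected with EINVAL.
	 */
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	int set_mss(int fd, int mss)
	{
		return setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
				  &mss, sizeof(mss));
	}
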
@@ -4142,6 +4166,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
+ const u8 ect1_idx = INET_ECN_ECT_1 - 1;
+ const u8 ect0_idx = INET_ECN_ECT_0 - 1;
+ const u8 ce_idx = INET_ECN_CE - 1;
unsigned long rate;
u32 now;
u64 rate64;
@@ -4268,6 +4295,16 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
if (tp->rto_stamp)
info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;
+ info->tcpi_accecn_fail_mode = tp->accecn_fail_mode;
+ info->tcpi_accecn_opt_seen = tp->saw_accecn_opt;
+ info->tcpi_received_ce = tp->received_ce;
+ info->tcpi_delivered_e1_bytes = tp->delivered_ecn_bytes[ect1_idx];
+ info->tcpi_delivered_e0_bytes = tp->delivered_ecn_bytes[ect0_idx];
+ info->tcpi_delivered_ce_bytes = tp->delivered_ecn_bytes[ce_idx];
+ info->tcpi_received_e1_bytes = tp->received_ecn_bytes[ect1_idx];
+ info->tcpi_received_e0_bytes = tp->received_ecn_bytes[ect0_idx];
+ info->tcpi_received_ce_bytes = tp->received_ecn_bytes[ce_idx];
+
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
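The new tcpi_* fields export the AccECN byte counters per ECN codepoint. The index arithmetic above works because the IP ECN codepoints are NOT_ECT = 0, ECT_1 = 1, ECT_0 = 2 and CE = 3, so subtracting one maps the three countable codepoints onto a 3-entry array. Userspace reads the struct as before; a sketch, assuming UAPI headers that already carry the new fields (their names are taken from this patch):

	#include <linux/tcp.h>		/* struct tcp_info, TCP_INFO */
	#include <netinet/in.h>		/* IPPROTO_TCP */
	#include <sys/socket.h>
	#include <stdio.h>

	void dump_ce_bytes(int fd)
	{
		struct tcp_info ti;
		socklen_t len = sizeof(ti);

		/* An older kernel returns a shorter struct; checking the
		 * returned length guards against reading unfilled fields.
		 */
		if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0 &&
		    len >= sizeof(ti))
			printf("CE bytes received: %llu\n",
			       (unsigned long long)ti.tcpi_received_ce_bytes);
	}
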
@@ -4353,7 +4390,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
- nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
+ nla_put_u8(stats, TCP_NLA_RECUR_RETRANS,
+ READ_ONCE(inet_csk(sk)->icsk_retransmits));
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
@@ -4388,6 +4426,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
+ int user_mss;
int val, len;
if (copy_from_sockptr(&len, optlen, sizeof(int)))
@@ -4401,9 +4440,10 @@ int do_tcp_getsockopt(struct sock *sk, int level,
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
- if (tp->rx_opt.user_mss &&
+ user_mss = READ_ONCE(tp->rx_opt.user_mss);
+ if (user_mss &&
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
- val = tp->rx_opt.user_mss;
+ val = user_mss;
if (tp->repair)
val = tp->rx_opt.mss_clamp;
break;
@@ -5061,7 +5101,9 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32);
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, tcp_clean_acked);
+#endif
/* TXRX read-mostly hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
@@ -5072,11 +5114,9 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
/* RX read-mostly hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
@@ -5087,12 +5127,6 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
-#if IS_ENABLED(CONFIG_TLS_DEVICE)
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77);
-#else
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
-#endif
/* TX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
@@ -5106,11 +5140,11 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, accecn_opt_tstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 89);
/* TXRX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
@@ -5125,15 +5159,13 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
- /* 32bit arches with 8byte alignment on u64 fields might need padding
- * before tcp_clock_cache.
- */
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 92 + 4);
-
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
@@ -5144,12 +5176,12 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
}
void __init tcp_init(void)
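
The hunks above drop every CACHELINE_ASSERT_GROUP_SIZE() check while keeping the per-member asserts. In rough terms (a simplified sketch, not the exact include/linux/cache.h text), the member macro bounds each field between the begin/end markers that the cacheline-group annotations place inside the struct:

	/* Simplified sketch of the group-assert machinery.  The real macro
	 * lives in include/linux/cache.h and keys off marker members named
	 * __cacheline_group_begin__<group> / __cacheline_group_end__<group>;
	 * details here are illustrative.
	 */
	#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER)		\
		BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >=			\
			       offsetof(TYPE, __cacheline_group_begin__##GROUP) && \
			       offsetofend(TYPE, MEMBER) <=			\
			       offsetof(TYPE, __cacheline_group_end__##GROUP)))

Dropping the _SIZE checks means growing a group, as the AccECN counters do here, no longer requires retuning a magic byte count on every change, at the cost of no longer catching accidental group growth at build time.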