| field | value |
|---|---|
| author | Jakub Kicinski <kuba@kernel.org>, 2022-06-10 16:21:39 -0700 |
| committer | Jakub Kicinski <kuba@kernel.org>, 2022-06-10 16:21:40 -0700 |
| commit | e10b02ee5b6c95872064cf0a8e65f31951a31967 (patch) |
| tree | e061107c999e33aac6a61f87cd45a24cd4258422 /net/sctp/socket.c |
| parent | 5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff) |
| parent | 0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff) |
Merge branch 'net-reduce-tcp_memory_allocated-inflation'
Eric Dumazet says:
====================
net: reduce tcp_memory_allocated inflation
Hosts with a lot of sockets tend to hit so-called TCP memory pressure,
leading to very bad TCP performance and/or OOM.
The problem is that some TCP sockets can hold up to 2MB of 'forward
allocations' in their per-socket cache (sk->sk_forward_alloc),
and there is no mechanism to make them relinquish their share
under memory pressure.
Their share is reclaimed only on certain, potentially rare, events,
one socket at a time.
In this series, I implemented a per-cpu cache instead of a per-socket one.
Each CPU has a +1/-1 MB (256 pages on x86) forward-alloc cache, so that
the shared tcp_memory_allocated cache line is not dirtied too often.
We keep sk->sk_forward_alloc values as small as possible, to meet
the memcg page-granularity constraint.
Note that memcg already has a per-cpu cache, although MEMCG_CHARGE_BATCH
is defined as 32 pages, which seems a bit small.
Note that while this cover letter mentions TCP, this work is generic
and supports TCP, UDP, DECNET, and SCTP.
====================
Link: https://lore.kernel.org/r/20220609063412.2205738-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
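
To make the batching scheme described in the cover letter concrete, here is a minimal user-space C sketch of the idea, not the kernel implementation: each CPU accumulates small forward-alloc deltas in a local cache and only touches the shared, contended counter once the local delta crosses roughly 256 pages (~1 MB). All names below (memory_allocated_add, fw_alloc_cache, NR_CPUS, FW_ALLOC_BATCH) are invented for this example.

```c
/*
 * Minimal sketch, NOT kernel code: per-CPU batching of forward-alloc
 * pages, flushing to a shared counter only when a CPU's local delta
 * crosses +/- 256 pages (~1 MB with 4 KiB pages).
 */
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS		4
#define FW_ALLOC_BATCH	256	/* pages */

static _Atomic long shared_memory_allocated;	/* shared, contended counter */
static long fw_alloc_cache[NR_CPUS];		/* per-CPU delta, in pages */

/* Charge (amt > 0) or uncharge (amt < 0) pages on behalf of @cpu. */
static void memory_allocated_add(int cpu, long amt)
{
	long local = fw_alloc_cache[cpu] + amt;

	/* Dirty the shared cache line only when the local cache overflows. */
	if (local >= FW_ALLOC_BATCH || local <= -FW_ALLOC_BATCH) {
		atomic_fetch_add(&shared_memory_allocated, local);
		local = 0;
	}
	fw_alloc_cache[cpu] = local;
}

int main(void)
{
	/* 300 one-page charges from CPU 0 touch the shared counter once
	 * (when the 256-page threshold is crossed), not 300 times.
	 */
	for (int i = 0; i < 300; i++)
		memory_allocated_add(0, 1);

	printf("shared=%ld cpu0 cache=%ld\n",
	       atomic_load(&shared_memory_allocated), fw_alloc_cache[0]);
	return 0;
}
```

The actual kernel series keeps the same shape but works on struct proto's memory_allocated and per_cpu_fw_alloc counters, as the SCTP diff below shows.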
Diffstat (limited to 'net/sctp/socket.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | net/sctp/socket.c | 12 |
1 file changed, 7 insertions(+), 5 deletions(-)
```diff
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6d37d2dfb3da..171f1a35d205 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
+static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc);
 struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
@@ -1823,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 	if (sctp_wspace(asoc) < (int)msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-	if (sk_under_memory_pressure(sk))
-		sk_mem_reclaim(sk);
-
 	if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
 		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -9194,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
-		if (sk_under_memory_pressure(sk))
-			sk_mem_reclaim(sk);
 		if ((int)msg_len <= sctp_wspace(asoc) &&
 		    sk_wmem_schedule(sk, msg_len))
 			break;
@@ -9657,7 +9653,10 @@ struct proto sctp_prot = {
 	.sysctl_wmem = sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 
@@ -9700,7 +9699,10 @@ struct proto sctpv6_prot = {
 	.sysctl_wmem = sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 #endif /* IS_ENABLED(CONFIG_IPV6) */
```
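
Taken together, the two struct proto hunks are the whole per-protocol opt-in: declare a per-CPU int next to the existing atomic_long_t counter and point the new .per_cpu_fw_alloc member at it. As a hedged illustration, a hypothetical protocol would be wired up the same way; only the .memory_allocated / .per_cpu_fw_alloc fields and the DEFINE_PER_CPU pattern come from the diff above, while the "foo" names are invented for the example.

```c
/* Sketch only: a hypothetical protocol "foo" opting in to the per-CPU
 * forward-alloc cache, mirroring the SCTP hunks above.
 */
static atomic_long_t foo_memory_allocated;
static DEFINE_PER_CPU(int, foo_memory_per_cpu_fw_alloc);

struct proto foo_prot = {
	.name			= "FOO",
	/* ... other callbacks and sysctls ... */
	.memory_allocated	= &foo_memory_allocated,
	.per_cpu_fw_alloc	= &foo_memory_per_cpu_fw_alloc,
};
```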
