path: root/net/sctp/socket.c
author    Jakub Kicinski <kuba@kernel.org>    2022-06-10 16:21:39 -0700
committer Jakub Kicinski <kuba@kernel.org>    2022-06-10 16:21:40 -0700
commit    e10b02ee5b6c95872064cf0a8e65f31951a31967 (patch)
tree      e061107c999e33aac6a61f87cd45a24cd4258422 /net/sctp/socket.c
parent    5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff)
parent    0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff)
Merge branch 'net-reduce-tcp_memory_allocated-inflation'
Eric Dumazet says:

====================
net: reduce tcp_memory_allocated inflation

Hosts with a lot of sockets tend to hit so called TCP memory pressure,
leading to very bad TCP performance and/or OOM.

The problem is that some TCP sockets can hold up to 2MB of 'forward
allocations' in their per-socket cache (sk->sk_forward_alloc),
and there is no mechanism to make them relinquish their share
under mem pressure.

Only under some potentially rare events their share is reclaimed,
one socket at a time.

In this series, I implemented a per-cpu cache instead of a per-socket one.

Each CPU has a +1/-1 MB (256 pages on x86) forward alloc cache,
in order to not dirty tcp_memory_allocated shared cache line too often.

We keep sk->sk_forward_alloc values as small as possible, to meet
memcg page granularity constraint.

Note that memcg already has a per-cpu cache, although MEMCG_CHARGE_BATCH
is defined to 32 pages, which seems a bit small.

Note that while this cover letter mentions TCP, this work is generic
and supports TCP, UDP, DECNET, SCTP.
====================

Link: https://lore.kernel.org/r/20220609063412.2205738-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
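As a rough illustration of the batching idea described in the cover letter (not the kernel implementation), the minimal userspace C sketch below accumulates a thread-local page delta and folds it into a shared atomic counter only once the delta leaves a +/- 256 page (1 MB) window, so the shared cache line is dirtied far less often. The names charge_pages, per_cpu_fw_alloc, memory_allocated and the FW_ALLOC_BATCH constant are hypothetical stand-ins for the kernel's per-CPU forward-alloc cache and protocol memory counter.

/*
 * Userspace sketch of the per-CPU batching scheme (illustrative only):
 * a thread-local delta absorbs small charges and is flushed to the
 * shared atomic counter only when it exceeds +/- 256 pages.
 */
#include <stdatomic.h>
#include <stdio.h>

#define FW_ALLOC_BATCH 256			/* ~1 MB of 4 KiB pages, as on x86 */

static atomic_long memory_allocated;		/* shared counter, in pages */
static _Thread_local long per_cpu_fw_alloc;	/* local (per-thread) delta cache */

/* Charge (positive) or uncharge (negative) 'pages' of socket memory. */
static void charge_pages(long pages)
{
	per_cpu_fw_alloc += pages;

	/* Touch the shared cache line only when the local delta is large. */
	if (per_cpu_fw_alloc > FW_ALLOC_BATCH || per_cpu_fw_alloc < -FW_ALLOC_BATCH) {
		atomic_fetch_add(&memory_allocated, per_cpu_fw_alloc);
		per_cpu_fw_alloc = 0;
	}
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		charge_pages(1);		/* 1000 single-page charges, few shared updates */
	charge_pages(-500);

	printf("shared: %ld pages, local delta: %ld pages\n",
	       (long)atomic_load(&memory_allocated), per_cpu_fw_alloc);
	return 0;
}

The batch window trades a little accounting precision for far fewer contended atomic updates, which is the same trade-off the series makes with its +1/-1 MB per-CPU cache.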
Diffstat (limited to 'net/sctp/socket.c')
-rw-r--r--  net/sctp/socket.c  12
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6d37d2dfb3da..171f1a35d205 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
+static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc);
 struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
@@ -1823,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 	if (sctp_wspace(asoc) < (int)msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-	if (sk_under_memory_pressure(sk))
-		sk_mem_reclaim(sk);
-
 	if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
 		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -9194,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
-		if (sk_under_memory_pressure(sk))
-			sk_mem_reclaim(sk);
 		if ((int)msg_len <= sctp_wspace(asoc) &&
 		    sk_wmem_schedule(sk, msg_len))
 			break;
@@ -9657,7 +9653,10 @@ struct proto sctp_prot = {
 	.sysctl_wmem = sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 
@@ -9700,7 +9699,10 @@ struct proto sctpv6_prot = {
 	.sysctl_wmem = sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 #endif /* IS_ENABLED(CONFIG_IPV6) */