| field | value |
|---|---|
| author | Jakub Kicinski <kuba@kernel.org>, 2022-06-10 16:21:39 -0700 |
| committer | Jakub Kicinski <kuba@kernel.org>, 2022-06-10 16:21:40 -0700 |
| commit | e10b02ee5b6c95872064cf0a8e65f31951a31967 (patch) |
| tree | e061107c999e33aac6a61f87cd45a24cd4258422 /net/sctp/socket.c |
| parent | 5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff) |
| parent | 0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff) |
Merge branch 'net-reduce-tcp_memory_allocated-inflation'
Eric Dumazet says:
====================
net: reduce tcp_memory_allocated inflation
Hosts with a lot of sockets tend to hit so-called TCP memory pressure,
leading to very bad TCP performance and/or OOM.
The problem is that some TCP sockets can hold up to 2MB of 'forward
allocations' in their per-socket cache (sk->sk_forward_alloc),
and there is no mechanism to make them relinquish their share
under memory pressure.
Their share is reclaimed only on certain, potentially rare, events,
one socket at a time.
In this series, I implemented a per-cpu cache instead of a per-socket one.
Each CPU has a +1/-1 MB (256 pages on x86) forward-alloc cache, so that
the shared tcp_memory_allocated cache line is not dirtied too often.
We keep sk->sk_forward_alloc values as small as possible, to meet
the memcg page-granularity constraint.
Note that memcg already has a per-cpu cache, although MEMCG_CHARGE_BATCH
is defined as 32 pages, which seems a bit small.
Note that while this cover letter mentions TCP, this work is generic
and supports TCP, UDP, DECNET, and SCTP.
====================
Link: https://lore.kernel.org/r/20220609063412.2205738-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
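
To make the batching scheme described in the cover letter concrete, here is a minimal user-space C sketch of the idea, not the kernel implementation: each CPU accumulates small forward-alloc deltas in a local cache and only touches the shared, contended counter once the local delta crosses roughly 256 pages (~1 MB). All names below (memory_allocated_add, fw_alloc_cache, NR_CPUS, FW_ALLOC_BATCH) are invented for this example.

```c
/*
 * Minimal sketch, NOT kernel code: per-CPU batching of forward-alloc
 * pages, flushing to a shared counter only when a CPU's local delta
 * crosses +/- 256 pages (~1 MB with 4 KiB pages).
 */
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS		4
#define FW_ALLOC_BATCH	256	/* pages */

static _Atomic long shared_memory_allocated;	/* shared, contended counter */
static long fw_alloc_cache[NR_CPUS];		/* per-CPU delta, in pages */

/* Charge (amt > 0) or uncharge (amt < 0) pages on behalf of @cpu. */
static void memory_allocated_add(int cpu, long amt)
{
	long local = fw_alloc_cache[cpu] + amt;

	/* Dirty the shared cache line only when the local cache overflows. */
	if (local >= FW_ALLOC_BATCH || local <= -FW_ALLOC_BATCH) {
		atomic_fetch_add(&shared_memory_allocated, local);
		local = 0;
	}
	fw_alloc_cache[cpu] = local;
}

int main(void)
{
	/* 300 one-page charges from CPU 0 touch the shared counter once
	 * (when the 256-page threshold is crossed), not 300 times.
	 */
	for (int i = 0; i < 300; i++)
		memory_allocated_add(0, 1);

	printf("shared=%ld cpu0 cache=%ld\n",
	       atomic_load(&shared_memory_allocated), fw_alloc_cache[0]);
	return 0;
}
```

The actual kernel series keeps the same shape but works on struct proto's memory_allocated and per_cpu_fw_alloc counters, as the SCTP diff below shows.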
Diffstat (limited to 'net/sctp/socket.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | net/sctp/socket.c | 12 |
1 file changed, 7 insertions(+), 5 deletions(-)
```diff
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6d37d2dfb3da..171f1a35d205 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
+static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc);
 struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
@@ -1823,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 	if (sctp_wspace(asoc) < (int)msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-	if (sk_under_memory_pressure(sk))
-		sk_mem_reclaim(sk);
-
 	if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
 		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -9194,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
-		if (sk_under_memory_pressure(sk))
-			sk_mem_reclaim(sk);
 		if ((int)msg_len <= sctp_wspace(asoc) &&
 		    sk_wmem_schedule(sk, msg_len))
 			break;
@@ -9657,7 +9653,10 @@ struct proto sctp_prot = {
 	.sysctl_wmem = sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 
@@ -9700,7 +9699,10 @@ struct proto sctpv6_prot = {
 	.sysctl_wmem = sysctl_sctp_wmem,
 	.memory_pressure = &sctp_memory_pressure,
 	.enter_memory_pressure = sctp_enter_memory_pressure,
+
 	.memory_allocated = &sctp_memory_allocated,
+	.per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 #endif /* IS_ENABLED(CONFIG_IPV6) */
```
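
Taken together, the two struct proto hunks are the whole per-protocol opt-in: declare a per-CPU int next to the existing atomic_long_t counter and point the new .per_cpu_fw_alloc member at it. As a hedged illustration, a hypothetical protocol would be wired up the same way; only the .memory_allocated / .per_cpu_fw_alloc fields and the DEFINE_PER_CPU pattern come from the diff above, while the "foo" names are invented for the example.

```c
/* Sketch only: a hypothetical protocol "foo" opting in to the per-CPU
 * forward-alloc cache, mirroring the SCTP hunks above.
 */
static atomic_long_t foo_memory_allocated;
static DEFINE_PER_CPU(int, foo_memory_per_cpu_fw_alloc);

struct proto foo_prot = {
	.name			= "FOO",
	/* ... other callbacks and sysctls ... */
	.memory_allocated	= &foo_memory_allocated,
	.per_cpu_fw_alloc	= &foo_memory_per_cpu_fw_alloc,
};
```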
