author		Jakub Kicinski <kuba@kernel.org>	2022-06-10 16:21:39 -0700
committer	Jakub Kicinski <kuba@kernel.org>	2022-06-10 16:21:40 -0700
commit		e10b02ee5b6c95872064cf0a8e65f31951a31967 (patch)
tree		e061107c999e33aac6a61f87cd45a24cd4258422 /net/mptcp/protocol.c
parent		5c281b4e529cd5a73b32ac561d79f448d18dda6f (diff)
parent		0f2c2693988aeeb4c83a581fe58a28d526eecd39 (diff)
Merge branch 'net-reduce-tcp_memory_allocated-inflation'
Eric Dumazet says:

====================
net: reduce tcp_memory_allocated inflation

Hosts with a lot of sockets tend to hit so-called TCP memory pressure,
leading to very bad TCP performance and/or OOM.

The problem is that some TCP sockets can hold up to 2MB of 'forward
allocations' in their per-socket cache (sk->sk_forward_alloc), and there
is no mechanism to make them relinquish their share under mem pressure.
Their share is reclaimed only under some potentially rare events, one
socket at a time.

In this series, I implemented a per-cpu cache instead of a per-socket
one. Each CPU has a +1/-1 MB (256 pages on x86) forward alloc cache, in
order not to dirty the tcp_memory_allocated shared cache line too often.

We keep sk->sk_forward_alloc values as small as possible, to meet the
memcg page granularity constraint.

Note that memcg already has a per-cpu cache, although MEMCG_CHARGE_BATCH
is defined to 32 pages, which seems a bit small.

Note that while this cover letter mentions TCP, this work is generic and
supports TCP, UDP, DECNET, SCTP.
====================

Link: https://lore.kernel.org/r/20220609063412.2205738-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
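As a rough illustration of the batching idea in the cover letter, here is a
user-space sketch, NOT the kernel implementation: a shared atomic counter
stands in for tcp_memory_allocated, a per-thread variable stands in for the
new per-cpu forward-alloc cache, and the shared cache line is touched only
once per ~1MB of charges instead of once per operation. Every identifier
below (mem_charge, FW_ALLOC_BATCH, ...) is invented for this example.

	#include <stdatomic.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define FW_ALLOC_BATCH	(1 << (20 - PAGE_SHIFT))	/* 1MB, in pages */

	static atomic_long memory_allocated;		/* shared, contended */
	static _Thread_local long fw_alloc_cache;	/* per-"cpu" cache */

	static void mem_charge(long pages)
	{
		fw_alloc_cache += pages;
		if (fw_alloc_cache > FW_ALLOC_BATCH) {	/* flush surplus at once */
			atomic_fetch_add(&memory_allocated, fw_alloc_cache);
			fw_alloc_cache = 0;
		}
	}

	static void mem_uncharge(long pages)
	{
		fw_alloc_cache -= pages;
		if (fw_alloc_cache < -FW_ALLOC_BATCH) {	/* return deficit at once */
			atomic_fetch_add(&memory_allocated, fw_alloc_cache);
			fw_alloc_cache = 0;
		}
	}

	int main(void)
	{
		/* 1000 one-page charges touch the shared counter 3 times,
		 * the 900-page uncharge once more: 4 touches, not 1100 */
		for (int i = 0; i < 1000; i++)
			mem_charge(1);
		mem_uncharge(900);
		printf("shared=%ld cached=%ld\n",
		       atomic_load(&memory_allocated), fw_alloc_cache);
		return 0;
	}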
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--	net/mptcp/protocol.c	13
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 17e13396024a..e0fb9f96c45c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -167,8 +167,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
 
 static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
 {
-	amount >>= SK_MEM_QUANTUM_SHIFT;
-	mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+	amount >>= PAGE_SHIFT;
+	mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
 	__sk_mem_reduce_allocated(sk, amount);
 }
 
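For illustration only (not part of the patch): the two shifts in
__mptcp_rmem_reclaim() release whole pages back to the pool and leave the
sub-page remainder cached in msk->rmem_fwd_alloc, in line with the series'
goal of keeping forward allocations small. A standalone sketch of that
round-down arithmetic, with made-up values:

	#include <stdio.h>

	#define PAGE_SHIFT	12

	int main(void)
	{
		int rmem_fwd_alloc = 10000;	/* hypothetical cached bytes */
		int amount = rmem_fwd_alloc;

		amount >>= PAGE_SHIFT;			/* 10000B -> 2 whole pages */
		rmem_fwd_alloc -= amount << PAGE_SHIFT;	/* hand back 8192 bytes */

		printf("reclaimed %d pages, %d bytes stay cached\n",
		       amount, rmem_fwd_alloc);	/* 2 pages, 1808 bytes */
		return 0;
	}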
@@ -327,7 +327,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
 		return true;
 
 	amt = sk_mem_pages(size);
-	amount = amt << SK_MEM_QUANTUM_SHIFT;
+	amount = amt << PAGE_SHIFT;
 	msk->rmem_fwd_alloc += amount;
 	if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
 		if (ssk->sk_forward_alloc < amount) {
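For context, sk_mem_pages() rounds a byte count up to whole pages, so the
page-granular amount charged above can exceed size by up to PAGE_SIZE - 1
bytes. A small sketch of that ceiling calculation; the helper is reproduced
here from its conventional definition and should be treated as an
approximation, not quoted kernel source:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1 << PAGE_SHIFT)

	/* round a byte count up to whole pages, as sk_mem_pages() does */
	static int sk_mem_pages(int amt)
	{
		return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
	}

	int main(void)
	{
		int size = 6000;		/* hypothetical receive charge */
		int amt = sk_mem_pages(size);	/* -> 2 pages */
		int amount = amt << PAGE_SHIFT;	/* -> 8192 bytes */

		printf("size=%d charges %d pages (%d bytes)\n", size, amt, amount);
		return 0;
	}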
@@ -972,10 +972,10 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk)
 
 	lockdep_assert_held_once(&sk->sk_lock.slock);
 
-	if (reclaimable > SK_MEM_QUANTUM)
+	if (reclaimable > (int)PAGE_SIZE)
 		__mptcp_rmem_reclaim(sk, reclaimable - 1);
 
-	sk_mem_reclaim_partial(sk);
+	sk_mem_reclaim(sk);
 }
 
 static void mptcp_mem_reclaim_partial(struct sock *sk)
@@ -3437,7 +3437,10 @@ static struct proto mptcp_prot = {
 	.get_port		= mptcp_get_port,
 	.forward_alloc_get	= mptcp_forward_alloc_get,
 	.sockets_allocated	= &mptcp_sockets_allocated,
+
 	.memory_allocated	= &tcp_memory_allocated,
+	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
+
 	.memory_pressure	= &tcp_memory_pressure,
 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
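As the hunk above shows, mptcp_prot points .memory_allocated,
.memory_pressure and now .per_cpu_fw_alloc at TCP's counters, so MPTCP
sockets and plain TCP sockets draw from one shared memory pool and one
shared per-cpu forward-alloc cache rather than being accounted separately.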