summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2025-08-28 10:27:38 +0000
committerJakub Kicinski <kuba@kernel.org>2025-08-29 19:29:24 -0700
commit95fa78830e5b2eb2041174c7f9549c746e003dd6 (patch)
tree61e4192b22fe88910082096dcf0e76e3d84e19d3
parent9529320ad64e614cfaf96e6b8e3d8c0a1245160c (diff)
inet_diag: avoid cache line misses in inet_diag_bc_sk()
inet_diag_bc_sk() pulls five cache lines per socket, while most filters only need the two first ones. Add three booleans to struct inet_diag_dump_data, that are selectively set if a filter needs specific socket fields. - mark_needed /* INET_DIAG_BC_MARK_COND present. */ - cgroup_needed /* INET_DIAG_BC_CGROUP_COND present. */ - userlocks_needed /* INET_DIAG_BC_AUTO present. */ This removes millions of cache lines misses per ss invocation when simple filters are specified on busy servers. offsetof(struct sock, sk_userlocks) = 0xf3 offsetof(struct sock, sk_mark) = 0x20c offsetof(struct sock, sk_cgrp_data) = 0x298 Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com> Link: https://patch.msgid.link/20250828102738.2065992-6-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/inet_diag.h5
-rw-r--r--net/ipv4/inet_diag.c52
2 files changed, 36 insertions, 21 deletions
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 86a0641ec36e..704fd415c2b4 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -38,6 +38,11 @@ struct inet_diag_dump_data {
#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
struct bpf_sk_storage_diag *bpf_stg_diag;
+ bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */
+#endif
+ bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */
};
struct inet_connection_sock;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 117103042687..f0b6c5a411a2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -605,18 +605,22 @@ int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk)
entry.sport = READ_ONCE(inet->inet_num);
entry.dport = ntohs(READ_ONCE(inet->inet_dport));
entry.ifindex = READ_ONCE(sk->sk_bound_dev_if);
- entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
- if (sk_fullsock(sk))
- entry.mark = READ_ONCE(sk->sk_mark);
- else if (sk->sk_state == TCP_NEW_SYN_RECV)
- entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
- else if (sk->sk_state == TCP_TIME_WAIT)
- entry.mark = inet_twsk(sk)->tw_mark;
- else
- entry.mark = 0;
+ if (cb_data->userlocks_needed)
+ entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
+ if (cb_data->mark_needed) {
+ if (sk_fullsock(sk))
+ entry.mark = READ_ONCE(sk->sk_mark);
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ entry.mark = inet_twsk(sk)->tw_mark;
+ else
+ entry.mark = 0;
+ }
#ifdef CONFIG_SOCK_CGROUP_DATA
- entry.cgroup_id = sk_fullsock(sk) ?
- cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
+ if (cb_data->cgroup_needed)
+ entry.cgroup_id = sk_fullsock(sk) ?
+ cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
#endif
return inet_diag_bc_run(bc, &entry);
@@ -716,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
}
#endif
-static int inet_diag_bc_audit(const struct nlattr *attr,
+static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data,
const struct sk_buff *skb)
{
- bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
+ const struct nlattr *attr = cb_data->inet_diag_nla_bc;
const void *bytecode, *bc;
int bytecode_len, len;
+ bool net_admin;
+
+ if (!attr)
+ return 0;
- if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
+ if (nla_len(attr) < sizeof(struct inet_diag_bc_op))
return -EINVAL;
+ net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
bytecode = bc = nla_data(attr);
len = bytecode_len = nla_len(attr);
@@ -757,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
return -EPERM;
if (!valid_markcond(bc, len, &min_len))
return -EINVAL;
+ cb_data->mark_needed = true;
break;
#ifdef CONFIG_SOCK_CGROUP_DATA
case INET_DIAG_BC_CGROUP_COND:
if (!valid_cgroupcond(bc, len, &min_len))
return -EINVAL;
+ cb_data->cgroup_needed = true;
break;
#endif
case INET_DIAG_BC_AUTO:
+ cb_data->userlocks_needed = true;
+ fallthrough;
case INET_DIAG_BC_JMP:
case INET_DIAG_BC_NOP:
break;
@@ -841,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
kfree(cb_data);
return err;
}
- nla = cb_data->inet_diag_nla_bc;
- if (nla) {
- err = inet_diag_bc_audit(nla, skb);
- if (err) {
- kfree(cb_data);
- return err;
- }
+ err = inet_diag_bc_audit(cb_data, skb);
+ if (err) {
+ kfree(cb_data);
+ return err;
}
nla = cb_data->inet_diag_nla_bpf_stgs;