diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 147 |
1 files changed, 115 insertions, 32 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 28062e292d8b..aef35555792e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -58,6 +58,7 @@ #include <linux/times.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sock_diag.h> #include <net/net_namespace.h> #include <net/icmp.h> @@ -3016,6 +3017,7 @@ out: #ifdef CONFIG_BPF_SYSCALL union bpf_tcp_iter_batch_item { struct sock *sk; + __u64 cookie; }; struct bpf_tcp_iter_state { @@ -3046,10 +3048,19 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) { + union bpf_tcp_iter_batch_item *item; unsigned int cur_sk = iter->cur_sk; + __u64 cookie; - while (cur_sk < iter->end_sk) - sock_gen_put(iter->batch[cur_sk++].sk); + /* Remember the cookies of the sockets we haven't seen yet, so we can + * pick up where we left off next time around. + */ + while (cur_sk < iter->end_sk) { + item = &iter->batch[cur_sk++]; + cookie = sock_gen_cookie(item->sk); + sock_gen_put(item->sk); + item->cookie = cookie; + } } static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, @@ -3070,6 +3081,106 @@ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, return 0; } +static struct sock *bpf_iter_tcp_resume_bucket(struct sock *first_sk, + union bpf_tcp_iter_batch_item *cookies, + int n_cookies) +{ + struct hlist_nulls_node *node; + struct sock *sk; + int i; + + for (i = 0; i < n_cookies; i++) { + sk = first_sk; + sk_nulls_for_each_from(sk, node) + if (cookies[i].cookie == atomic64_read(&sk->sk_cookie)) + return sk; + } + + return NULL; +} + +static struct sock *bpf_iter_tcp_resume_listening(struct seq_file *seq) +{ + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + unsigned int find_cookie = iter->cur_sk; + unsigned int end_cookie = iter->end_sk; + int resume_bucket = st->bucket; + struct sock *sk; + + if (end_cookie && find_cookie == end_cookie) + ++st->bucket; + + sk = listening_get_first(seq); + iter->cur_sk = 0; + iter->end_sk = 0; + + if (sk && st->bucket == resume_bucket && end_cookie) { + sk = bpf_iter_tcp_resume_bucket(sk, &iter->batch[find_cookie], + end_cookie - find_cookie); + if (!sk) { + spin_unlock(&hinfo->lhash2[st->bucket].lock); + ++st->bucket; + sk = listening_get_first(seq); + } + } + + return sk; +} + +static struct sock *bpf_iter_tcp_resume_established(struct seq_file *seq) +{ + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + unsigned int find_cookie = iter->cur_sk; + unsigned int end_cookie = iter->end_sk; + int resume_bucket = st->bucket; + struct sock *sk; + + if (end_cookie && find_cookie == end_cookie) + ++st->bucket; + + sk = established_get_first(seq); + iter->cur_sk = 0; + iter->end_sk = 0; + + if (sk && st->bucket == resume_bucket && end_cookie) { + sk = bpf_iter_tcp_resume_bucket(sk, &iter->batch[find_cookie], + end_cookie - find_cookie); + if (!sk) { + spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); + ++st->bucket; + sk = established_get_first(seq); + } + } + + return sk; +} + +static struct sock *bpf_iter_tcp_resume(struct seq_file *seq) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct sock *sk = NULL; + + switch (st->state) { + case TCP_SEQ_STATE_LISTENING: + sk = bpf_iter_tcp_resume_listening(seq); + if (sk) + break; + st->bucket = 0; + st->state = TCP_SEQ_STATE_ESTABLISHED; + fallthrough; + case TCP_SEQ_STATE_ESTABLISHED: + sk = bpf_iter_tcp_resume_established(seq); + break; + } + + return sk; +} + static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, struct sock **start_sk) { @@ -3154,32 +3265,12 @@ static void bpf_iter_tcp_unlock_bucket(struct seq_file *seq) static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) { - struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct bpf_tcp_iter_state *iter = seq->private; - struct tcp_iter_state *st = &iter->state; unsigned int expected; struct sock *sk; int err; - /* The st->bucket is done. Directly advance to the next - * bucket instead of having the tcp_seek_last_pos() to skip - * one by one in the current bucket and eventually find out - * it has to advance to the next bucket. - */ - if (iter->end_sk && iter->cur_sk == iter->end_sk) { - st->offset = 0; - st->bucket++; - if (st->state == TCP_SEQ_STATE_LISTENING && - st->bucket > hinfo->lhash2_mask) { - st->state = TCP_SEQ_STATE_ESTABLISHED; - st->bucket = 0; - } - } - - iter->cur_sk = 0; - iter->end_sk = 0; - - sk = tcp_seek_last_pos(seq); + sk = bpf_iter_tcp_resume(seq); if (!sk) return NULL; /* Done */ @@ -3195,10 +3286,7 @@ static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) if (err) return ERR_PTR(err); - iter->cur_sk = 0; - iter->end_sk = 0; - - sk = tcp_seek_last_pos(seq); + sk = bpf_iter_tcp_resume(seq); if (!sk) return NULL; /* Done */ @@ -3250,11 +3338,6 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) * meta.seq_num is used instead. */ st->num++; - /* Move st->offset to the next sk in the bucket such that - * the future start() will resume at st->offset in - * st->bucket. See tcp_seek_last_pos(). - */ - st->offset++; sock_gen_put(iter->batch[iter->cur_sk++].sk); } |
