diff options
| author | Peter Zijlstra <peterz@infradead.org> | 2023-10-18 13:56:54 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2023-10-25 11:17:02 +0200 |
| commit | a714491fa92d2068358dd603cc50bf2062517bd8 (patch) | |
| tree | 1b364e71ec1dfd689706479281e6666916f39e01 /kernel | |
| parent | 78695075850e0c616e0108a1dbce6dd1606c87e4 (diff) | |
perf: Disallow mis-matched inherited group reads
commit 32671e3799ca2e4590773fd0e63aaa4229e50c06 upstream.
Because group consistency is non-atomic between parent (filedesc) and children
(inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum
non-matching counter groups -- with non-sensical results.
Add group_generation to distinguish the case where a parent group removes and
adds an event and thus has the same number, but a different configuration of
events as inherited groups.
This became a problem when commit fa8c269353d5 ("perf/core: Invert
perf_read_group() loops") flipped the order of child_list and sibling_list.
Previously it would iterate the group (sibling_list) first, and for each
sibling traverse the child_list. In this order, only the group composition of
the parent is relevant. By flipping the order the group composition of the
child (inherited) events becomes an issue and the mis-match in group
composition becomes evident.
That said; even prior to this commit, while reading of a group that is not
equally inherited was not broken, it still made no sense.
(Ab)use ECHILD as error return to indicate issues with child process group
composition.
Fixes: fa8c269353d5 ("perf/core: Invert perf_read_group() loops")
Reported-by: Budimir Markovic <markovicbudimir@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/events/core.c | 39 |
1 files changed, 33 insertions, 6 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index cb2b717666ce..8f3f3cc08496 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1848,6 +1848,7 @@ static void perf_group_attach(struct perf_event *event) list_add_tail(&event->sibling_list, &group_leader->sibling_list); group_leader->nr_siblings++; + group_leader->group_generation++; perf_event__header_size(group_leader); @@ -1918,6 +1919,7 @@ static void perf_group_detach(struct perf_event *event) if (event->group_leader != event) { list_del_init(&event->sibling_list); event->group_leader->nr_siblings--; + event->group_leader->group_generation++; goto out; } @@ -4755,7 +4757,7 @@ static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { struct perf_event_context *ctx = leader->ctx; - struct perf_event *sub; + struct perf_event *sub, *parent; unsigned long flags; int n = 1; /* skip @nr */ int ret; @@ -4765,6 +4767,33 @@ static int __perf_read_group_add(struct perf_event *leader, return ret; raw_spin_lock_irqsave(&ctx->lock, flags); + /* + * Verify the grouping between the parent and child (inherited) + * events is still in tact. + * + * Specifically: + * - leader->ctx->lock pins leader->sibling_list + * - parent->child_mutex pins parent->child_list + * - parent->ctx->mutex pins parent->sibling_list + * + * Because parent->ctx != leader->ctx (and child_list nests inside + * ctx->mutex), group destruction is not atomic between children, also + * see perf_event_release_kernel(). Additionally, parent can grow the + * group. + * + * Therefore it is possible to have parent and child groups in a + * different configuration and summing over such a beast makes no sense + * what so ever. + * + * Reject this. + */ + parent = leader->parent; + if (parent && + (parent->group_generation != leader->group_generation || + parent->nr_siblings != leader->nr_siblings)) { + ret = -ECHILD; + goto unlock; + } /* * Since we co-schedule groups, {enabled,running} times of siblings @@ -4794,8 +4823,9 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] = primary_event_id(sub); } +unlock: raw_spin_unlock_irqrestore(&ctx->lock, flags); - return 0; + return ret; } static int perf_read_group(struct perf_event *event, @@ -4814,10 +4844,6 @@ static int perf_read_group(struct perf_event *event, values[0] = 1 + leader->nr_siblings; - /* - * By locking the child_mutex of the leader we effectively - * lock the child list of all siblings.. XXX explain how. - */ mutex_lock(&leader->child_mutex); ret = __perf_read_group_add(leader, read_format, values); @@ -11603,6 +11629,7 @@ static int inherit_group(struct perf_event *parent_event, if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); } + leader->group_generation = parent_event->group_generation; return 0; } |
