summaryrefslogtreecommitdiff
path: root/kernel/events/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--kernel/events/core.c373
1 files changed, 195 insertions, 178 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 820127536e62..ef1beb9ea128 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3974,7 +3974,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
*/
static inline bool event_update_userpage(struct perf_event *event)
{
- if (likely(!atomic_read(&event->mmap_count)))
+ if (likely(!refcount_read(&event->mmap_count)))
return false;
perf_event_update_time(event);
@@ -6710,11 +6710,11 @@ static void perf_mmap_open(struct vm_area_struct *vma)
struct perf_event *event = vma->vm_file->private_data;
mapped_f mapped = get_mapped(event, event_mapped);
- atomic_inc(&event->mmap_count);
- atomic_inc(&event->rb->mmap_count);
+ refcount_inc(&event->mmap_count);
+ refcount_inc(&event->rb->mmap_count);
if (vma->vm_pgoff)
- atomic_inc(&event->rb->aux_mmap_count);
+ refcount_inc(&event->rb->aux_mmap_count);
if (mapped)
mapped(event, vma->vm_mm);
@@ -6749,7 +6749,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
* to avoid complications.
*/
if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
- atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) {
+ refcount_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) {
/*
* Stop all AUX events that are writing to this buffer,
* so that we can free its AUX pages and corresponding PMU
@@ -6769,10 +6769,10 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&rb->aux_mutex);
}
- if (atomic_dec_and_test(&rb->mmap_count))
+ if (refcount_dec_and_test(&rb->mmap_count))
detach_rest = true;
- if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+ if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
ring_buffer_attach(event, NULL);
@@ -6933,230 +6933,242 @@ static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
return err;
}
-static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+static bool perf_mmap_calc_limits(struct vm_area_struct *vma, long *user_extra, long *extra)
{
- struct perf_event *event = file->private_data;
- unsigned long user_locked, user_lock_limit;
+ unsigned long user_locked, user_lock_limit, locked, lock_limit;
struct user_struct *user = current_user();
- struct mutex *aux_mutex = NULL;
- struct perf_buffer *rb = NULL;
- unsigned long locked, lock_limit;
- unsigned long vma_size;
- unsigned long nr_pages;
- long user_extra = 0, extra = 0;
- int ret, flags = 0;
- mapped_f mapped;
+
+ user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
+ /* Increase the limit linearly with more CPUs */
+ user_lock_limit *= num_online_cpus();
+
+ user_locked = atomic_long_read(&user->locked_vm);
/*
- * Don't allow mmap() of inherited per-task counters. This would
- * create a performance issue due to all children writing to the
- * same rb.
+ * sysctl_perf_event_mlock may have changed, so that
+ * user->locked_vm > user_lock_limit
*/
- if (event->cpu == -1 && event->attr.inherit)
- return -EINVAL;
+ if (user_locked > user_lock_limit)
+ user_locked = user_lock_limit;
+ user_locked += *user_extra;
- if (!(vma->vm_flags & VM_SHARED))
- return -EINVAL;
+ if (user_locked > user_lock_limit) {
+ /*
+ * charge locked_vm until it hits user_lock_limit;
+ * charge the rest from pinned_vm
+ */
+ *extra = user_locked - user_lock_limit;
+ *user_extra -= *extra;
+ }
- ret = security_perf_event_read(event);
- if (ret)
- return ret;
+ lock_limit = rlimit(RLIMIT_MEMLOCK);
+ lock_limit >>= PAGE_SHIFT;
+ locked = atomic64_read(&vma->vm_mm->pinned_vm) + *extra;
- vma_size = vma->vm_end - vma->vm_start;
- nr_pages = vma_size / PAGE_SIZE;
+ return locked <= lock_limit || !perf_is_paranoid() || capable(CAP_IPC_LOCK);
+}
- if (nr_pages > INT_MAX)
- return -ENOMEM;
+static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long extra)
+{
+ struct user_struct *user = current_user();
- if (vma_size != PAGE_SIZE * nr_pages)
- return -EINVAL;
+ atomic_long_add(user_extra, &user->locked_vm);
+ atomic64_add(extra, &vma->vm_mm->pinned_vm);
+}
- user_extra = nr_pages;
+static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
+ unsigned long nr_pages)
+{
+ long extra = 0, user_extra = nr_pages;
+ struct perf_buffer *rb;
+ int rb_flags = 0;
- mutex_lock(&event->mmap_mutex);
- ret = -EINVAL;
+ nr_pages -= 1;
/*
- * This relies on __pmu_detach_event() taking mmap_mutex after marking
- * the event REVOKED. Either we observe the state, or __pmu_detach_event()
- * will detach the rb created here.
+ * If we have rb pages ensure they're a power-of-two number, so we
+ * can do bitmasks instead of modulo.
*/
- if (event->state <= PERF_EVENT_STATE_REVOKED) {
- ret = -ENODEV;
- goto unlock;
- }
-
- if (vma->vm_pgoff == 0) {
- nr_pages -= 1;
-
- /*
- * If we have rb pages ensure they're a power-of-two number, so we
- * can do bitmasks instead of modulo.
- */
- if (nr_pages != 0 && !is_power_of_2(nr_pages))
- goto unlock;
-
- WARN_ON_ONCE(event->ctx->parent_ctx);
+ if (nr_pages != 0 && !is_power_of_2(nr_pages))
+ return -EINVAL;
- if (event->rb) {
- if (data_page_nr(event->rb) != nr_pages)
- goto unlock;
+ WARN_ON_ONCE(event->ctx->parent_ctx);
- if (atomic_inc_not_zero(&event->rb->mmap_count)) {
- /*
- * Success -- managed to mmap() the same buffer
- * multiple times.
- */
- ret = 0;
- /* We need the rb to map pages. */
- rb = event->rb;
- goto unlock;
- }
+ if (event->rb) {
+ if (data_page_nr(event->rb) != nr_pages)
+ return -EINVAL;
+ if (refcount_inc_not_zero(&event->rb->mmap_count)) {
/*
- * Raced against perf_mmap_close()'s
- * atomic_dec_and_mutex_lock() remove the
- * event and continue as if !event->rb
+ * Success -- managed to mmap() the same buffer
+ * multiple times.
*/
- ring_buffer_attach(event, NULL);
+ perf_mmap_account(vma, user_extra, extra);
+ refcount_inc(&event->mmap_count);
+ return 0;
}
- } else {
/*
- * AUX area mapping: if rb->aux_nr_pages != 0, it's already
- * mapped, all subsequent mappings should have the same size
- * and offset. Must be above the normal perf buffer.
+ * Raced against perf_mmap_close()'s
+ * refcount_dec_and_mutex_lock() remove the
+ * event and continue as if !event->rb
*/
- u64 aux_offset, aux_size;
+ ring_buffer_attach(event, NULL);
+ }
- rb = event->rb;
- if (!rb)
- goto aux_unlock;
+ if (!perf_mmap_calc_limits(vma, &user_extra, &extra))
+ return -EPERM;
- aux_mutex = &rb->aux_mutex;
- mutex_lock(aux_mutex);
+ if (vma->vm_flags & VM_WRITE)
+ rb_flags |= RING_BUFFER_WRITABLE;
- aux_offset = READ_ONCE(rb->user_page->aux_offset);
- aux_size = READ_ONCE(rb->user_page->aux_size);
+ rb = rb_alloc(nr_pages,
+ event->attr.watermark ? event->attr.wakeup_watermark : 0,
+ event->cpu, rb_flags);
- if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
- goto aux_unlock;
+ if (!rb)
+ return -ENOMEM;
- if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
- goto aux_unlock;
+ refcount_set(&rb->mmap_count, 1);
+ rb->mmap_user = get_current_user();
+ rb->mmap_locked = extra;
- /* already mapped with a different offset */
- if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
- goto aux_unlock;
+ ring_buffer_attach(event, rb);
- if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
- goto aux_unlock;
+ perf_event_update_time(event);
+ perf_event_init_userpage(event);
+ perf_event_update_userpage(event);
- /* already mapped with a different size */
- if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
- goto aux_unlock;
+ perf_mmap_account(vma, user_extra, extra);
+ refcount_set(&event->mmap_count, 1);
- if (!is_power_of_2(nr_pages))
- goto aux_unlock;
+ return 0;
+}
- if (!atomic_inc_not_zero(&rb->mmap_count))
- goto aux_unlock;
+static int perf_mmap_aux(struct vm_area_struct *vma, struct perf_event *event,
+ unsigned long nr_pages)
+{
+ long extra = 0, user_extra = nr_pages;
+ u64 aux_offset, aux_size;
+ struct perf_buffer *rb;
+ int ret, rb_flags = 0;
- if (rb_has_aux(rb)) {
- atomic_inc(&rb->aux_mmap_count);
- ret = 0;
- goto unlock;
- }
- }
+ rb = event->rb;
+ if (!rb)
+ return -EINVAL;
- user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
+ guard(mutex)(&rb->aux_mutex);
/*
- * Increase the limit linearly with more CPUs:
+ * AUX area mapping: if rb->aux_nr_pages != 0, it's already
+ * mapped, all subsequent mappings should have the same size
+ * and offset. Must be above the normal perf buffer.
*/
- user_lock_limit *= num_online_cpus();
+ aux_offset = READ_ONCE(rb->user_page->aux_offset);
+ aux_size = READ_ONCE(rb->user_page->aux_size);
- user_locked = atomic_long_read(&user->locked_vm);
+ if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
+ return -EINVAL;
- /*
- * sysctl_perf_event_mlock may have changed, so that
- * user->locked_vm > user_lock_limit
- */
- if (user_locked > user_lock_limit)
- user_locked = user_lock_limit;
- user_locked += user_extra;
+ if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
+ return -EINVAL;
- if (user_locked > user_lock_limit) {
- /*
- * charge locked_vm until it hits user_lock_limit;
- * charge the rest from pinned_vm
- */
- extra = user_locked - user_lock_limit;
- user_extra -= extra;
- }
+ /* already mapped with a different offset */
+ if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
+ return -EINVAL;
- lock_limit = rlimit(RLIMIT_MEMLOCK);
- lock_limit >>= PAGE_SHIFT;
- locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
+ if (aux_size != nr_pages * PAGE_SIZE)
+ return -EINVAL;
- if ((locked > lock_limit) && perf_is_paranoid() &&
- !capable(CAP_IPC_LOCK)) {
- ret = -EPERM;
- goto unlock;
- }
+ /* already mapped with a different size */
+ if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
+ return -EINVAL;
- WARN_ON(!rb && event->rb);
+ if (!is_power_of_2(nr_pages))
+ return -EINVAL;
- if (vma->vm_flags & VM_WRITE)
- flags |= RING_BUFFER_WRITABLE;
+ if (!refcount_inc_not_zero(&rb->mmap_count))
+ return -EINVAL;
- if (!rb) {
- rb = rb_alloc(nr_pages,
- event->attr.watermark ? event->attr.wakeup_watermark : 0,
- event->cpu, flags);
+ if (rb_has_aux(rb)) {
+ refcount_inc(&rb->aux_mmap_count);
- if (!rb) {
- ret = -ENOMEM;
- goto unlock;
+ } else {
+ if (!perf_mmap_calc_limits(vma, &user_extra, &extra)) {
+ refcount_dec(&rb->mmap_count);
+ return -EPERM;
}
- atomic_set(&rb->mmap_count, 1);
- rb->mmap_user = get_current_user();
- rb->mmap_locked = extra;
+ WARN_ON(!rb && event->rb);
- ring_buffer_attach(event, rb);
+ if (vma->vm_flags & VM_WRITE)
+ rb_flags |= RING_BUFFER_WRITABLE;
- perf_event_update_time(event);
- perf_event_init_userpage(event);
- perf_event_update_userpage(event);
- ret = 0;
- } else {
ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
- event->attr.aux_watermark, flags);
- if (!ret) {
- atomic_set(&rb->aux_mmap_count, 1);
- rb->aux_mmap_locked = extra;
+ event->attr.aux_watermark, rb_flags);
+ if (ret) {
+ refcount_dec(&rb->mmap_count);
+ return ret;
}
+
+ refcount_set(&rb->aux_mmap_count, 1);
+ rb->aux_mmap_locked = extra;
}
-unlock:
- if (!ret) {
- atomic_long_add(user_extra, &user->locked_vm);
- atomic64_add(extra, &vma->vm_mm->pinned_vm);
-
- atomic_inc(&event->mmap_count);
- } else if (rb) {
- /* AUX allocation failed */
- atomic_dec(&rb->mmap_count);
- }
-aux_unlock:
- if (aux_mutex)
- mutex_unlock(aux_mutex);
- mutex_unlock(&event->mmap_mutex);
+ perf_mmap_account(vma, user_extra, extra);
+ refcount_inc(&event->mmap_count);
+
+ return 0;
+}
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct perf_event *event = file->private_data;
+ unsigned long vma_size, nr_pages;
+ mapped_f mapped;
+ int ret;
+
+ /*
+ * Don't allow mmap() of inherited per-task counters. This would
+ * create a performance issue due to all children writing to the
+ * same rb.
+ */
+ if (event->cpu == -1 && event->attr.inherit)
+ return -EINVAL;
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ ret = security_perf_event_read(event);
if (ret)
return ret;
+ vma_size = vma->vm_end - vma->vm_start;
+ nr_pages = vma_size / PAGE_SIZE;
+
+ if (nr_pages > INT_MAX)
+ return -ENOMEM;
+
+ if (vma_size != PAGE_SIZE * nr_pages)
+ return -EINVAL;
+
+ scoped_guard (mutex, &event->mmap_mutex) {
+ /*
+ * This relies on __pmu_detach_event() taking mmap_mutex after marking
+ * the event REVOKED. Either we observe the state, or __pmu_detach_event()
+ * will detach the rb created here.
+ */
+ if (event->state <= PERF_EVENT_STATE_REVOKED)
+ return -ENODEV;
+
+ if (vma->vm_pgoff == 0)
+ ret = perf_mmap_rb(vma, event, nr_pages);
+ else
+ ret = perf_mmap_aux(vma, event, nr_pages);
+ if (ret)
+ return ret;
+ }
+
/*
* Since pinned accounting is per vm we cannot allow fork() to copy our
* vma.
@@ -7174,7 +7186,7 @@ aux_unlock:
* full cleanup in this case and therefore does not invoke
* vmops::close().
*/
- ret = map_range(rb, vma);
+ ret = map_range(event->rb, vma);
if (ret)
perf_mmap_close(vma);
@@ -7440,7 +7452,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
- } else if (!(current->flags & PF_KTHREAD)) {
+ } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
perf_get_regs_user(regs_user, regs);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
@@ -8080,7 +8092,7 @@ static u64 perf_virt_to_phys(u64 virt)
* Try IRQ-safe get_user_page_fast_only first.
* If failed, leave phys_addr as 0.
*/
- if (current->mm != NULL) {
+ if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
struct page *p;
pagefault_disable();
@@ -8192,7 +8204,8 @@ struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs)
{
bool kernel = !event->attr.exclude_callchain_kernel;
- bool user = !event->attr.exclude_callchain_user;
+ bool user = !event->attr.exclude_callchain_user &&
+ !(current->flags & (PF_KTHREAD | PF_USER_WORKER));
/* Disallow cross-task user callchains. */
bool crosstask = event->ctx->task && event->ctx->task != current;
const u32 max_stack = event->attr.sample_max_stack;
@@ -8204,7 +8217,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
if (!kernel && !user)
return &__empty_callchain;
- callchain = get_perf_callchain(regs, 0, kernel, user,
+ callchain = get_perf_callchain(regs, kernel, user,
max_stack, crosstask, true);
return callchain ?: &__empty_callchain;
}
@@ -11232,6 +11245,10 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
if (prog->kprobe_override && !is_kprobe)
return -EINVAL;
+ /* Writing to context allowed only for uprobes. */
+ if (prog->aux->kprobe_write_ctx && !is_uprobe)
+ return -EINVAL;
+
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
@@ -13249,7 +13266,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
set:
/* Can't redirect output if we've got an active mmap() */
- if (atomic_read(&event->mmap_count))
+ if (refcount_read(&event->mmap_count))
goto unlock;
if (output_event) {
@@ -13262,7 +13279,7 @@ set:
goto unlock;
/* did we race against perf_mmap_close() */
- if (!atomic_read(&rb->mmap_count)) {
+ if (!refcount_read(&rb->mmap_count)) {
ring_buffer_put(rb);
goto unlock;
}