Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig | 40
-rw-r--r--  kernel/trace/Makefile | 17
-rw-r--r--  kernel/trace/blktrace.c | 539
-rw-r--r--  kernel/trace/bpf_trace.c | 48
-rw-r--r--  kernel/trace/fgraph.c | 20
-rw-r--r--  kernel/trace/fprobe.c | 301
-rw-r--r--  kernel/trace/ftrace.c | 109
-rw-r--r--  kernel/trace/pid_list.c | 30
-rw-r--r--  kernel/trace/pid_list.h | 1
-rw-r--r--  kernel/trace/ring_buffer.c | 101
-rw-r--r--  kernel/trace/rv/monitors/pagefault/Kconfig | 1
-rw-r--r--  kernel/trace/rv/reactor_panic.c | 6
-rw-r--r--  kernel/trace/rv/reactor_printk.c | 6
-rw-r--r--  kernel/trace/rv/rv.c | 114
-rw-r--r--  kernel/trace/rv/rv.h | 6
-rw-r--r--  kernel/trace/rv/rv_reactors.c | 78
-rw-r--r--  kernel/trace/trace.c | 906
-rw-r--r--  kernel/trace/trace.h | 239
-rw-r--r--  kernel/trace/trace_dynevent.c | 11
-rw-r--r--  kernel/trace/trace_entries.h | 15
-rw-r--r--  kernel/trace/trace_eprobe.c | 127
-rw-r--r--  kernel/trace/trace_events.c | 4
-rw-r--r--  kernel/trace/trace_events_hist.c | 149
-rw-r--r--  kernel/trace/trace_events_synth.c | 2
-rw-r--r--  kernel/trace/trace_events_trigger.c | 408
-rw-r--r--  kernel/trace/trace_events_user.c | 22
-rw-r--r--  kernel/trace/trace_fprobe.c | 13
-rw-r--r--  kernel/trace/trace_functions.c | 10
-rw-r--r--  kernel/trace/trace_functions_graph.c | 223
-rw-r--r--  kernel/trace/trace_irqsoff.c | 30
-rw-r--r--  kernel/trace/trace_kdb.c | 2
-rw-r--r--  kernel/trace/trace_kprobe.c | 6
-rw-r--r--  kernel/trace/trace_output.c | 51
-rw-r--r--  kernel/trace/trace_output.h | 11
-rw-r--r--  kernel/trace/trace_probe.c | 5
-rw-r--r--  kernel/trace/trace_probe.h | 4
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 24
-rw-r--r--  kernel/trace/trace_syscalls.c | 935
-rw-r--r--  kernel/trace/trace_uprobe.c | 82
39 files changed, 3246 insertions(+), 1450 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d2c79da81e4f..bfa2ec46e075 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
If the architecture generates __patchable_function_entries sections
but does not want them included in the ftrace locations.
+config HAVE_DYNAMIC_FTRACE_WITH_JMP
+ bool
+ help
+ If the architecture supports replacing the __fentry__ call with a
+ "jmp" instruction.
+
config HAVE_SYSCALL_TRACEPOINTS
bool
help
@@ -330,6 +336,26 @@ config DYNAMIC_FTRACE_WITH_ARGS
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
+config DYNAMIC_FTRACE_WITH_JMP
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ depends on HAVE_DYNAMIC_FTRACE_WITH_JMP
+
+config FUNCTION_SELF_TRACING
+ bool "Function trace tracing code"
+ depends on FUNCTION_TRACER
+ help
+ Normally all the tracing code is set to notrace, where the function
+ tracer will ignore all the tracing functions. Sometimes it is useful
+ for debugging to trace some of the tracing infrastructure itself.
+ Enable this to allow some of the tracing infrastructure to be traced
+ by the function tracer. Note, this will likely add noise to function
+ tracing if events and other tracing features are enabled along with
+ function tracing.
+
+ If unsure, say N.
+
config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
@@ -575,6 +601,20 @@ config FTRACE_SYSCALLS
help
Basic tracer to catch the syscall entry and exit events.
+config TRACE_SYSCALL_BUF_SIZE_DEFAULT
+ int "System call user read max size"
+ range 0 165
+ default 63
+ depends on FTRACE_SYSCALLS
+ help
+ Some system call trace events will record the data from a user
+ space address that one of the parameters points to. The amount of
+ data per event is limited. That limit is set by this config option,
+ which also affects how much user space data perf can read.
+
+ For a tracing instance, this size may be changed by writing into
+ its syscall_user_buf_size file.
+
config TRACER_SNAPSHOT
bool "Create a snapshot trace buffer"
select TRACER_MAX_TRACE
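The TRACE_SYSCALL_BUF_SIZE_DEFAULT help text above notes that the limit can be changed per tracing instance by writing to its syscall_user_buf_size file. A minimal user-space sketch of doing that, assuming tracefs is mounted at /sys/kernel/tracing and an instance named "foo" already exists (both the mount point and instance name are illustrative, not part of this patch):

/*
 * Sketch: raise the syscall user-copy limit for one tracing instance
 * to 128 bytes. Only the file name syscall_user_buf_size comes from
 * the help text above; the path is an assumption.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/tracing/instances/foo/syscall_user_buf_size";
	const char *val = "128\n";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, val, strlen(val)) != (ssize_t)strlen(val))
		perror("write");
	close(fd);
	return 0;
}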
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index dcb4e02afc5f..fc5dcc888e13 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -16,6 +16,23 @@ obj-y += trace_selftest_dynamic.o
endif
endif
+# Allow some files to be function traced
+ifdef CONFIG_FUNCTION_SELF_TRACING
+CFLAGS_trace_output.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_seq.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_stat.o = $(CC_FLAGS_FTRACE)
+CFLAGS_tracing_map.o = $(CC_FLAGS_FTRACE)
+CFLAGS_synth_event_gen_test.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_events.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_syscalls.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_events_filter.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_events_trigger.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_events_synth.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_events_hist.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_events_user.o = $(CC_FLAGS_FTRACE)
+CFLAGS_trace_dynevent.o = $(CC_FLAGS_FTRACE)
+endif
+
ifdef CONFIG_FTRACE_STARTUP_TEST
CFLAGS_trace_kprobe_selftest.o = $(CC_FLAGS_FTRACE)
obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe_selftest.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6941145b5058..d031c8d80be4 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -63,13 +63,116 @@ static int blk_probes_ref;
static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
+static void record_blktrace_event(struct blk_io_trace *t, pid_t pid, int cpu,
+ sector_t sector, int bytes, u64 what,
+ dev_t dev, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+{
+ /*
+ * These two are not needed in ftrace as they are in the
+ * generic trace_entry, filled by tracing_generic_entry_update,
+ * but for the trace_event->bin() synthesizer benefit we do it
+ * here too.
+ */
+ t->cpu = cpu;
+ t->pid = pid;
+
+ t->sector = sector;
+ t->bytes = bytes;
+ t->action = lower_32_bits(what);
+ t->device = dev;
+ t->error = error;
+ t->pdu_len = pdu_len + cgid_len;
+
+ if (cgid_len)
+ memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
+ if (pdu_len)
+ memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
+}
+
+static void record_blktrace_event2(struct blk_io_trace2 *t2, pid_t pid, int cpu,
+ sector_t sector, int bytes, u64 what,
+ dev_t dev, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data,
+ int pdu_len)
+{
+ t2->pid = pid;
+ t2->cpu = cpu;
+
+ t2->sector = sector;
+ t2->bytes = bytes;
+ t2->action = what;
+ t2->device = dev;
+ t2->error = error;
+ t2->pdu_len = pdu_len + cgid_len;
+
+ if (cgid_len)
+ memcpy((void *)t2 + sizeof(*t2), &cgid, cgid_len);
+ if (pdu_len)
+ memcpy((void *)t2 + sizeof(*t2) + cgid_len, pdu_data, pdu_len);
+}
+
+static void relay_blktrace_event1(struct blk_trace *bt, unsigned long sequence,
+ pid_t pid, int cpu, sector_t sector, int bytes,
+ u64 what, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+{
+ struct blk_io_trace *t;
+ size_t trace_len = sizeof(*t) + pdu_len + cgid_len;
+
+ t = relay_reserve(bt->rchan, trace_len);
+ if (!t)
+ return;
+
+ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+ t->sequence = sequence;
+ t->time = ktime_to_ns(ktime_get());
+
+ record_blktrace_event(t, pid, cpu, sector, bytes, what, bt->dev, error,
+ cgid, cgid_len, pdu_data, pdu_len);
+}
+
+static void relay_blktrace_event2(struct blk_trace *bt, unsigned long sequence,
+ pid_t pid, int cpu, sector_t sector,
+ int bytes, u64 what, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+{
+ struct blk_io_trace2 *t;
+ size_t trace_len = sizeof(struct blk_io_trace2) + pdu_len + cgid_len;
+
+ t = relay_reserve(bt->rchan, trace_len);
+ if (!t)
+ return;
+
+ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE2_VERSION;
+ t->sequence = sequence;
+ t->time = ktime_to_ns(ktime_get());
+
+ record_blktrace_event2(t, pid, cpu, sector, bytes, what, bt->dev, error,
+ cgid, cgid_len, pdu_data, pdu_len);
+}
+
+static void relay_blktrace_event(struct blk_trace *bt, unsigned long sequence,
+ pid_t pid, int cpu, sector_t sector, int bytes,
+ u64 what, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+{
+ if (bt->version == 2)
+ return relay_blktrace_event2(bt, sequence, pid, cpu, sector,
+ bytes, what, error, cgid, cgid_len,
+ pdu_data, pdu_len);
+ return relay_blktrace_event1(bt, sequence, pid, cpu, sector, bytes,
+ what, error, cgid, cgid_len, pdu_data,
+ pdu_len);
+}
+
/*
* Send out a notify message.
*/
-static void trace_note(struct blk_trace *bt, pid_t pid, int action,
+static void trace_note(struct blk_trace *bt, pid_t pid, u64 action,
const void *data, size_t len, u64 cgid)
{
- struct blk_io_trace *t;
struct ring_buffer_event *event = NULL;
struct trace_buffer *buffer = NULL;
unsigned int trace_ctx = 0;
@@ -77,38 +180,30 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
bool blk_tracer = blk_tracer_enabled;
ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
+ action = lower_32_bits(action | (cgid ? __BLK_TN_CGROUP : 0));
if (blk_tracer) {
+ struct blk_io_trace2 *t;
+ size_t trace_len = sizeof(*t) + cgid_len + len;
+
buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0);
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
- sizeof(*t) + len + cgid_len,
- trace_ctx);
+ trace_len, trace_ctx);
if (!event)
return;
t = ring_buffer_event_data(event);
- goto record_it;
+ record_blktrace_event2(t, pid, cpu, 0, 0,
+ action, bt->dev, 0, cgid, cgid_len,
+ (void *)data, len);
+ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
+ return;
}
if (!bt->rchan)
return;
- t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
- if (t) {
- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
- t->time = ktime_to_ns(ktime_get());
-record_it:
- t->device = bt->dev;
- t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
- t->pid = pid;
- t->cpu = cpu;
- t->pdu_len = len + cgid_len;
- if (cgid_len)
- memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
- memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
-
- if (blk_tracer)
- trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
- }
+ relay_blktrace_event(bt, 0, pid, cpu, 0, 0, action, 0, cgid,
+ cgid_len, (void *)data, len);
}
/*
@@ -182,7 +277,7 @@ void __blk_trace_note_message(struct blk_trace *bt,
}
EXPORT_SYMBOL_GPL(__blk_trace_note_message);
-static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
+static int act_log_check(struct blk_trace *bt, u64 what, sector_t sector,
pid_t pid)
{
if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
@@ -213,13 +308,12 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
* blk_io_trace structure and places it in a per-cpu subbuffer.
*/
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
- const blk_opf_t opf, u32 what, int error,
+ const blk_opf_t opf, u64 what, int error,
int pdu_len, void *pdu_data, u64 cgid)
{
struct task_struct *tsk = current;
struct ring_buffer_event *event = NULL;
struct trace_buffer *buffer = NULL;
- struct blk_io_trace *t;
unsigned long flags = 0;
unsigned long *sequence;
unsigned int trace_ctx = 0;
@@ -228,6 +322,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
bool blk_tracer = blk_tracer_enabled;
ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
const enum req_op op = opf & REQ_OP_MASK;
+ size_t trace_len;
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
return;
@@ -238,10 +333,47 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(opf, META);
what |= MASK_TC_BIT(opf, PREFLUSH);
what |= MASK_TC_BIT(opf, FUA);
- if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
+
+ switch (op) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
what |= BLK_TC_ACT(BLK_TC_DISCARD);
- if (op == REQ_OP_FLUSH)
+ break;
+ case REQ_OP_FLUSH:
what |= BLK_TC_ACT(BLK_TC_FLUSH);
+ break;
+ case REQ_OP_ZONE_APPEND:
+ what |= BLK_TC_ACT(BLK_TC_ZONE_APPEND);
+ break;
+ case REQ_OP_ZONE_RESET:
+ what |= BLK_TC_ACT(BLK_TC_ZONE_RESET);
+ break;
+ case REQ_OP_ZONE_RESET_ALL:
+ what |= BLK_TC_ACT(BLK_TC_ZONE_RESET_ALL);
+ break;
+ case REQ_OP_ZONE_FINISH:
+ what |= BLK_TC_ACT(BLK_TC_ZONE_FINISH);
+ break;
+ case REQ_OP_ZONE_OPEN:
+ what |= BLK_TC_ACT(BLK_TC_ZONE_OPEN);
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ what |= BLK_TC_ACT(BLK_TC_ZONE_CLOSE);
+ break;
+ case REQ_OP_WRITE_ZEROES:
+ what |= BLK_TC_ACT(BLK_TC_WRITE_ZEROES);
+ break;
+ default:
+ break;
+ }
+
+ /* Drop trace events for zone operations with blktrace v1 */
+ if (bt->version == 1 && (what >> BLK_TC_SHIFT) > BLK_TC_END_V1) {
+ pr_debug_ratelimited("blktrace v1 cannot trace zone operation 0x%llx\n",
+ (unsigned long long)what);
+ return;
+ }
+
if (cgid)
what |= __BLK_TA_CGROUP;
@@ -255,13 +387,68 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0);
+ switch (bt->version) {
+ case 1:
+ trace_len = sizeof(struct blk_io_trace);
+ break;
+ case 2:
+ default:
+ /*
+ * ftrace always uses v2 (blk_io_trace2) format.
+ *
+ * For sysfs-enabled tracing path (enabled via
+ * /sys/block/DEV/trace/enable), blk_trace_setup_queue()
+ * never initializes bt->version, leaving it 0 from
+ * kzalloc(). We must handle version==0 safely here.
+ *
+ * Falling through to the default case ensures we never hit
+ * the old bug where the default left trace_len at 0, causing
+ * buffer underflow and memory corruption.
+ *
+ * Always use v2 format for ftrace and normalize
+ * bt->version to 2 when uninitialized.
+ */
+ trace_len = sizeof(struct blk_io_trace2);
+ if (bt->version == 0)
+ bt->version = 2;
+ break;
+ }
+ trace_len += pdu_len + cgid_len;
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
- sizeof(*t) + pdu_len + cgid_len,
- trace_ctx);
+ trace_len, trace_ctx);
if (!event)
return;
- t = ring_buffer_event_data(event);
- goto record_it;
+
+ switch (bt->version) {
+ case 1:
+ record_blktrace_event(ring_buffer_event_data(event),
+ pid, cpu, sector, bytes,
+ what, bt->dev, error, cgid, cgid_len,
+ pdu_data, pdu_len);
+ break;
+ case 2:
+ default:
+ /*
+ * Use v2 recording function (record_blktrace_event2)
+ * which writes blk_io_trace2 structure with correct
+ * field layout:
+ * - 32-bit pid at offset 28
+ * - 64-bit action at offset 32
+ *
+ * Falling through to the default case also handles version==0
+ * (from the sysfs path), ensuring we always use the v2
+ * recording function to match the v2 buffer allocated
+ * above.
+ */
+ record_blktrace_event2(ring_buffer_event_data(event),
+ pid, cpu, sector, bytes,
+ what, bt->dev, error, cgid, cgid_len,
+ pdu_data, pdu_len);
+ break;
+ }
+
+ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
+ return;
}
if (unlikely(tsk->btrace_seq != blktrace_seq))
@@ -273,41 +460,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
* from coming in and stepping on our toes.
*/
local_irq_save(flags);
- t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
- if (t) {
- sequence = per_cpu_ptr(bt->sequence, cpu);
-
- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
- t->sequence = ++(*sequence);
- t->time = ktime_to_ns(ktime_get());
-record_it:
- /*
- * These two are not needed in ftrace as they are in the
- * generic trace_entry, filled by tracing_generic_entry_update,
- * but for the trace_event->bin() synthesizer benefit we do it
- * here too.
- */
- t->cpu = cpu;
- t->pid = pid;
-
- t->sector = sector;
- t->bytes = bytes;
- t->action = what;
- t->device = bt->dev;
- t->error = error;
- t->pdu_len = pdu_len + cgid_len;
-
- if (cgid_len)
- memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
- if (pdu_len)
- memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
-
- if (blk_tracer) {
- trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
- return;
- }
- }
-
+ sequence = per_cpu_ptr(bt->sequence, cpu);
+ (*sequence)++;
+ relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes,
+ what, error, cgid, cgid_len, pdu_data, pdu_len);
local_irq_restore(flags);
}
@@ -494,9 +650,10 @@ static void blk_trace_setup_lba(struct blk_trace *bt,
/*
* Setup everything required to start tracing
*/
-static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct block_device *bdev,
- struct blk_user_trace_setup *buts)
+static struct blk_trace *blk_trace_setup_prepare(struct request_queue *q,
+ char *name, dev_t dev,
+ u32 buf_size, u32 buf_nr,
+ struct block_device *bdev)
{
struct blk_trace *bt = NULL;
struct dentry *dir = NULL;
@@ -504,31 +661,19 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
lockdep_assert_held(&q->debugfs_mutex);
- if (!buts->buf_size || !buts->buf_nr)
- return -EINVAL;
-
- strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE);
-
- /*
- * some device names have larger paths - convert the slashes
- * to underscores for this to work as expected
- */
- strreplace(buts->name, '/', '_');
-
/*
* bdev can be NULL, as with scsi-generic, this is a helpful as
* we can be.
*/
if (rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->debugfs_mutex))) {
- pr_warn("Concurrent blktraces are not allowed on %s\n",
- buts->name);
- return -EBUSY;
+ pr_warn("Concurrent blktraces are not allowed on %s\n", name);
+ return ERR_PTR(-EBUSY);
}
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
bt->sequence = alloc_percpu(unsigned long);
@@ -548,7 +693,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (bdev && !bdev_is_partition(bdev))
dir = q->debugfs_dir;
else
- bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
+ bt->dir = dir = debugfs_create_dir(name, blk_debugfs_root);
/*
* As blktrace relies on debugfs for its interface the debugfs directory
@@ -556,8 +701,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
* files or directories.
*/
if (IS_ERR_OR_NULL(dir)) {
- pr_warn("debugfs_dir not present for %s so skipping\n",
- buts->name);
+ pr_warn("debugfs_dir not present for %s so skipping\n", name);
ret = -ENOENT;
goto err;
}
@@ -569,17 +713,40 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
- bt->rchan = relay_open("trace", dir, buts->buf_size,
- buts->buf_nr, &blk_relay_callbacks, bt);
+ bt->rchan = relay_open("trace", dir, buf_size, buf_nr,
+ &blk_relay_callbacks, bt);
if (!bt->rchan)
goto err;
+ blk_trace_setup_lba(bt, bdev);
+
+ return bt;
+
+err:
+ blk_trace_free(q, bt);
+
+ return ERR_PTR(ret);
+}
+
+static void blk_trace_setup_finalize(struct request_queue *q,
+ char *name, int version,
+ struct blk_trace *bt,
+ struct blk_user_trace_setup2 *buts)
+{
+ strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE2);
+
+ /*
+ * some device names have larger paths - convert the slashes
+ * to underscores for this to work as expected
+ */
+ strreplace(buts->name, '/', '_');
+
+ bt->version = version;
bt->act_mask = buts->act_mask;
if (!bt->act_mask)
bt->act_mask = (u16) -1;
- blk_trace_setup_lba(bt, bdev);
-
/* overwrite with user settings */
if (buts->start_lba)
bt->start_lba = buts->start_lba;
@@ -591,30 +758,43 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
-
- ret = 0;
-err:
- if (ret)
- blk_trace_free(q, bt);
- return ret;
}
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct block_device *bdev,
char __user *arg)
{
+ struct blk_user_trace_setup2 buts2;
struct blk_user_trace_setup buts;
+ struct blk_trace *bt;
int ret;
ret = copy_from_user(&buts, arg, sizeof(buts));
if (ret)
return -EFAULT;
+ if (!buts.buf_size || !buts.buf_nr)
+ return -EINVAL;
+
+ buts2 = (struct blk_user_trace_setup2) {
+ .act_mask = buts.act_mask,
+ .buf_size = buts.buf_size,
+ .buf_nr = buts.buf_nr,
+ .start_lba = buts.start_lba,
+ .end_lba = buts.end_lba,
+ .pid = buts.pid,
+ };
+
mutex_lock(&q->debugfs_mutex);
- ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
+ bdev);
+ if (IS_ERR(bt)) {
+ mutex_unlock(&q->debugfs_mutex);
+ return PTR_ERR(bt);
+ }
+ blk_trace_setup_finalize(q, name, 1, bt, &buts2);
+ strcpy(buts.name, buts2.name);
mutex_unlock(&q->debugfs_mutex);
- if (ret)
- return ret;
if (copy_to_user(arg, &buts, sizeof(buts))) {
blk_trace_remove(q);
@@ -624,19 +804,54 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
EXPORT_SYMBOL_GPL(blk_trace_setup);
+static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev,
+ struct block_device *bdev, char __user *arg)
+{
+ struct blk_user_trace_setup2 buts2;
+ struct blk_trace *bt;
+
+ if (copy_from_user(&buts2, arg, sizeof(buts2)))
+ return -EFAULT;
+
+ if (!buts2.buf_size || !buts2.buf_nr)
+ return -EINVAL;
+
+ if (buts2.flags != 0)
+ return -EINVAL;
+
+ mutex_lock(&q->debugfs_mutex);
+ bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
+ bdev);
+ if (IS_ERR(bt)) {
+ mutex_unlock(&q->debugfs_mutex);
+ return PTR_ERR(bt);
+ }
+ blk_trace_setup_finalize(q, name, 2, bt, &buts2);
+ mutex_unlock(&q->debugfs_mutex);
+
+ if (copy_to_user(arg, &buts2, sizeof(buts2))) {
+ blk_trace_remove(q);
+ return -EFAULT;
+ }
+ return 0;
+}
+
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
static int compat_blk_trace_setup(struct request_queue *q, char *name,
dev_t dev, struct block_device *bdev,
char __user *arg)
{
- struct blk_user_trace_setup buts;
+ struct blk_user_trace_setup2 buts2;
struct compat_blk_user_trace_setup cbuts;
- int ret;
+ struct blk_trace *bt;
if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
return -EFAULT;
- buts = (struct blk_user_trace_setup) {
+ if (!cbuts.buf_size || !cbuts.buf_nr)
+ return -EINVAL;
+
+ buts2 = (struct blk_user_trace_setup2) {
.act_mask = cbuts.act_mask,
.buf_size = cbuts.buf_size,
.buf_nr = cbuts.buf_nr,
@@ -646,12 +861,16 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
};
mutex_lock(&q->debugfs_mutex);
- ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
+ bdev);
+ if (IS_ERR(bt)) {
+ mutex_unlock(&q->debugfs_mutex);
+ return PTR_ERR(bt);
+ }
+ blk_trace_setup_finalize(q, name, 1, bt, &buts2);
mutex_unlock(&q->debugfs_mutex);
- if (ret)
- return ret;
- if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
+ if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) {
blk_trace_remove(q);
return -EFAULT;
}
@@ -707,6 +926,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
char b[BDEVNAME_SIZE];
switch (cmd) {
+ case BLKTRACESETUP2:
+ snprintf(b, sizeof(b), "%pg", bdev);
+ ret = blk_trace_setup2(q, b, bdev->bd_dev, bdev, arg);
+ break;
case BLKTRACESETUP:
snprintf(b, sizeof(b), "%pg", bdev);
ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
@@ -794,7 +1017,7 @@ blk_trace_request_get_cgid(struct request *rq)
*
**/
static void blk_add_trace_rq(struct request *rq, blk_status_t error,
- unsigned int nr_bytes, u32 what, u64 cgid)
+ unsigned int nr_bytes, u64 what, u64 cgid)
{
struct blk_trace *bt;
@@ -846,6 +1069,22 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
blk_trace_request_get_cgid(rq));
}
+static void blk_add_trace_zone_update_request(void *ignore, struct request *rq)
+{
+ struct blk_trace *bt;
+
+ rcu_read_lock();
+ bt = rcu_dereference(rq->q->blk_trace);
+ if (likely(!bt) || bt->version < 2) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ZONE_APPEND,
+ blk_trace_request_get_cgid(rq));
+}
+
/**
* blk_add_trace_bio - Add a trace for a bio oriented action
* @q: queue the io is for
@@ -858,7 +1097,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
*
**/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
- u32 what, int error)
+ u64 what, int error)
{
struct blk_trace *bt;
@@ -924,7 +1163,7 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
bt = rcu_dereference(q->blk_trace);
if (bt) {
__be64 rpdu = cpu_to_be64(depth);
- u32 what;
+ u64 what;
if (explicit)
what = BLK_TA_UNPLUG_IO;
@@ -936,6 +1175,37 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
rcu_read_unlock();
}
+static void blk_add_trace_zone_plug(void *ignore, struct request_queue *q,
+ unsigned int zno, sector_t sector,
+ unsigned int sectors)
+{
+ struct blk_trace *bt;
+
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (bt && bt->version >= 2)
+ __blk_add_trace(bt, sector, sectors << SECTOR_SHIFT, 0,
+ BLK_TA_ZONE_PLUG, 0, 0, NULL, 0);
+ rcu_read_unlock();
+
+ return;
+}
+
+static void blk_add_trace_zone_unplug(void *ignore, struct request_queue *q,
+ unsigned int zno, sector_t sector,
+ unsigned int sectors)
+{
+ struct blk_trace *bt;
+
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (bt && bt->version >= 2)
+ __blk_add_trace(bt, sector, sectors << SECTOR_SHIFT, 0,
+ BLK_TA_ZONE_UNPLUG, 0, 0, NULL, 0);
+ rcu_read_unlock();
+ return;
+}
+
static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu)
{
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
@@ -1076,6 +1346,15 @@ static void blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
WARN_ON(ret);
+ ret = register_trace_blk_zone_append_update_request_bio(
+ blk_add_trace_zone_update_request, NULL);
+ WARN_ON(ret);
+ ret = register_trace_disk_zone_wplug_add_bio(blk_add_trace_zone_plug,
+ NULL);
+ WARN_ON(ret);
+ ret = register_trace_blk_zone_wplug_bio(blk_add_trace_zone_unplug,
+ NULL);
+ WARN_ON(ret);
ret = register_trace_block_plug(blk_add_trace_plug, NULL);
WARN_ON(ret);
ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
@@ -1095,6 +1374,10 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
unregister_trace_block_plug(blk_add_trace_plug, NULL);
+ unregister_trace_blk_zone_wplug_bio(blk_add_trace_zone_unplug, NULL);
+ unregister_trace_disk_zone_wplug_add_bio(blk_add_trace_zone_plug, NULL);
+ unregister_trace_blk_zone_append_update_request_bio(
+ blk_add_trace_zone_update_request, NULL);
unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
@@ -1113,7 +1396,7 @@ static void blk_unregister_tracepoints(void)
* struct blk_io_tracer formatting routines
*/
-static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
+static void fill_rwbs(char *rwbs, const struct blk_io_trace2 *t)
{
int i = 0;
int tc = t->action >> BLK_TC_SHIFT;
@@ -1128,7 +1411,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
if (tc & BLK_TC_DISCARD)
rwbs[i++] = 'D';
- else if (tc & BLK_TC_WRITE)
+ else if (tc & BLK_TC_WRITE_ZEROES) {
+ rwbs[i++] = 'W';
+ rwbs[i++] = 'Z';
+ } else if (tc & BLK_TC_WRITE)
rwbs[i++] = 'W';
else if (t->bytes)
rwbs[i++] = 'R';
@@ -1148,9 +1434,9 @@ out:
}
static inline
-const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
+const struct blk_io_trace2 *te_blk_io_trace(const struct trace_entry *ent)
{
- return (const struct blk_io_trace *)ent;
+ return (const struct blk_io_trace2 *)ent;
}
static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
@@ -1209,7 +1495,7 @@ static void blk_log_action_classic(struct trace_iterator *iter, const char *act,
unsigned long long ts = iter->ts;
unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
unsigned secs = (unsigned long)ts;
- const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
+ const struct blk_io_trace2 *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
@@ -1223,7 +1509,7 @@ static void blk_log_action(struct trace_iterator *iter, const char *act,
bool has_cg)
{
char rwbs[RWBS_LEN];
- const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
+ const struct blk_io_trace2 *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
if (has_cg) {
@@ -1444,7 +1730,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
{
struct trace_array *tr = iter->tr;
struct trace_seq *s = &iter->seq;
- const struct blk_io_trace *t;
+ const struct blk_io_trace2 *t;
u16 what;
bool long_act;
blk_log_action_t *log_action;
@@ -1452,7 +1738,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
t = te_blk_io_trace(iter->ent);
what = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP;
- long_act = !!(tr->trace_flags & TRACE_ITER_VERBOSE);
+ long_act = !!(tr->trace_flags & TRACE_ITER(VERBOSE));
log_action = classic ? &blk_log_action_classic : &blk_log_action;
has_cg = t->action & __BLK_TA_CGROUP;
@@ -1481,8 +1767,8 @@ static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
static void blk_trace_synthesize_old_trace(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
- struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
- const int offset = offsetof(struct blk_io_trace, sector);
+ struct blk_io_trace2 *t = (struct blk_io_trace2 *)iter->ent;
+ const int offset = offsetof(struct blk_io_trace2, sector);
struct blk_io_trace old = {
.magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
.time = iter->ts,
@@ -1517,9 +1803,9 @@ blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
/* don't output context-info for blk_classic output */
if (bit == TRACE_BLK_OPT_CLASSIC) {
if (set)
- tr->trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
+ tr->trace_flags &= ~TRACE_ITER(CONTEXT_INFO);
else
- tr->trace_flags |= TRACE_ITER_CONTEXT_INFO;
+ tr->trace_flags |= TRACE_ITER(CONTEXT_INFO);
}
return 0;
}
@@ -1559,6 +1845,10 @@ static int __init init_blk_tracer(void)
return 1;
}
+ BUILD_BUG_ON(__alignof__(struct blk_user_trace_setup2) %
+ __alignof__(long));
+ BUILD_BUG_ON(__alignof__(struct blk_io_trace2) % __alignof__(long));
+
return 0;
}
@@ -1667,6 +1957,7 @@ static const struct {
{ BLK_TC_DISCARD, "discard" },
{ BLK_TC_DRV_DATA, "drv_data" },
{ BLK_TC_FUA, "fua" },
+ { BLK_TC_WRITE_ZEROES, "write-zeroes" },
};
static int blk_trace_str2mask(const char *str)
@@ -1880,6 +2171,10 @@ void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
rwbs[i++] = 'Z';
rwbs[i++] = 'C';
break;
+ case REQ_OP_WRITE_ZEROES:
+ rwbs[i++] = 'W';
+ rwbs[i++] = 'Z';
+ break;
default:
rwbs[i++] = 'N';
}
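For reference, a user-space sketch of starting a trace through the new BLKTRACESETUP2 ioctl wired up above. struct blk_user_trace_setup2 and the BLKTRACESETUP2 ioctl number are defined elsewhere in this series (assumed here to be exported through <linux/blktrace_api.h>); the device node and buffer parameters are illustrative:

/*
 * Sketch: set up a v2 blktrace session. blk_trace_setup2() above
 * rejects a nonzero flags field, so it is left 0 here.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/blktrace_api.h>

int main(void)
{
	struct blk_user_trace_setup2 buts2;
	int fd = open("/dev/sda", O_RDONLY | O_NONBLOCK);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&buts2, 0, sizeof(buts2));
	buts2.act_mask = 0xffff;     /* all action classes */
	buts2.buf_size = 512 * 1024; /* relay sub-buffer size */
	buts2.buf_nr   = 4;          /* number of sub-buffers */
	buts2.flags    = 0;          /* must be zero for now */

	if (ioctl(fd, BLKTRACESETUP2, &buts2) < 0)
		perror("BLKTRACESETUP2");
	else
		printf("tracing %s with v2 records\n", buts2.name);

	close(fd);
	return 0;
}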
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4f87c16d915a..d57727abaade 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2529,7 +2529,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
return run_ctx->entry_ip;
}
-static int
+static __always_inline int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
unsigned long entry_ip, struct ftrace_regs *fregs,
bool is_return, void *data)
@@ -3372,13 +3372,13 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc
* direct calls into all the specific callback implementations
* (copy_user_data_sleepable, copy_user_data_nofault, and so on)
*/
-static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
+static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size,
const void *unsafe_src,
copy_fn_t str_copy_fn,
struct task_struct *tsk)
{
struct bpf_dynptr_kern *dst;
- u32 chunk_sz, off;
+ u64 chunk_sz, off;
void *dst_slice;
int cnt, err;
char buf[256];
@@ -3392,7 +3392,7 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do
return -E2BIG;
for (off = 0; off < size; off += chunk_sz - 1) {
- chunk_sz = min_t(u32, sizeof(buf), size - off);
+ chunk_sz = min_t(u64, sizeof(buf), size - off);
/* Expect str_copy_fn to return count of copied bytes, including
* zero terminator. Next iteration increment off by chunk_sz - 1 to
* overwrite NUL.
@@ -3409,14 +3409,14 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do
return off;
}
-static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
- u32 size, const void *unsafe_src,
+static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
+ u64 size, const void *unsafe_src,
copy_fn_t copy_fn, struct task_struct *tsk)
{
struct bpf_dynptr_kern *dst;
void *dst_slice;
char buf[256];
- u32 off, chunk_sz;
+ u64 off, chunk_sz;
int err;
dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
@@ -3428,7 +3428,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32
return -E2BIG;
for (off = 0; off < size; off += chunk_sz) {
- chunk_sz = min_t(u32, sizeof(buf), size - off);
+ chunk_sz = min_t(u64, sizeof(buf), size - off);
err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
if (err)
return err;
@@ -3514,58 +3514,58 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
return bpf_send_signal_common(sig, type, task, value);
}
-__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void __user *unsafe_ptr__ign)
+__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_data_nofault, NULL);
}
-__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void *unsafe_ptr__ign)
+__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr__ign)
{
return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
copy_kernel_data_nofault, NULL);
}
-__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void __user *unsafe_ptr__ign)
+__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_str_nofault, NULL);
}
-__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void *unsafe_ptr__ign)
+__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr__ign)
{
return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
copy_kernel_str_nofault, NULL);
}
-__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void __user *unsafe_ptr__ign)
+__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_data_sleepable, NULL);
}
-__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void __user *unsafe_ptr__ign)
+__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_str_sleepable, NULL);
}
-__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void __user *unsafe_ptr__ign,
+__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign,
struct task_struct *tsk)
{
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_data_sleepable, tsk);
}
-__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
- u32 size, const void __user *unsafe_ptr__ign,
+__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign,
struct task_struct *tsk)
{
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 484ad7a18463..65ac0f04a946 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -498,9 +498,6 @@ found:
return get_data_type_data(current, offset);
}
-/* Both enabled by default (can be cleared by function_graph tracer flags */
-bool fgraph_sleep_time = true;
-
#ifdef CONFIG_DYNAMIC_FTRACE
/*
* archs can override this function if they must do something
@@ -1019,15 +1016,11 @@ void fgraph_init_ops(struct ftrace_ops *dst_ops,
mutex_init(&dst_ops->local_hash.regex_lock);
INIT_LIST_HEAD(&dst_ops->subop_list);
dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+ dst_ops->private = src_ops->private;
}
#endif
}
-void ftrace_graph_sleep_time_control(bool enable)
-{
- fgraph_sleep_time = enable;
-}
-
/*
* Simply points to ftrace_stub, but with the proper protocol.
* Defined by the linker script in linux/vmlinux.lds.h
@@ -1098,7 +1091,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
* Does the user want to count the time a function was asleep.
* If so, do not update the time stamps.
*/
- if (fgraph_sleep_time)
+ if (!fgraph_no_sleep_time)
return;
timestamp = trace_clock_local();
@@ -1376,6 +1369,13 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_active++;
+ /* Always save the function, and reset at unregistering */
+ gops->saved_func = gops->entryfunc;
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (ftrace_pids_enabled(&gops->ops))
+ gops->entryfunc = fgraph_pid_func;
+#endif
+
if (ftrace_graph_active == 2)
ftrace_graph_disable_direct(true);
@@ -1395,8 +1395,6 @@ int register_ftrace_graph(struct fgraph_ops *gops)
} else {
init_task_vars(gops->idx);
}
- /* Always save the function, and reset at unregistering */
- gops->saved_func = gops->entryfunc;
gops->ops.flags |= FTRACE_OPS_FL_GRAPH;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 5a807d62e76d..0b1ee8e585f2 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -10,6 +10,7 @@
#include <linux/kprobes.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/sort.h>
@@ -41,60 +42,68 @@
* - RCU hlist traversal under disabling preempt
*/
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
-static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
+static struct rhltable fprobe_ip_table;
static DEFINE_MUTEX(fprobe_mutex);
+static struct fgraph_ops fprobe_graph_ops;
-/*
- * Find first fprobe in the hlist. It will be iterated twice in the entry
- * probe, once for correcting the total required size, the second time is
- * calling back the user handlers.
- * Thus the hlist in the fprobe_table must be sorted and new probe needs to
- * be added *before* the first fprobe.
- */
-static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
+static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
- struct fprobe_hlist_node *node;
- struct hlist_head *head;
+ return hash_ptr(*(unsigned long **)data, 32);
+}
- head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
- hlist_for_each_entry_rcu(node, head, hlist,
- lockdep_is_held(&fprobe_mutex)) {
- if (node->addr == ip)
- return node;
- }
- return NULL;
+static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ unsigned long key = *(unsigned long *)arg->key;
+ const struct fprobe_hlist_node *n = ptr;
+
+ return n->addr != key;
}
-NOKPROBE_SYMBOL(find_first_fprobe_node);
-/* Node insertion and deletion requires the fprobe_mutex */
-static void insert_fprobe_node(struct fprobe_hlist_node *node)
+static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
- unsigned long ip = node->addr;
- struct fprobe_hlist_node *next;
- struct hlist_head *head;
+ const struct fprobe_hlist_node *n = data;
+
+ return hash_ptr((void *)n->addr, 32);
+}
+
+static const struct rhashtable_params fprobe_rht_params = {
+ .head_offset = offsetof(struct fprobe_hlist_node, hlist),
+ .key_offset = offsetof(struct fprobe_hlist_node, addr),
+ .key_len = sizeof_field(struct fprobe_hlist_node, addr),
+ .hashfn = fprobe_node_hashfn,
+ .obj_hashfn = fprobe_node_obj_hashfn,
+ .obj_cmpfn = fprobe_node_cmp,
+ .automatic_shrinking = true,
+};
+/* Node insertion and deletion requires the fprobe_mutex */
+static int insert_fprobe_node(struct fprobe_hlist_node *node)
+{
lockdep_assert_held(&fprobe_mutex);
- next = find_first_fprobe_node(ip);
- if (next) {
- hlist_add_before_rcu(&node->hlist, &next->hlist);
- return;
- }
- head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
- hlist_add_head_rcu(&node->hlist, head);
+ return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
}
/* Return true if there are synonims */
static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
+ bool ret;
+
lockdep_assert_held(&fprobe_mutex);
/* Avoid double deleting */
if (READ_ONCE(node->fp) != NULL) {
WRITE_ONCE(node->fp, NULL);
- hlist_del_rcu(&node->hlist);
+ rhltable_remove(&fprobe_ip_table, &node->hlist,
+ fprobe_rht_params);
}
- return !!find_first_fprobe_node(node->addr);
+
+ rcu_read_lock();
+ ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr,
+ fprobe_rht_params);
+ rcu_read_unlock();
+
+ return ret;
}
/* Check existence of the fprobe */
@@ -246,12 +255,128 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
return ret;
}
-static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
- struct ftrace_regs *fregs)
+#if defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) || defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
+/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
+static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct fprobe_hlist_node *node;
+ struct rhlist_head *head, *pos;
+ struct fprobe *fp;
+ int bit;
+
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
+ /*
+ * ftrace_test_recursion_trylock() disables preemption, but
+ * rhltable_lookup() checks whether rcu_read_lock() is held.
+ * So we take rcu_read_lock() here.
+ */
+ rcu_read_lock();
+ head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
+
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
+ if (node->addr != ip)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
+ continue;
+
+ if (fprobe_shared_with_kprobes(fp))
+ __fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
+ else
+ __fprobe_handler(ip, parent_ip, fp, fregs, NULL);
+ }
+ rcu_read_unlock();
+ ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(fprobe_ftrace_entry);
+
+static struct ftrace_ops fprobe_ftrace_ops = {
+ .func = fprobe_ftrace_entry,
+ .flags = FTRACE_OPS_FL_SAVE_ARGS,
+};
+static int fprobe_ftrace_active;
+
+static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
+{
+ int ret;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
+ if (ret)
+ return ret;
+
+ if (!fprobe_ftrace_active) {
+ ret = register_ftrace_function(&fprobe_ftrace_ops);
+ if (ret) {
+ ftrace_free_filter(&fprobe_ftrace_ops);
+ return ret;
+ }
+ }
+ fprobe_ftrace_active++;
+ return 0;
+}
+
+static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
+{
+ lockdep_assert_held(&fprobe_mutex);
+
+ fprobe_ftrace_active--;
+ if (!fprobe_ftrace_active)
+ unregister_ftrace_function(&fprobe_ftrace_ops);
+ if (num)
+ ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
+}
+
+static bool fprobe_is_ftrace(struct fprobe *fp)
+{
+ return !fp->exit_handler;
+}
+
+#ifdef CONFIG_MODULES
+static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
+ int reset)
+{
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, remove, reset);
+ ftrace_set_filter_ips(&fprobe_ftrace_ops, ips, cnt, remove, reset);
+}
+#endif
+#else
+static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
+{
+ return -ENOENT;
+}
+
+static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
+{
+}
+
+static bool fprobe_is_ftrace(struct fprobe *fp)
+{
+ return false;
+}
+
+#ifdef CONFIG_MODULES
+static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
+ int reset)
+{
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, remove, reset);
+}
+#endif
+#endif /* !CONFIG_DYNAMIC_FTRACE_WITH_ARGS && !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+/* fgraph_ops callback, this processes fprobes which have exit_handler. */
+static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
- struct fprobe_hlist_node *node, *first;
unsigned long *fgraph_data = NULL;
unsigned long func = trace->func;
+ struct fprobe_hlist_node *node;
+ struct rhlist_head *head, *pos;
unsigned long ret_ip;
int reserved_words;
struct fprobe *fp;
@@ -260,14 +385,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
if (WARN_ON_ONCE(!fregs))
return 0;
- first = node = find_first_fprobe_node(func);
- if (unlikely(!first))
- return 0;
-
+ guard(rcu)();
+ head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
reserved_words = 0;
- hlist_for_each_entry_from_rcu(node, hlist) {
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
if (node->addr != func)
- break;
+ continue;
fp = READ_ONCE(node->fp);
if (!fp || !fp->exit_handler)
continue;
@@ -278,15 +401,14 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
reserved_words +=
FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
}
- node = first;
if (reserved_words) {
fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
if (unlikely(!fgraph_data)) {
- hlist_for_each_entry_from_rcu(node, hlist) {
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
if (node->addr != func)
- break;
+ continue;
fp = READ_ONCE(node->fp);
- if (fp && !fprobe_disabled(fp))
+ if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
fp->nmissed++;
}
return 0;
@@ -299,14 +421,14 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
*/
ret_ip = ftrace_regs_get_return_address(fregs);
used = 0;
- hlist_for_each_entry_from_rcu(node, hlist) {
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
int data_size;
void *data;
if (node->addr != func)
- break;
+ continue;
fp = READ_ONCE(node->fp);
- if (!fp || fprobe_disabled(fp))
+ if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
continue;
data_size = fp->entry_data_size;
@@ -334,7 +456,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
/* If any exit_handler is set, data must be used. */
return used != 0;
}
-NOKPROBE_SYMBOL(fprobe_entry);
+NOKPROBE_SYMBOL(fprobe_fgraph_entry);
static void fprobe_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops,
@@ -373,7 +495,7 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
NOKPROBE_SYMBOL(fprobe_return);
static struct fgraph_ops fprobe_graph_ops = {
- .entryfunc = fprobe_entry,
+ .entryfunc = fprobe_fgraph_entry,
.retfunc = fprobe_return,
};
static int fprobe_graph_active;
@@ -449,25 +571,18 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad
return 0;
}
-static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
- struct fprobe_addr_list *alist)
+static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
+ struct fprobe_addr_list *alist)
{
- struct fprobe_hlist_node *node;
- int ret = 0;
-
- hlist_for_each_entry_rcu(node, head, hlist,
- lockdep_is_held(&fprobe_mutex)) {
- if (!within_module(node->addr, mod))
- continue;
- if (delete_fprobe_node(node))
- continue;
- /*
- * If failed to update alist, just continue to update hlist.
- * Therefore, at list user handler will not hit anymore.
- */
- if (!ret)
- ret = fprobe_addr_list_add(alist, node->addr);
- }
+ if (!within_module(node->addr, mod))
+ return;
+ if (delete_fprobe_node(node))
+ return;
+ /*
+ * If we fail to update alist, just continue updating the hlist.
+ * At least the user handler will not be hit anymore.
+ */
+ fprobe_addr_list_add(alist, node->addr);
}
/* Handle module unloading to manage fprobe_ip_table. */
@@ -475,8 +590,9 @@ static int fprobe_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
+ struct fprobe_hlist_node *node;
+ struct rhashtable_iter iter;
struct module *mod = data;
- int i;
if (val != MODULE_STATE_GOING)
return NOTIFY_DONE;
@@ -487,12 +603,19 @@ static int fprobe_module_callback(struct notifier_block *nb,
return NOTIFY_DONE;
mutex_lock(&fprobe_mutex);
- for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
- fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
+ rhltable_walk_enter(&fprobe_ip_table, &iter);
+ do {
+ rhashtable_walk_start(&iter);
+
+ while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
+ fprobe_remove_node_in_module(mod, node, &alist);
+
+ rhashtable_walk_stop(&iter);
+ } while (node == ERR_PTR(-EAGAIN));
+ rhashtable_walk_exit(&iter);
if (alist.index > 0)
- ftrace_set_filter_ips(&fprobe_graph_ops.ops,
- alist.addrs, alist.index, 1, 0);
+ fprobe_set_ips(alist.addrs, alist.index, 1, 0);
mutex_unlock(&fprobe_mutex);
kfree(alist.addrs);
@@ -725,11 +848,23 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
mutex_lock(&fprobe_mutex);
hlist_array = fp->hlist_array;
- ret = fprobe_graph_add_ips(addrs, num);
+ if (fprobe_is_ftrace(fp))
+ ret = fprobe_ftrace_add_ips(addrs, num);
+ else
+ ret = fprobe_graph_add_ips(addrs, num);
+
if (!ret) {
add_fprobe_hash(fp);
- for (i = 0; i < hlist_array->size; i++)
- insert_fprobe_node(&hlist_array->array[i]);
+ for (i = 0; i < hlist_array->size; i++) {
+ ret = insert_fprobe_node(&hlist_array->array[i]);
+ if (ret)
+ break;
+ }
+ /* fallback on insert error */
+ if (ret) {
+ for (i--; i >= 0; i--)
+ delete_fprobe_node(&hlist_array->array[i]);
+ }
}
mutex_unlock(&fprobe_mutex);
@@ -813,7 +948,10 @@ int unregister_fprobe(struct fprobe *fp)
}
del_fprobe_hash(fp);
- fprobe_graph_remove_ips(addrs, count);
+ if (fprobe_is_ftrace(fp))
+ fprobe_ftrace_remove_ips(addrs, count);
+ else
+ fprobe_graph_remove_ips(addrs, count);
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
@@ -825,3 +963,10 @@ out:
return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
+
+static int __init fprobe_initcall(void)
+{
+ rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
+ return 0;
+}
+core_initcall(fprobe_initcall);
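The fprobe_ip_table conversion above moves from a fixed-size hlist array to an rhltable, the rhashtable variant that keeps duplicate keys (several fprobes attached to the same function address) chained on one bucket and walked with rhl_for_each_entry_rcu(). A standalone sketch of that pattern in a kernel-module context; the names struct demo_node, demo_table and demo_* functions are illustrative, not part of the patch:

/* Sketch: keyed-duplicates hash table using rhltable, as fprobe now does. */
#include <linux/module.h>
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_node {
	unsigned long		addr;	/* key */
	struct rhlist_head	hlist;	/* per-key chain */
};

static const struct rhashtable_params demo_params = {
	.head_offset		= offsetof(struct demo_node, hlist),
	.key_offset		= offsetof(struct demo_node, addr),
	.key_len		= sizeof(unsigned long),
	.automatic_shrinking	= true,
};

static struct rhltable demo_table;

static int demo_add(unsigned long addr)
{
	struct demo_node *n = kzalloc(sizeof(*n), GFP_KERNEL);

	if (!n)
		return -ENOMEM;
	n->addr = addr;
	/* duplicates of the same key are allowed and chained */
	return rhltable_insert(&demo_table, &n->hlist, demo_params);
}

static bool demo_present(unsigned long addr)
{
	struct rhlist_head *list, *pos;
	struct demo_node *n;
	bool found = false;

	rcu_read_lock();	/* rhltable_lookup() requires the RCU read side */
	list = rhltable_lookup(&demo_table, &addr, demo_params);
	rhl_for_each_entry_rcu(n, pos, list, hlist) {
		if (n->addr == addr) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

static int __init demo_init(void)
{
	return rhltable_init(&demo_table, &demo_params);
}
module_init(demo_init);
MODULE_LICENSE("GPL");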
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 42bd2ba68a82..3ec2033c0774 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -534,7 +534,9 @@ static int function_stat_headers(struct seq_file *m)
static int function_stat_show(struct seq_file *m, void *v)
{
+ struct trace_array *tr = trace_get_global_array();
struct ftrace_profile *rec = v;
+ const char *refsymbol = NULL;
char str[KSYM_SYMBOL_LEN];
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static struct trace_seq s;
@@ -554,7 +556,29 @@ static int function_stat_show(struct seq_file *m, void *v)
return 0;
#endif
- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ if (tr->trace_flags & TRACE_ITER(PROF_TEXT_OFFSET)) {
+ unsigned long offset;
+
+ if (core_kernel_text(rec->ip)) {
+ refsymbol = "_text";
+ offset = rec->ip - (unsigned long)_text;
+ } else {
+ struct module *mod;
+
+ guard(rcu)();
+ mod = __module_text_address(rec->ip);
+ if (mod) {
+ refsymbol = mod->name;
+ /* Calculate offset from module's text entry address. */
+ offset = rec->ip - (unsigned long)mod->mem[MOD_TEXT].base;
+ }
+ }
+ if (refsymbol)
+ snprintf(str, sizeof(str), " %s+%#lx", refsymbol, offset);
+ }
+ if (!refsymbol)
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -838,6 +862,8 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace,
return 1;
}
+bool fprofile_no_sleep_time;
+
static void profile_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops,
struct ftrace_regs *fregs)
@@ -863,7 +889,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
calltime = rettime - profile_data->calltime;
- if (!fgraph_sleep_time) {
+ if (fprofile_no_sleep_time) {
if (current->ftrace_sleeptime)
calltime -= current->ftrace_sleeptime - profile_data->sleeptime;
}
@@ -1971,7 +1997,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops)
*/
static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
struct ftrace_hash *old_hash,
- struct ftrace_hash *new_hash)
+ struct ftrace_hash *new_hash,
+ bool update_target)
{
struct ftrace_page *pg;
struct dyn_ftrace *rec, *end = NULL;
@@ -2006,10 +2033,13 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
if (rec->flags & FTRACE_FL_DISABLED)
continue;
- /* We need to update only differences of filter_hash */
+ /*
+ * Unless we are updating the target of a direct function,
+ * we only need to update differences of filter_hash
+ */
in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
- if (in_old == in_new)
+ if (!update_target && (in_old == in_new))
continue;
if (in_new) {
@@ -2020,7 +2050,16 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
if (is_ipmodify)
goto rollback;
- FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);
+ /*
+ * If this is called by __modify_ftrace_direct()
+ * then it is only changing where the direct
+ * pointer is jumping to, and the record already
+ * points to a direct trampoline. If it isn't,
+ * then it is a bug to update ipmodify on a direct
+ * caller.
+ */
+ FTRACE_WARN_ON(!update_target &&
+ (rec->flags & FTRACE_FL_DIRECT));
/*
* Another ops with IPMODIFY is already
@@ -2076,7 +2115,7 @@ static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops)
if (ftrace_hash_empty(hash))
hash = NULL;
- return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash);
+ return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash, false);
}
/* Disabling always succeeds */
@@ -2087,7 +2126,7 @@ static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops)
if (ftrace_hash_empty(hash))
hash = NULL;
- __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH);
+ __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH, false);
}
static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
@@ -2101,7 +2140,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
if (ftrace_hash_empty(new_hash))
new_hash = NULL;
- return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash);
+ return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash, false);
}
static void print_ip_ins(const char *fmt, const unsigned char *p)
@@ -5938,7 +5977,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
del = __ftrace_lookup_ip(direct_functions, entry->ip);
- if (del && del->direct == addr) {
+ if (del && ftrace_jmp_get(del->direct) ==
+ ftrace_jmp_get(addr)) {
remove_hash_entry(direct_functions, del);
kfree(del);
}
@@ -5953,6 +5993,17 @@ static void register_ftrace_direct_cb(struct rcu_head *rhp)
free_ftrace_hash(fhp);
}
+static void reset_direct(struct ftrace_ops *ops, unsigned long addr)
+{
+ struct ftrace_hash *hash = ops->func_hash->filter_hash;
+
+ remove_direct_functions_hash(hash, addr);
+
+ /* cleanup for possible another register call */
+ ops->func = NULL;
+ ops->trampoline = 0;
+}
+
/**
* register_ftrace_direct - Call a custom trampoline directly
* for multiple functions registered in @ops
@@ -5992,8 +6043,15 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
if (ftrace_hash_empty(hash))
return -EINVAL;
+ /* This is a "raw" address, and this should never happen. */
+ if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
+ return -EINVAL;
+
mutex_lock(&direct_mutex);
+ if (ops->flags & FTRACE_OPS_FL_JMP)
+ addr = ftrace_jmp_set(addr);
+
/* Make sure requested entries are not already registered.. */
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
@@ -6043,11 +6101,13 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
new_hash = NULL;
ops->func = call_direct_funcs;
- ops->flags = MULTI_FLAGS;
+ ops->flags |= MULTI_FLAGS;
ops->trampoline = FTRACE_REGS_ADDR;
ops->direct_call = addr;
err = register_ftrace_function_nolock(ops);
+ if (err)
+ reset_direct(ops, addr);
out_unlock:
mutex_unlock(&direct_mutex);
@@ -6080,7 +6140,6 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct);
int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
bool free_filters)
{
- struct ftrace_hash *hash = ops->func_hash->filter_hash;
int err;
if (check_direct_multi(ops))
@@ -6090,13 +6149,9 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
mutex_lock(&direct_mutex);
err = unregister_ftrace_function(ops);
- remove_direct_functions_hash(hash, addr);
+ reset_direct(ops, addr);
mutex_unlock(&direct_mutex);
- /* cleanup for possible another register call */
- ops->func = NULL;
- ops->trampoline = 0;
-
if (free_filters)
ftrace_free_filter(ops);
return err;
@@ -6106,7 +6161,7 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_direct);
static int
__modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
{
- struct ftrace_hash *hash;
+ struct ftrace_hash *hash = ops->func_hash->filter_hash;
struct ftrace_func_entry *entry, *iter;
static struct ftrace_ops tmp_ops = {
.func = ftrace_stub,
@@ -6117,6 +6172,13 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
lockdep_assert_held_once(&direct_mutex);
+ /* This is a "raw" address, and this should never happen. */
+ if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
+ return -EINVAL;
+
+ if (ops->flags & FTRACE_OPS_FL_JMP)
+ addr = ftrace_jmp_set(addr);
+
/* Enable the tmp_ops to have the same functions as the direct ops */
ftrace_ops_init(&tmp_ops);
tmp_ops.func_hash = ops->func_hash;
@@ -6127,12 +6189,20 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
return err;
/*
+ * Call __ftrace_hash_update_ipmodify() here, so that we can call
+ * ops->ops_func for the ops. This is needed because the above
+ * register_ftrace_function_nolock() worked on tmp_ops.
+ */
+ err = __ftrace_hash_update_ipmodify(ops, hash, hash, true);
+ if (err)
+ goto out;
+
+ /*
* Now the ftrace_ops_list_func() is called to do the direct callers.
* We can safely change the direct functions attached to each entry.
*/
mutex_lock(&ftrace_lock);
- hash = ops->func_hash->filter_hash;
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
hlist_for_each_entry(iter, &hash->buckets[i], hlist) {
@@ -6147,6 +6217,7 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
mutex_unlock(&ftrace_lock);
+out:
/* Removing the tmp_ops will add the updated direct callers to the functions */
unregister_ftrace_function(&tmp_ops);
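The ftrace direct-call hunks above tag a trampoline address with a "jmp" marker when FTRACE_OPS_FL_JMP is set, and compare addresses only after stripping that marker with ftrace_jmp_get(). A minimal sketch of how such helpers are typically implemented, assuming the flag lives in the otherwise-unused low bit of the address (the real definitions live in ftrace.h and may differ):

/*
 * Illustrative only: encode the "replace __fentry__ with a jmp" flag
 * in bit 0 of a direct trampoline address.
 */
static inline unsigned long ftrace_jmp_set(unsigned long addr)
{
	return addr | 1UL;
}

static inline bool ftrace_is_jmp(unsigned long addr)
{
	return addr & 1UL;
}

static inline unsigned long ftrace_jmp_get(unsigned long addr)
{
	return addr & ~1UL;	/* strip the flag to recover the raw address */
}

This is why remove_direct_functions_hash() above compares ftrace_jmp_get(del->direct) with ftrace_jmp_get(addr) rather than the raw values.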
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index 090bb5ea4a19..dbee72d69d0a 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
*/
#include <linux/spinlock.h>
+#include <linux/seqlock.h>
#include <linux/irq_work.h>
#include <linux/slab.h>
#include "trace.h"
@@ -126,7 +127,7 @@ bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
{
union upper_chunk *upper_chunk;
union lower_chunk *lower_chunk;
- unsigned long flags;
+ unsigned int seq;
unsigned int upper1;
unsigned int upper2;
unsigned int lower;
@@ -138,14 +139,16 @@ bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
if (pid_split(pid, &upper1, &upper2, &lower) < 0)
return false;
- raw_spin_lock_irqsave(&pid_list->lock, flags);
- upper_chunk = pid_list->upper[upper1];
- if (upper_chunk) {
- lower_chunk = upper_chunk->data[upper2];
- if (lower_chunk)
- ret = test_bit(lower, lower_chunk->data);
- }
- raw_spin_unlock_irqrestore(&pid_list->lock, flags);
+ do {
+ seq = read_seqcount_begin(&pid_list->seqcount);
+ ret = false;
+ upper_chunk = pid_list->upper[upper1];
+ if (upper_chunk) {
+ lower_chunk = upper_chunk->data[upper2];
+ if (lower_chunk)
+ ret = test_bit(lower, lower_chunk->data);
+ }
+ } while (read_seqcount_retry(&pid_list->seqcount, seq));
return ret;
}
@@ -178,6 +181,7 @@ int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
return -EINVAL;
raw_spin_lock_irqsave(&pid_list->lock, flags);
+ write_seqcount_begin(&pid_list->seqcount);
upper_chunk = pid_list->upper[upper1];
if (!upper_chunk) {
upper_chunk = get_upper_chunk(pid_list);
@@ -199,6 +203,7 @@ int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
set_bit(lower, lower_chunk->data);
ret = 0;
out:
+ write_seqcount_end(&pid_list->seqcount);
raw_spin_unlock_irqrestore(&pid_list->lock, flags);
return ret;
}
@@ -230,6 +235,7 @@ int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
return -EINVAL;
raw_spin_lock_irqsave(&pid_list->lock, flags);
+ write_seqcount_begin(&pid_list->seqcount);
upper_chunk = pid_list->upper[upper1];
if (!upper_chunk)
goto out;
@@ -250,6 +256,7 @@ int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
}
}
out:
+ write_seqcount_end(&pid_list->seqcount);
raw_spin_unlock_irqrestore(&pid_list->lock, flags);
return 0;
}
@@ -340,8 +347,10 @@ static void pid_list_refill_irq(struct irq_work *iwork)
again:
raw_spin_lock(&pid_list->lock);
+ write_seqcount_begin(&pid_list->seqcount);
upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks;
lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks;
+ write_seqcount_end(&pid_list->seqcount);
raw_spin_unlock(&pid_list->lock);
if (upper_count <= 0 && lower_count <= 0)
@@ -370,6 +379,7 @@ static void pid_list_refill_irq(struct irq_work *iwork)
}
raw_spin_lock(&pid_list->lock);
+ write_seqcount_begin(&pid_list->seqcount);
if (upper) {
*upper_next = pid_list->upper_list;
pid_list->upper_list = upper;
@@ -380,6 +390,7 @@ static void pid_list_refill_irq(struct irq_work *iwork)
pid_list->lower_list = lower;
pid_list->free_lower_chunks += lcnt;
}
+ write_seqcount_end(&pid_list->seqcount);
raw_spin_unlock(&pid_list->lock);
/*
@@ -419,6 +430,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq);
raw_spin_lock_init(&pid_list->lock);
+ seqcount_raw_spinlock_init(&pid_list->seqcount, &pid_list->lock);
for (i = 0; i < CHUNK_ALLOC; i++) {
union upper_chunk *chunk;
diff --git a/kernel/trace/pid_list.h b/kernel/trace/pid_list.h
index 62e73f1ac85f..0b45fb0eadb9 100644
--- a/kernel/trace/pid_list.h
+++ b/kernel/trace/pid_list.h
@@ -76,6 +76,7 @@ union upper_chunk {
};
struct trace_pid_list {
+ seqcount_raw_spinlock_t seqcount;
raw_spinlock_t lock;
struct irq_work refill_irqwork;
union upper_chunk *upper[UPPER1_SIZE]; // 1 or 2K in size
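The pid_list changes pair the existing raw spinlock with a seqcount so that trace_pid_list_is_set() can run locklessly on the tracing fast path: readers retry if a writer was active, while writers keep taking the lock and wrap their updates in write_seqcount_begin()/end(). A self-contained sketch of the same pattern, with illustrative names (initialization via seqcount_raw_spinlock_init() omitted for brevity):

#include <linux/seqlock.h>
#include <linux/spinlock.h>

struct demo_state {
	seqcount_raw_spinlock_t	seqcount;
	raw_spinlock_t		lock;
	unsigned long		value;
};

/* Lockless reader: never blocks, retries if it raced with a writer */
static unsigned long demo_read(struct demo_state *s)
{
	unsigned long val;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&s->seqcount);
		val = s->value;
	} while (read_seqcount_retry(&s->seqcount, seq));

	return val;
}

/* Writer: still serialized by the raw spinlock */
static void demo_write(struct demo_state *s, unsigned long val)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&s->lock, flags);
	write_seqcount_begin(&s->seqcount);
	s->value = val;
	write_seqcount_end(&s->seqcount);
	raw_spin_unlock_irqrestore(&s->lock, flags);
}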
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1244d2c5c384..8688c88534de 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -402,6 +402,41 @@ static void free_buffer_page(struct buffer_page *bpage)
}
/*
+ * For best performance, allocate cpu buffer data cache line sized
+ * and per CPU.
+ */
+#define alloc_cpu_buffer(cpu) (struct ring_buffer_per_cpu *) \
+ kzalloc_node(ALIGN(sizeof(struct ring_buffer_per_cpu), \
+ cache_line_size()), GFP_KERNEL, cpu_to_node(cpu));
+
+#define alloc_cpu_page(cpu) (struct buffer_page *) \
+ kzalloc_node(ALIGN(sizeof(struct buffer_page), \
+ cache_line_size()), GFP_KERNEL, cpu_to_node(cpu));
+
+static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
+{
+ struct buffer_data_page *dpage;
+ struct page *page;
+ gfp_t mflags;
+
+ /*
+ * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+ * gracefully without invoking oom-killer and the system is not
+ * destabilized.
+ */
+ mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO;
+
+ page = alloc_pages_node(cpu_to_node(cpu), mflags, order);
+ if (!page)
+ return NULL;
+
+ dpage = page_address(page);
+ rb_init_page(dpage);
+
+ return dpage;
+}
+
+/*
* We need to fit the time_stamp delta into 27 bits.
*/
static inline bool test_time_stamp(u64 delta)
@@ -2204,7 +2239,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_cpu_meta *meta = NULL;
struct buffer_page *bpage, *tmp;
bool user_thread = current->mm != NULL;
- gfp_t mflags;
long i;
/*
@@ -2219,13 +2253,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
return -ENOMEM;
/*
- * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
- * gracefully without invoking oom-killer and the system is not
- * destabilized.
- */
- mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
-
- /*
* If a user thread allocates too much, and si_mem_available()
* reports there's enough memory, even though there is not.
* Make sure the OOM killer kills this thread. This can happen
@@ -2241,10 +2268,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
for (i = 0; i < nr_pages; i++) {
- struct page *page;
- bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- mflags, cpu_to_node(cpu_buffer->cpu));
+ bpage = alloc_cpu_page(cpu_buffer->cpu);
if (!bpage)
goto free_pages;
@@ -2267,13 +2292,10 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
bpage->range = 1;
bpage->id = i + 1;
} else {
- page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
- mflags | __GFP_COMP | __GFP_ZERO,
- cpu_buffer->buffer->subbuf_order);
- if (!page)
+ int order = cpu_buffer->buffer->subbuf_order;
+ bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
+ if (!bpage->page)
goto free_pages;
- bpage->page = page_address(page);
- rb_init_page(bpage->page);
}
bpage->order = cpu_buffer->buffer->subbuf_order;
@@ -2324,14 +2346,12 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
{
- struct ring_buffer_per_cpu *cpu_buffer __free(kfree) = NULL;
+ struct ring_buffer_per_cpu *cpu_buffer __free(kfree) =
+ alloc_cpu_buffer(cpu);
struct ring_buffer_cpu_meta *meta;
struct buffer_page *bpage;
- struct page *page;
int ret;
- cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
- GFP_KERNEL, cpu_to_node(cpu));
if (!cpu_buffer)
return NULL;
@@ -2347,8 +2367,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
mutex_init(&cpu_buffer->mapping_lock);
- bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- GFP_KERNEL, cpu_to_node(cpu));
+ bpage = alloc_cpu_page(cpu);
if (!bpage)
return NULL;
@@ -2370,13 +2389,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
rb_meta_buffer_update(cpu_buffer, bpage);
bpage->range = 1;
} else {
- page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
- cpu_buffer->buffer->subbuf_order);
- if (!page)
+ int order = cpu_buffer->buffer->subbuf_order;
+ bpage->page = alloc_cpu_data(cpu, order);
+ if (!bpage->page)
goto fail_free_reader;
- bpage->page = page_address(page);
- rb_init_page(bpage->page);
}
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -6464,7 +6480,6 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
struct ring_buffer_per_cpu *cpu_buffer;
struct buffer_data_read_page *bpage = NULL;
unsigned long flags;
- struct page *page;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return ERR_PTR(-ENODEV);
@@ -6486,22 +6501,16 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
- if (bpage->data)
- goto out;
-
- page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_NORETRY | __GFP_COMP | __GFP_ZERO,
- cpu_buffer->buffer->subbuf_order);
- if (!page) {
- kfree(bpage);
- return ERR_PTR(-ENOMEM);
+ if (bpage->data) {
+ rb_init_page(bpage->data);
+ } else {
+ bpage->data = alloc_cpu_data(cpu, cpu_buffer->buffer->subbuf_order);
+ if (!bpage->data) {
+ kfree(bpage);
+ return ERR_PTR(-ENOMEM);
+ }
}
- bpage->data = page_address(page);
-
- out:
- rb_init_page(bpage->data);
-
return bpage;
}
EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
@@ -7344,6 +7353,10 @@ consume:
goto out;
}
+ /* Did the reader catch up with the writer? */
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page)
+ goto out;
+
reader = rb_get_reader_page(cpu_buffer);
if (WARN_ON(!reader))
goto out;
diff --git a/kernel/trace/rv/monitors/pagefault/Kconfig b/kernel/trace/rv/monitors/pagefault/Kconfig
index 5e16625f1653..0e013f00c33b 100644
--- a/kernel/trace/rv/monitors/pagefault/Kconfig
+++ b/kernel/trace/rv/monitors/pagefault/Kconfig
@@ -5,6 +5,7 @@ config RV_MON_PAGEFAULT
select RV_LTL_MONITOR
depends on RV_MON_RTAPP
depends on X86 || RISCV
+ depends on MMU
default y
select LTL_MON_EVENTS_ID
bool "pagefault monitor"
diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c
index 74c6bcc2c749..76537b8a4343 100644
--- a/kernel/trace/rv/reactor_panic.c
+++ b/kernel/trace/rv/reactor_panic.c
@@ -13,13 +13,9 @@
#include <linux/init.h>
#include <linux/rv.h>
-__printf(1, 2) static void rv_panic_reaction(const char *msg, ...)
+__printf(1, 0) static void rv_panic_reaction(const char *msg, va_list args)
{
- va_list args;
-
- va_start(args, msg);
vpanic(msg, args);
- va_end(args);
}
static struct rv_reactor rv_panic = {
diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c
index 2dae2916c05f..48c934e315b3 100644
--- a/kernel/trace/rv/reactor_printk.c
+++ b/kernel/trace/rv/reactor_printk.c
@@ -12,13 +12,9 @@
#include <linux/init.h>
#include <linux/rv.h>
-__printf(1, 2) static void rv_printk_reaction(const char *msg, ...)
+__printf(1, 0) static void rv_printk_reaction(const char *msg, va_list args)
{
- va_list args;
-
- va_start(args, msg);
vprintk_deferred(msg, args);
- va_end(args);
}
static struct rv_reactor rv_printk = {
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 48338520376f..ee4e68102f17 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -375,15 +375,13 @@ static ssize_t monitor_enable_write_data(struct file *filp, const char __user *u
if (retval)
return retval;
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
if (val)
retval = rv_enable_monitor(mon);
else
retval = rv_disable_monitor(mon);
- mutex_unlock(&rv_interface_lock);
-
return retval ? : count;
}
@@ -422,35 +420,27 @@ static const struct file_operations interface_desc_fops = {
static int create_monitor_dir(struct rv_monitor *mon, struct rv_monitor *parent)
{
struct dentry *root = parent ? parent->root_d : get_monitors_root();
- const char *name = mon->name;
+ struct dentry *dir __free(rv_remove) = rv_create_dir(mon->name, root);
struct dentry *tmp;
int retval;
- mon->root_d = rv_create_dir(name, root);
- if (!mon->root_d)
+ if (!dir)
return -ENOMEM;
- tmp = rv_create_file("enable", RV_MODE_WRITE, mon->root_d, mon, &interface_enable_fops);
- if (!tmp) {
- retval = -ENOMEM;
- goto out_remove_root;
- }
+ tmp = rv_create_file("enable", RV_MODE_WRITE, dir, mon, &interface_enable_fops);
+ if (!tmp)
+ return -ENOMEM;
- tmp = rv_create_file("desc", RV_MODE_READ, mon->root_d, mon, &interface_desc_fops);
- if (!tmp) {
- retval = -ENOMEM;
- goto out_remove_root;
- }
+ tmp = rv_create_file("desc", RV_MODE_READ, dir, mon, &interface_desc_fops);
+ if (!tmp)
+ return -ENOMEM;
- retval = reactor_populate_monitor(mon);
+ retval = reactor_populate_monitor(mon, dir);
if (retval)
- goto out_remove_root;
+ return retval;
+ mon->root_d = no_free_ptr(dir);
return 0;
-
-out_remove_root:
- rv_remove(mon->root_d);
- return retval;
}
/*
@@ -501,7 +491,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
list_for_each_entry_continue(mon, &rv_monitors_list, list) {
if (mon->enabled)
- return mon;
+ return &mon->list;
}
return NULL;
@@ -509,7 +499,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
{
- struct rv_monitor *mon;
+ struct list_head *head;
loff_t l;
mutex_lock(&rv_interface_lock);
@@ -517,15 +507,15 @@ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
if (list_empty(&rv_monitors_list))
return NULL;
- mon = list_entry(&rv_monitors_list, struct rv_monitor, list);
+ head = &rv_monitors_list;
for (l = 0; l <= *pos; ) {
- mon = enabled_monitors_next(m, mon, &l);
- if (!mon)
+ head = enabled_monitors_next(m, head, &l);
+ if (!head)
break;
}
- return mon;
+ return head;
}
/*
@@ -568,7 +558,7 @@ static void disable_all_monitors(void)
struct rv_monitor *mon;
int enabled = 0;
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
list_for_each_entry(mon, &rv_monitors_list, list)
enabled += __rv_disable_monitor(mon, false);
@@ -581,8 +571,6 @@ static void disable_all_monitors(void)
*/
tracepoint_synchronize_unregister();
}
-
- mutex_unlock(&rv_interface_lock);
}
static int enabled_monitors_open(struct inode *inode, struct file *file)
@@ -623,7 +611,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
if (!len)
return count;
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
retval = -EINVAL;
@@ -644,13 +632,11 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
else
retval = rv_disable_monitor(mon);
- if (!retval)
- retval = count;
-
- break;
+ if (retval)
+ return retval;
+ return count;
}
- mutex_unlock(&rv_interface_lock);
return retval;
}
@@ -737,7 +723,7 @@ static ssize_t monitoring_on_write_data(struct file *filp, const char __user *us
if (retval)
return retval;
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
if (val)
turn_monitoring_on_with_reset();
@@ -750,8 +736,6 @@ static ssize_t monitoring_on_write_data(struct file *filp, const char __user *us
*/
tracepoint_synchronize_unregister();
- mutex_unlock(&rv_interface_lock);
-
return count;
}
@@ -784,28 +768,26 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
return -EINVAL;
}
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
list_for_each_entry(r, &rv_monitors_list, list) {
if (strcmp(monitor->name, r->name) == 0) {
pr_info("Monitor %s is already registered\n", monitor->name);
- retval = -EEXIST;
- goto out_unlock;
+ return -EEXIST;
}
}
if (parent && rv_is_nested_monitor(parent)) {
pr_info("Parent monitor %s is already nested, cannot nest further\n",
parent->name);
- retval = -EINVAL;
- goto out_unlock;
+ return -EINVAL;
}
monitor->parent = parent;
retval = create_monitor_dir(monitor, parent);
if (retval)
- goto out_unlock;
+ return retval;
/* keep children close to the parent for easier visualisation */
if (parent)
@@ -813,9 +795,7 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
else
list_add_tail(&monitor->list, &rv_monitors_list);
-out_unlock:
- mutex_unlock(&rv_interface_lock);
- return retval;
+ return 0;
}
/**
@@ -826,13 +806,12 @@ out_unlock:
*/
int rv_unregister_monitor(struct rv_monitor *monitor)
{
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
rv_disable_monitor(monitor);
list_del(&monitor->list);
destroy_monitor_dir(monitor);
- mutex_unlock(&rv_interface_lock);
return 0;
}
@@ -840,39 +819,36 @@ int __init rv_init_interface(void)
{
struct dentry *tmp;
int retval;
+ struct dentry *root_dir __free(rv_remove) = rv_create_dir("rv", NULL);
- rv_root.root_dir = rv_create_dir("rv", NULL);
- if (!rv_root.root_dir)
- goto out_err;
+ if (!root_dir)
+ return 1;
- rv_root.monitors_dir = rv_create_dir("monitors", rv_root.root_dir);
+ rv_root.monitors_dir = rv_create_dir("monitors", root_dir);
if (!rv_root.monitors_dir)
- goto out_err;
+ return 1;
- tmp = rv_create_file("available_monitors", RV_MODE_READ, rv_root.root_dir, NULL,
+ tmp = rv_create_file("available_monitors", RV_MODE_READ, root_dir, NULL,
&available_monitors_ops);
if (!tmp)
- goto out_err;
+ return 1;
- tmp = rv_create_file("enabled_monitors", RV_MODE_WRITE, rv_root.root_dir, NULL,
+ tmp = rv_create_file("enabled_monitors", RV_MODE_WRITE, root_dir, NULL,
&enabled_monitors_ops);
if (!tmp)
- goto out_err;
+ return 1;
- tmp = rv_create_file("monitoring_on", RV_MODE_WRITE, rv_root.root_dir, NULL,
+ tmp = rv_create_file("monitoring_on", RV_MODE_WRITE, root_dir, NULL,
&monitoring_on_fops);
if (!tmp)
- goto out_err;
- retval = init_rv_reactors(rv_root.root_dir);
+ return 1;
+ retval = init_rv_reactors(root_dir);
if (retval)
- goto out_err;
+ return 1;
turn_monitoring_on();
- return 0;
+ rv_root.root_dir = no_free_ptr(root_dir);
-out_err:
- rv_remove(rv_root.root_dir);
- printk(KERN_ERR "RV: Error while creating the RV interface\n");
- return 1;
+ return 0;
}
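Much of the rv.c churn above is a conversion to the scope-based cleanup helpers from <linux/cleanup.h>: guard(mutex) releases rv_interface_lock on every return path, and a dentry declared with __free(rv_remove) is torn down automatically unless ownership is handed off with no_free_ptr(). A hedged sketch of the pattern (publish_dir() is hypothetical; error paths are only illustrative):

static int demo_create_dir(struct dentry *parent)
{
	struct dentry *dir __free(rv_remove) = rv_create_dir("demo", parent);

	guard(mutex)(&rv_interface_lock);	/* unlocked on any return */

	if (!dir)
		return -ENOMEM;

	if (!rv_create_file("enable", RV_MODE_WRITE, dir, NULL,
			    &interface_enable_fops))
		return -ENOMEM;			/* dir removed automatically */

	/* Success: suppress the automatic cleanup and keep the dentry */
	return publish_dir(no_free_ptr(dir));
}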
diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h
index 1485a70c1bf4..2c0f51ff9d5c 100644
--- a/kernel/trace/rv/rv.h
+++ b/kernel/trace/rv/rv.h
@@ -17,6 +17,8 @@ struct rv_interface {
#define rv_create_file tracefs_create_file
#define rv_remove tracefs_remove
+DEFINE_FREE(rv_remove, struct dentry *, if (_T) rv_remove(_T));
+
#define MAX_RV_MONITOR_NAME_SIZE 32
#define MAX_RV_REACTOR_NAME_SIZE 32
@@ -30,10 +32,10 @@ bool rv_is_container_monitor(struct rv_monitor *mon);
bool rv_is_nested_monitor(struct rv_monitor *mon);
#ifdef CONFIG_RV_REACTORS
-int reactor_populate_monitor(struct rv_monitor *mon);
+int reactor_populate_monitor(struct rv_monitor *mon, struct dentry *root);
int init_rv_reactors(struct dentry *root_dir);
#else
-static inline int reactor_populate_monitor(struct rv_monitor *mon)
+static inline int reactor_populate_monitor(struct rv_monitor *mon, struct dentry *root)
{
return 0;
}
diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c
index d32859fec238..460af07f7aba 100644
--- a/kernel/trace/rv/rv_reactors.c
+++ b/kernel/trace/rv/rv_reactors.c
@@ -61,6 +61,7 @@
* printk
*/
+#include <linux/lockdep.h>
#include <linux/slab.h>
#include "rv.h"
@@ -232,9 +233,7 @@ monitor_reactors_write(struct file *file, const char __user *user_buf,
seq_f = file->private_data;
mon = seq_f->private;
- mutex_lock(&rv_interface_lock);
-
- retval = -EINVAL;
+ guard(mutex)(&rv_interface_lock);
list_for_each_entry(reactor, &rv_reactors_list, list) {
if (strcmp(ptr, reactor->name) != 0)
@@ -242,13 +241,10 @@ monitor_reactors_write(struct file *file, const char __user *user_buf,
monitor_swap_reactors(mon, reactor);
- retval = count;
- break;
+ return count;
}
- mutex_unlock(&rv_interface_lock);
-
- return retval;
+ return -EINVAL;
}
/*
@@ -309,18 +305,14 @@ static int __rv_register_reactor(struct rv_reactor *reactor)
*/
int rv_register_reactor(struct rv_reactor *reactor)
{
- int retval = 0;
-
if (strlen(reactor->name) >= MAX_RV_REACTOR_NAME_SIZE) {
pr_info("Reactor %s has a name longer than %d\n",
reactor->name, MAX_RV_MONITOR_NAME_SIZE);
return -EINVAL;
}
- mutex_lock(&rv_interface_lock);
- retval = __rv_register_reactor(reactor);
- mutex_unlock(&rv_interface_lock);
- return retval;
+ guard(mutex)(&rv_interface_lock);
+ return __rv_register_reactor(reactor);
}
/**
@@ -331,9 +323,8 @@ int rv_register_reactor(struct rv_reactor *reactor)
*/
int rv_unregister_reactor(struct rv_reactor *reactor)
{
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
list_del(&reactor->list);
- mutex_unlock(&rv_interface_lock);
return 0;
}
@@ -347,7 +338,7 @@ static bool __read_mostly reacting_on;
*
* Returns 1 if on, 0 otherwise.
*/
-bool rv_reacting_on(void)
+static bool rv_reacting_on(void)
{
/* Ensures that concurrent monitors read consistent reacting_on */
smp_rmb();
@@ -389,7 +380,7 @@ static ssize_t reacting_on_write_data(struct file *filp, const char __user *user
if (retval)
return retval;
- mutex_lock(&rv_interface_lock);
+ guard(mutex)(&rv_interface_lock);
if (val)
turn_reacting_on();
@@ -402,8 +393,6 @@ static ssize_t reacting_on_write_data(struct file *filp, const char __user *user
*/
tracepoint_synchronize_unregister();
- mutex_unlock(&rv_interface_lock);
-
return count;
}
@@ -416,14 +405,15 @@ static const struct file_operations reacting_on_fops = {
/**
* reactor_populate_monitor - creates per monitor reactors file
* @mon: The monitor.
+ * @root: The directory of the monitor.
*
* Returns 0 if successful, error otherwise.
*/
-int reactor_populate_monitor(struct rv_monitor *mon)
+int reactor_populate_monitor(struct rv_monitor *mon, struct dentry *root)
{
struct dentry *tmp;
- tmp = rv_create_file("reactors", RV_MODE_WRITE, mon->root_d, mon, &monitor_reactors_ops);
+ tmp = rv_create_file("reactors", RV_MODE_WRITE, root, mon, &monitor_reactors_ops);
if (!tmp)
return -ENOMEM;
@@ -438,7 +428,7 @@ int reactor_populate_monitor(struct rv_monitor *mon)
/*
* Nop reactor register
*/
-__printf(1, 2) static void rv_nop_reaction(const char *msg, ...)
+__printf(1, 0) static void rv_nop_reaction(const char *msg, va_list args)
{
}
@@ -450,30 +440,42 @@ static struct rv_reactor rv_nop = {
int init_rv_reactors(struct dentry *root_dir)
{
- struct dentry *available, *reacting;
int retval;
- available = rv_create_file("available_reactors", RV_MODE_READ, root_dir, NULL,
- &available_reactors_ops);
- if (!available)
- goto out_err;
+ struct dentry *available __free(rv_remove) =
+ rv_create_file("available_reactors", RV_MODE_READ, root_dir,
+ NULL, &available_reactors_ops);
+
+ struct dentry *reacting __free(rv_remove) =
+ rv_create_file("reacting_on", RV_MODE_WRITE, root_dir, NULL, &reacting_on_fops);
- reacting = rv_create_file("reacting_on", RV_MODE_WRITE, root_dir, NULL, &reacting_on_fops);
- if (!reacting)
- goto rm_available;
+ if (!reacting || !available)
+ return -ENOMEM;
retval = __rv_register_reactor(&rv_nop);
if (retval)
- goto rm_reacting;
+ return retval;
turn_reacting_on();
+ retain_and_null_ptr(available);
+ retain_and_null_ptr(reacting);
return 0;
+}
+
+void rv_react(struct rv_monitor *monitor, const char *msg, ...)
+{
+ static DEFINE_WAIT_OVERRIDE_MAP(rv_react_map, LD_WAIT_FREE);
+ va_list args;
+
+ if (!rv_reacting_on() || !monitor->react)
+ return;
+
+ va_start(args, msg);
+
+ lock_map_acquire_try(&rv_react_map);
+ monitor->react(msg, args);
+ lock_map_release(&rv_react_map);
-rm_reacting:
- rv_remove(reacting);
-rm_available:
- rv_remove(available);
-out_err:
- return -ENOMEM;
+ va_end(args);
}
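With the reactors converted to take a va_list, rv_react() above becomes the single variadic entry point: it checks rv_reacting_on(), takes the lockdep wait-override map and forwards the arguments to monitor->react(). A hypothetical call site in a monitor would then look like:

/* Illustrative only: how a monitor might report a violation now */
static void demo_handle_violation(struct rv_monitor *mon, int cpu)
{
	rv_react(mon, "rv: monitor %s violated on cpu %d\n", mon->name, cpu);
}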
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d1e527cf2aae..c9fbb316dcbd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -20,6 +20,7 @@
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
+#include <linux/syscalls.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
@@ -93,17 +94,13 @@ static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
-/* For tracers that don't implement custom flags */
-static struct tracer_opt dummy_tracer_opt[] = {
- { }
+/* Store tracers and their flags per instance */
+struct tracers {
+ struct list_head list;
+ struct tracer *tracer;
+ struct tracer_flags *flags;
};
-static int
-dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
-{
- return 0;
-}
-
/*
* To prevent the comm cache from being overwritten when no
* tracing is active, only save the comm when a trace event
@@ -512,22 +509,23 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS \
- (FUNCTION_DEFAULT_FLAGS | \
- TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
- TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
- TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
- TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
- TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
- TRACE_ITER_COPY_MARKER)
+ (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \
+ TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \
+ TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \
+ TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \
+ TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \
+ TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \
+ TRACE_ITER(COPY_MARKER))
/* trace_options that are only supported by global_trace */
-#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
- TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
+#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \
+ TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \
+ TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
- (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
- TRACE_ITER_COPY_MARKER)
+ (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
+ TRACE_ITER(COPY_MARKER))
/*
* The global_trace is the descriptor that holds the top-level tracing
@@ -558,9 +556,9 @@ static void update_printk_trace(struct trace_array *tr)
if (printk_trace == tr)
return;
- printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
+ printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
printk_trace = tr;
- tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
+ tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
}
/* Returns true if the status of tr changed */
@@ -573,7 +571,7 @@ static bool update_marker_trace(struct trace_array *tr, int enabled)
return false;
list_add_rcu(&tr->marker_list, &marker_copies);
- tr->trace_flags |= TRACE_ITER_COPY_MARKER;
+ tr->trace_flags |= TRACE_ITER(COPY_MARKER);
return true;
}
@@ -581,7 +579,7 @@ static bool update_marker_trace(struct trace_array *tr, int enabled)
return false;
list_del_init(&tr->marker_list);
- tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
+ tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
return true;
}
@@ -1139,7 +1137,7 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip,
unsigned int trace_ctx;
int alloc;
- if (!(tr->trace_flags & TRACE_ITER_PRINTK))
+ if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
return 0;
if (unlikely(tracing_selftest_running && tr == &global_trace))
@@ -1205,7 +1203,7 @@ int __trace_bputs(unsigned long ip, const char *str)
if (!printk_binsafe(tr))
return __trace_puts(ip, str, strlen(str));
- if (!(tr->trace_flags & TRACE_ITER_PRINTK))
+ if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
return 0;
if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -2173,6 +2171,7 @@ static int save_selftest(struct tracer *type)
static int run_tracer_selftest(struct tracer *type)
{
struct trace_array *tr = &global_trace;
+ struct tracer_flags *saved_flags = tr->current_trace_flags;
struct tracer *saved_tracer = tr->current_trace;
int ret;
@@ -2203,6 +2202,7 @@ static int run_tracer_selftest(struct tracer *type)
tracing_reset_online_cpus(&tr->array_buffer);
tr->current_trace = type;
+ tr->current_trace_flags = type->flags ? : type->default_flags;
#ifdef CONFIG_TRACER_MAX_TRACE
if (type->use_max_tr) {
@@ -2219,6 +2219,7 @@ static int run_tracer_selftest(struct tracer *type)
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
tr->current_trace = saved_tracer;
+ tr->current_trace_flags = saved_flags;
if (ret) {
printk(KERN_CONT "FAILED!\n");
/* Add the warning after printing 'FAILED' */
@@ -2311,10 +2312,23 @@ static inline int do_run_tracer_selftest(struct tracer *type)
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */
-static void add_tracer_options(struct trace_array *tr, struct tracer *t);
+static int add_tracer(struct trace_array *tr, struct tracer *t);
static void __init apply_trace_boot_options(void);
+static void free_tracers(struct trace_array *tr)
+{
+ struct tracers *t, *n;
+
+ lockdep_assert_held(&trace_types_lock);
+
+ list_for_each_entry_safe(t, n, &tr->tracers, list) {
+ list_del(&t->list);
+ kfree(t->flags);
+ kfree(t);
+ }
+}
+
/**
* register_tracer - register a tracer with the ftrace system.
* @type: the plugin for the tracer
@@ -2323,6 +2337,7 @@ static void __init apply_trace_boot_options(void);
*/
int __init register_tracer(struct tracer *type)
{
+ struct trace_array *tr;
struct tracer *t;
int ret = 0;
@@ -2354,31 +2369,25 @@ int __init register_tracer(struct tracer *type)
}
}
- if (!type->set_flag)
- type->set_flag = &dummy_set_flag;
- if (!type->flags) {
- /*allocate a dummy tracer_flags*/
- type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
- if (!type->flags) {
- ret = -ENOMEM;
- goto out;
- }
- type->flags->val = 0;
- type->flags->opts = dummy_tracer_opt;
- } else
- if (!type->flags->opts)
- type->flags->opts = dummy_tracer_opt;
-
/* store the tracer for __set_tracer_option */
- type->flags->trace = type;
+ if (type->flags)
+ type->flags->trace = type;
ret = do_run_tracer_selftest(type);
if (ret < 0)
goto out;
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ ret = add_tracer(tr, type);
+ if (ret < 0) {
+ /* The tracer will still exist but without options */
+ pr_warn("Failed to create tracer options for %s\n", type->name);
+ break;
+ }
+ }
+
type->next = trace_types;
trace_types = type;
- add_tracer_options(&global_trace, type);
out:
mutex_unlock(&trace_types_lock);
@@ -2391,7 +2400,7 @@ int __init register_tracer(struct tracer *type)
printk(KERN_INFO "Starting tracer '%s'\n", type->name);
/* Do we want this tracer to start on bootup? */
- tracing_set_tracer(&global_trace, type->name);
+ WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
default_bootup_tracer = NULL;
apply_trace_boot_options();
@@ -3078,7 +3087,7 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
unsigned int trace_ctx,
int skip, struct pt_regs *regs)
{
- if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
+ if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
return;
__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
@@ -3139,7 +3148,7 @@ ftrace_trace_userstack(struct trace_array *tr,
struct ring_buffer_event *event;
struct userstack_entry *entry;
- if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
+ if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
return;
/*
@@ -3484,7 +3493,7 @@ int trace_array_printk(struct trace_array *tr,
if (tr == &global_trace)
return 0;
- if (!(tr->trace_flags & TRACE_ITER_PRINTK))
+ if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
return 0;
va_start(ap, fmt);
@@ -3521,7 +3530,7 @@ int trace_array_printk_buf(struct trace_buffer *buffer,
int ret;
va_list ap;
- if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
+ if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
return 0;
va_start(ap, fmt);
@@ -3791,7 +3800,7 @@ const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
if (WARN_ON_ONCE(!fmt))
return fmt;
- if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
+ if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
return fmt;
p = fmt;
@@ -4113,7 +4122,7 @@ static void print_event_info(struct array_buffer *buf, struct seq_file *m)
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
unsigned int flags)
{
- bool tgid = flags & TRACE_ITER_RECORD_TGID;
+ bool tgid = flags & TRACE_ITER(RECORD_TGID);
print_event_info(buf, m);
@@ -4124,7 +4133,7 @@ static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
unsigned int flags)
{
- bool tgid = flags & TRACE_ITER_RECORD_TGID;
+ bool tgid = flags & TRACE_ITER(RECORD_TGID);
static const char space[] = " ";
int prec = tgid ? 12 : 2;
@@ -4197,7 +4206,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
struct trace_seq *s = &iter->seq;
struct trace_array *tr = iter->tr;
- if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
+ if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
return;
if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
@@ -4219,6 +4228,22 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
iter->cpu);
}
+#ifdef CONFIG_FTRACE_SYSCALLS
+static bool is_syscall_event(struct trace_event *event)
+{
+ return (event->funcs == &enter_syscall_print_funcs) ||
+ (event->funcs == &exit_syscall_print_funcs);
+
+}
+#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
+#else
+static inline bool is_syscall_event(struct trace_event *event)
+{
+ return false;
+}
+#define syscall_buf_size 0
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
struct trace_array *tr = iter->tr;
@@ -4233,7 +4258,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
event = ftrace_find_event(entry->type);
- if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
if (iter->iter_flags & TRACE_FILE_LAT_FMT)
trace_print_lat_context(iter);
else
@@ -4244,17 +4269,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
return TRACE_TYPE_PARTIAL_LINE;
if (event) {
- if (tr->trace_flags & TRACE_ITER_FIELDS)
+ if (tr->trace_flags & TRACE_ITER(FIELDS))
return print_event_fields(iter, event);
/*
* For TRACE_EVENT() events, the print_fmt is not
* safe to use if the array has delta offsets
* Force printing via the fields.
*/
- if ((tr->text_delta) &&
- event->type > __TRACE_LAST_TYPE)
+ if ((tr->text_delta)) {
+ /* ftrace and system call events are still OK */
+ if ((event->type > __TRACE_LAST_TYPE) &&
+ !is_syscall_event(event))
return print_event_fields(iter, event);
-
+ }
return event->funcs->trace(iter, sym_flags, event);
}
@@ -4272,7 +4299,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
entry = iter->ent;
- if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
+ if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
trace_seq_printf(s, "%d %d %llu ",
entry->pid, iter->cpu, iter->ts);
@@ -4298,7 +4325,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
entry = iter->ent;
- if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
SEQ_PUT_HEX_FIELD(s, entry->pid);
SEQ_PUT_HEX_FIELD(s, iter->cpu);
SEQ_PUT_HEX_FIELD(s, iter->ts);
@@ -4327,7 +4354,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
entry = iter->ent;
- if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
SEQ_PUT_FIELD(s, entry->pid);
SEQ_PUT_FIELD(s, iter->cpu);
SEQ_PUT_FIELD(s, iter->ts);
@@ -4398,27 +4425,27 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
}
if (iter->ent->type == TRACE_BPUTS &&
- trace_flags & TRACE_ITER_PRINTK &&
- trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ trace_flags & TRACE_ITER(PRINTK) &&
+ trace_flags & TRACE_ITER(PRINTK_MSGONLY))
return trace_print_bputs_msg_only(iter);
if (iter->ent->type == TRACE_BPRINT &&
- trace_flags & TRACE_ITER_PRINTK &&
- trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ trace_flags & TRACE_ITER(PRINTK) &&
+ trace_flags & TRACE_ITER(PRINTK_MSGONLY))
return trace_print_bprintk_msg_only(iter);
if (iter->ent->type == TRACE_PRINT &&
- trace_flags & TRACE_ITER_PRINTK &&
- trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ trace_flags & TRACE_ITER(PRINTK) &&
+ trace_flags & TRACE_ITER(PRINTK_MSGONLY))
return trace_print_printk_msg_only(iter);
- if (trace_flags & TRACE_ITER_BIN)
+ if (trace_flags & TRACE_ITER(BIN))
return print_bin_fmt(iter);
- if (trace_flags & TRACE_ITER_HEX)
+ if (trace_flags & TRACE_ITER(HEX))
return print_hex_fmt(iter);
- if (trace_flags & TRACE_ITER_RAW)
+ if (trace_flags & TRACE_ITER(RAW))
return print_raw_fmt(iter);
return print_trace_fmt(iter);
@@ -4436,7 +4463,7 @@ void trace_latency_header(struct seq_file *m)
if (iter->iter_flags & TRACE_FILE_LAT_FMT)
print_trace_header(m, iter);
- if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
+ if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
print_lat_help_header(m);
}
@@ -4446,7 +4473,7 @@ void trace_default_header(struct seq_file *m)
struct trace_array *tr = iter->tr;
unsigned long trace_flags = tr->trace_flags;
- if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+ if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
return;
if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
@@ -4454,11 +4481,11 @@ void trace_default_header(struct seq_file *m)
if (trace_empty(iter))
return;
print_trace_header(m, iter);
- if (!(trace_flags & TRACE_ITER_VERBOSE))
+ if (!(trace_flags & TRACE_ITER(VERBOSE)))
print_lat_help_header(m);
} else {
- if (!(trace_flags & TRACE_ITER_VERBOSE)) {
- if (trace_flags & TRACE_ITER_IRQ_INFO)
+ if (!(trace_flags & TRACE_ITER(VERBOSE))) {
+ if (trace_flags & TRACE_ITER(IRQ_INFO))
print_func_help_header_irq(iter->array_buffer,
m, trace_flags);
else
@@ -4682,7 +4709,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
* If pause-on-trace is enabled, then stop the trace while
* dumping, unless this is the "snapshot" file
*/
- if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
+ if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE)))
tracing_stop_tr(tr);
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
@@ -4876,7 +4903,7 @@ static int tracing_open(struct inode *inode, struct file *file)
iter = __tracing_open(inode, file, false);
if (IS_ERR(iter))
ret = PTR_ERR(iter);
- else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
+ else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
iter->iter_flags |= TRACE_FILE_LAT_FMT;
}
@@ -5139,21 +5166,26 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
{
struct tracer_opt *trace_opts;
struct trace_array *tr = m->private;
+ struct tracer_flags *flags;
u32 tracer_flags;
int i;
guard(mutex)(&trace_types_lock);
- tracer_flags = tr->current_trace->flags->val;
- trace_opts = tr->current_trace->flags->opts;
-
for (i = 0; trace_options[i]; i++) {
- if (tr->trace_flags & (1 << i))
+ if (tr->trace_flags & (1ULL << i))
seq_printf(m, "%s\n", trace_options[i]);
else
seq_printf(m, "no%s\n", trace_options[i]);
}
+ flags = tr->current_trace_flags;
+ if (!flags || !flags->opts)
+ return 0;
+
+ tracer_flags = flags->val;
+ trace_opts = flags->opts;
+
for (i = 0; trace_opts[i].name; i++) {
if (tracer_flags & trace_opts[i].bit)
seq_printf(m, "%s\n", trace_opts[i].name);
@@ -5169,9 +5201,10 @@ static int __set_tracer_option(struct trace_array *tr,
struct tracer_opt *opts, int neg)
{
struct tracer *trace = tracer_flags->trace;
- int ret;
+ int ret = 0;
- ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
+ if (trace->set_flag)
+ ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
if (ret)
return ret;
@@ -5185,37 +5218,41 @@ static int __set_tracer_option(struct trace_array *tr,
/* Try to assign a tracer specific option */
static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
{
- struct tracer *trace = tr->current_trace;
- struct tracer_flags *tracer_flags = trace->flags;
+ struct tracer_flags *tracer_flags = tr->current_trace_flags;
struct tracer_opt *opts = NULL;
int i;
+ if (!tracer_flags || !tracer_flags->opts)
+ return 0;
+
for (i = 0; tracer_flags->opts[i].name; i++) {
opts = &tracer_flags->opts[i];
if (strcmp(cmp, opts->name) == 0)
- return __set_tracer_option(tr, trace->flags, opts, neg);
+ return __set_tracer_option(tr, tracer_flags, opts, neg);
}
return -EINVAL;
}
/* Some tracers require overwrite to stay enabled */
-int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
+int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
{
- if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
+ if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
return -1;
return 0;
}
-int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
+int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
- if ((mask == TRACE_ITER_RECORD_TGID) ||
- (mask == TRACE_ITER_RECORD_CMD) ||
- (mask == TRACE_ITER_TRACE_PRINTK) ||
- (mask == TRACE_ITER_COPY_MARKER))
+ switch (mask) {
+ case TRACE_ITER(RECORD_TGID):
+ case TRACE_ITER(RECORD_CMD):
+ case TRACE_ITER(TRACE_PRINTK):
+ case TRACE_ITER(COPY_MARKER):
lockdep_assert_held(&event_mutex);
+ }
/* do nothing if flag is already set */
if (!!(tr->trace_flags & mask) == !!enabled)
@@ -5226,7 +5263,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (tr->current_trace->flag_changed(tr, mask, !!enabled))
return -EINVAL;
- if (mask == TRACE_ITER_TRACE_PRINTK) {
+ switch (mask) {
+ case TRACE_ITER(TRACE_PRINTK):
if (enabled) {
update_printk_trace(tr);
} else {
@@ -5243,45 +5281,59 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (printk_trace == tr)
update_printk_trace(&global_trace);
}
- }
+ break;
- if (mask == TRACE_ITER_COPY_MARKER)
+ case TRACE_ITER(COPY_MARKER):
update_marker_trace(tr, enabled);
+ /* update_marker_trace updates the tr->trace_flags */
+ return 0;
+ }
if (enabled)
tr->trace_flags |= mask;
else
tr->trace_flags &= ~mask;
- if (mask == TRACE_ITER_RECORD_CMD)
+ switch (mask) {
+ case TRACE_ITER(RECORD_CMD):
trace_event_enable_cmd_record(enabled);
+ break;
- if (mask == TRACE_ITER_RECORD_TGID) {
+ case TRACE_ITER(RECORD_TGID):
if (trace_alloc_tgid_map() < 0) {
- tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
+ tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
return -ENOMEM;
}
trace_event_enable_tgid_record(enabled);
- }
+ break;
- if (mask == TRACE_ITER_EVENT_FORK)
+ case TRACE_ITER(EVENT_FORK):
trace_event_follow_fork(tr, enabled);
+ break;
- if (mask == TRACE_ITER_FUNC_FORK)
+ case TRACE_ITER(FUNC_FORK):
ftrace_pid_follow_fork(tr, enabled);
+ break;
- if (mask == TRACE_ITER_OVERWRITE) {
+ case TRACE_ITER(OVERWRITE):
ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
#endif
- }
+ break;
- if (mask == TRACE_ITER_PRINTK) {
+ case TRACE_ITER(PRINTK):
trace_printk_start_stop_comm(enabled);
trace_printk_control(enabled);
+ break;
+
+#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
+ case TRACE_GRAPH_GRAPH_TIME:
+ ftrace_graph_graph_time_control(enabled);
+ break;
+#endif
}
return 0;
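The option handling above moves from TRACE_ITER_* constants to a TRACE_ITER() macro and widens the trace_flags masks to 64 bits, which is why new shifts use 1ULL. Presumably the macro maps an option name to its bit, roughly as follows (an assumption; the real definition lives in trace.h, outside this hunk):

#define TRACE_ITER(opt)		(1ULL << TRACE_ITER_##opt##_BIT)

/* so call sites stay readable and 64-bit clean: */
if (tr->trace_flags & TRACE_ITER(STACKTRACE))
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);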
@@ -5311,7 +5363,7 @@ int trace_set_options(struct trace_array *tr, char *option)
if (ret < 0)
ret = set_tracer_option(tr, cmp, neg);
else
- ret = set_tracer_flag(tr, 1 << ret, !neg);
+ ret = set_tracer_flag(tr, 1ULL << ret, !neg);
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -6215,11 +6267,6 @@ int tracing_update_buffers(struct trace_array *tr)
return ret;
}
-struct trace_option_dentry;
-
-static void
-create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
-
/*
* Used to clear out the tracer before deletion of an instance.
* Must have trace_types_lock held.
@@ -6235,26 +6282,15 @@ static void tracing_set_nop(struct trace_array *tr)
tr->current_trace->reset(tr);
tr->current_trace = &nop_trace;
+ tr->current_trace_flags = nop_trace.flags;
}
static bool tracer_options_updated;
-static void add_tracer_options(struct trace_array *tr, struct tracer *t)
-{
- /* Only enable if the directory has been created already. */
- if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
- return;
-
- /* Only create trace option files after update_tracer_options finish */
- if (!tracer_options_updated)
- return;
-
- create_trace_option_files(tr, t);
-}
-
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
- struct tracer *t;
+ struct tracer *trace = NULL;
+ struct tracers *t;
#ifdef CONFIG_TRACER_MAX_TRACE
bool had_max_tr;
#endif
@@ -6272,18 +6308,20 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
ret = 0;
}
- for (t = trace_types; t; t = t->next) {
- if (strcmp(t->name, buf) == 0)
+ list_for_each_entry(t, &tr->tracers, list) {
+ if (strcmp(t->tracer->name, buf) == 0) {
+ trace = t->tracer;
break;
+ }
}
- if (!t)
+ if (!trace)
return -EINVAL;
- if (t == tr->current_trace)
+ if (trace == tr->current_trace)
return 0;
#ifdef CONFIG_TRACER_SNAPSHOT
- if (t->use_max_tr) {
+ if (trace->use_max_tr) {
local_irq_disable();
arch_spin_lock(&tr->max_lock);
ret = tr->cond_snapshot ? -EBUSY : 0;
@@ -6294,14 +6332,14 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
}
#endif
/* Some tracers won't work on kernel command line */
- if (system_state < SYSTEM_RUNNING && t->noboot) {
+ if (system_state < SYSTEM_RUNNING && trace->noboot) {
pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
- t->name);
+ trace->name);
return -EINVAL;
}
/* Some tracers are only allowed for the top level buffer */
- if (!trace_ok_for_array(t, tr))
+ if (!trace_ok_for_array(trace, tr))
return -EINVAL;
/* If trace pipe files are being read, we can't change the tracer */
@@ -6320,8 +6358,9 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
/* Current trace needs to be nop_trace before synchronize_rcu */
tr->current_trace = &nop_trace;
+ tr->current_trace_flags = nop_trace.flags;
- if (had_max_tr && !t->use_max_tr) {
+ if (had_max_tr && !trace->use_max_tr) {
/*
* We need to make sure that the update_max_tr sees that
* current_trace changed to nop_trace to keep it from
@@ -6334,7 +6373,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
tracing_disarm_snapshot(tr);
}
- if (!had_max_tr && t->use_max_tr) {
+ if (!had_max_tr && trace->use_max_tr) {
ret = tracing_arm_snapshot_locked(tr);
if (ret)
return ret;
@@ -6343,18 +6382,21 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
tr->current_trace = &nop_trace;
#endif
- if (t->init) {
- ret = tracer_init(t, tr);
+ tr->current_trace_flags = t->flags ? : t->tracer->flags;
+
+ if (trace->init) {
+ ret = tracer_init(trace, tr);
if (ret) {
#ifdef CONFIG_TRACER_MAX_TRACE
- if (t->use_max_tr)
+ if (trace->use_max_tr)
tracing_disarm_snapshot(tr);
#endif
+ tr->current_trace_flags = nop_trace.flags;
return ret;
}
}
- tr->current_trace = t;
+ tr->current_trace = trace;
tr->current_trace->enabled++;
trace_branch_enable(tr);
@@ -6532,7 +6574,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
/* trace pipe does not show start of buffer */
cpumask_setall(iter->started);
- if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
+ if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
iter->iter_flags |= TRACE_FILE_LAT_FMT;
/* Output in nanoseconds only if we are using a clock in nanoseconds. */
@@ -6593,7 +6635,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
if (trace_buffer_iter(iter, iter->cpu_file))
return EPOLLIN | EPOLLRDNORM;
- if (tr->trace_flags & TRACE_ITER_BLOCK)
+ if (tr->trace_flags & TRACE_ITER(BLOCK))
/*
* Always select as readable when in blocking mode
*/
@@ -6912,6 +6954,43 @@ out_err:
}
static ssize_t
+tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct inode *inode = file_inode(filp);
+ struct trace_array *tr = inode->i_private;
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct inode *inode = file_inode(filp);
+ struct trace_array *tr = inode->i_private;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > SYSCALL_FAULT_USER_MAX)
+ val = SYSCALL_FAULT_USER_MAX;
+
+ tr->syscall_buf_sz = val;
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
+static ssize_t
tracing_entries_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
@@ -7145,7 +7224,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
struct trace_array *tr = inode->i_private;
/* disable tracing ? */
- if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
+ if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
tracer_tracing_off(tr);
/* resize the ring buffer to 0 */
tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
@@ -7223,52 +7302,43 @@ struct trace_user_buf {
char *buf;
};
-struct trace_user_buf_info {
- struct trace_user_buf __percpu *tbuf;
- int ref;
-};
-
-
static DEFINE_MUTEX(trace_user_buffer_mutex);
static struct trace_user_buf_info *trace_user_buffer;
-static void trace_user_fault_buffer_free(struct trace_user_buf_info *tinfo)
+/**
+ * trace_user_fault_destroy - free up allocated memory of a trace user buffer
+ * @tinfo: The descriptor to free up
+ *
+ * Frees any data allocated in the trace info descriptor.
+ */
+void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
char *buf;
int cpu;
+ if (!tinfo || !tinfo->tbuf)
+ return;
+
for_each_possible_cpu(cpu) {
buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
kfree(buf);
}
free_percpu(tinfo->tbuf);
- kfree(tinfo);
}
-static int trace_user_fault_buffer_enable(void)
+static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
- struct trace_user_buf_info *tinfo;
char *buf;
int cpu;
- guard(mutex)(&trace_user_buffer_mutex);
-
- if (trace_user_buffer) {
- trace_user_buffer->ref++;
- return 0;
- }
-
- tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
- if (!tinfo)
- return -ENOMEM;
+ lockdep_assert_held(&trace_user_buffer_mutex);
tinfo->tbuf = alloc_percpu(struct trace_user_buf);
- if (!tinfo->tbuf) {
- kfree(tinfo);
+ if (!tinfo->tbuf)
return -ENOMEM;
- }
tinfo->ref = 1;
+ tinfo->size = size;
/* Clear each buffer in case of error */
for_each_possible_cpu(cpu) {
@@ -7276,42 +7346,165 @@ static int trace_user_fault_buffer_enable(void)
}
for_each_possible_cpu(cpu) {
- buf = kmalloc_node(TRACE_MARKER_MAX_SIZE, GFP_KERNEL,
+ buf = kmalloc_node(size, GFP_KERNEL,
cpu_to_node(cpu));
- if (!buf) {
- trace_user_fault_buffer_free(tinfo);
+ if (!buf)
return -ENOMEM;
- }
per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
}
- trace_user_buffer = tinfo;
-
return 0;
}
-static void trace_user_fault_buffer_disable(void)
+/* For internal use. Free and reinitialize */
+static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
- struct trace_user_buf_info *tinfo;
+ lockdep_assert_held(&trace_user_buffer_mutex);
- guard(mutex)(&trace_user_buffer_mutex);
+ trace_user_fault_destroy(*tinfo);
+ kfree(*tinfo);
+ *tinfo = NULL;
+}
+
+/* For internal use. Initialize and allocate */
+static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
+{
+ bool alloc = false;
+ int ret;
+
+ lockdep_assert_held(&trace_user_buffer_mutex);
+
+ if (!*tinfo) {
+ alloc = true;
+ *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
+ if (!*tinfo)
+ return -ENOMEM;
+ }
- tinfo = trace_user_buffer;
+ ret = user_fault_buffer_enable(*tinfo, size);
+ if (ret < 0 && alloc)
+ user_buffer_free(tinfo);
- if (WARN_ON_ONCE(!tinfo))
+ return ret;
+}
+
+/* For internal use, drop a reference and free if necessary */
+static void user_buffer_put(struct trace_user_buf_info **tinfo)
+{
+ guard(mutex)(&trace_user_buffer_mutex);
+
+ if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
return;
- if (--tinfo->ref)
+ if (--(*tinfo)->ref)
return;
- trace_user_fault_buffer_free(tinfo);
- trace_user_buffer = NULL;
+ user_buffer_free(tinfo);
+}
+
+/**
+ * trace_user_fault_init - Allocate or reference a per CPU buffer
+ * @tinfo: A pointer to the trace buffer descriptor
+ * @size: The size to allocate each per CPU buffer
+ *
+ * Create a per CPU buffer that can be used to copy from user space
+ * in a task context. trace_user_fault_read() must be called with
+ * preemption disabled; it temporarily enables preemption to copy the
+ * user space data into the buffer. If a context switch occurs during
+ * the copy, it retries until the copy completes without one, so the
+ * buffer content is known to be valid.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
+{
+ int ret;
+
+ if (!tinfo)
+ return -EINVAL;
+
+ guard(mutex)(&trace_user_buffer_mutex);
+
+ ret = user_buffer_init(&tinfo, size);
+ if (ret < 0)
+ trace_user_fault_destroy(tinfo);
+
+ return ret;
+}
+
+/**
+ * trace_user_fault_get - up the ref count for the user buffer
+ * @tinfo: A pointer to a pointer to the trace buffer descriptor
+ *
+ * Ups the ref count of the trace buffer.
+ *
+ * Returns the new ref count.
+ */
+int trace_user_fault_get(struct trace_user_buf_info *tinfo)
+{
+ if (!tinfo)
+ return -1;
+
+ guard(mutex)(&trace_user_buffer_mutex);
+
+ tinfo->ref++;
+ return tinfo->ref;
+}
+
+/**
+ * trace_user_fault_put - drop a reference to a per cpu trace buffer
+ * @tinfo: The @tinfo that was passed to trace_user_fault_get()
+ *
+ * Decrement the ref count of @tinfo.
+ *
+ * Returns the new refcount (negative on error).
+ */
+int trace_user_fault_put(struct trace_user_buf_info *tinfo)
+{
+ guard(mutex)(&trace_user_buffer_mutex);
+
+ if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
+ return -1;
+
+ --tinfo->ref;
+ return tinfo->ref;
}
-/* Must be called with preemption disabled */
-static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
- const char __user *ptr, size_t size,
- size_t *read_size)
+/**
+ * trace_user_fault_read - Read user space into a per CPU buffer
+ * @tinfo: The @tinfo initialized by trace_user_fault_init()
+ * @ptr: The user space pointer to read
+ * @size: The size of user space to read.
+ * @copy_func: Optional function to use to copy from user space
+ * @data: Data to pass to copy_func if it was supplied
+ *
+ * Preemption must be disabled when this is called, and must not
+ * be enabled while using the returned buffer.
+ * This does the copying from user space into a per CPU buffer.
+ *
+ * The @size must not be greater than the size passed in to
+ * trace_user_fault_init().
+ *
+ * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
+ * otherwise it will call @copy_func. It will call @copy_func with:
+ *
+ * buffer: the per CPU buffer of the @tinfo.
+ * ptr: The pointer @ptr to user space to read
+ * size: The @size of the ptr to read
+ * data: The @data parameter
+ *
+ * It is expected that @copy_func will return 0 on success and non-zero
+ * if there was a fault.
+ *
+ * Returns a pointer to the buffer with the content read from @ptr.
+ * Preemption must remain disabled while the caller accesses the
+ * buffer returned by this function.
+ * Returns NULL if there was a fault, or the size passed in is
+ * greater than the size passed to trace_user_fault_init().
+ */
+char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
+ const char __user *ptr, size_t size,
+ trace_user_buf_copy copy_func, void *data)
{
int cpu = smp_processor_id();
char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
@@ -7319,9 +7512,14 @@ static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
int trys = 0;
int ret;
- if (size > TRACE_MARKER_MAX_SIZE)
- size = TRACE_MARKER_MAX_SIZE;
- *read_size = 0;
+ lockdep_assert_preemption_disabled();
+
+ /*
+ * It's up to the caller to not try to copy more than it said
+ * it would.
+ */
+ if (size > tinfo->size)
+ return NULL;
/*
* This acts similar to a seqcount. The per CPU context switches are
@@ -7361,7 +7559,14 @@ static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
*/
preempt_enable_notrace();
- ret = __copy_from_user(buffer, ptr, size);
+ /* Make sure preemption is enabled here */
+ lockdep_assert_preemption_enabled();
+
+ if (copy_func) {
+ ret = copy_func(buffer, ptr, size, data);
+ } else {
+ ret = __copy_from_user(buffer, ptr, size);
+ }
preempt_disable_notrace();
migrate_enable();
@@ -7378,7 +7583,6 @@ static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
*/
} while (nr_context_switches_cpu(cpu) != cnt);
- *read_size = size;
return buffer;
}
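
Editorial aside, not part of the patch: the trace_user_fault_* API above is easiest to follow end to end in a small caller. The sketch below is hedged: the example_* names and the event write-out are hypothetical, only trace_user_fault_init()/read()/destroy() and the preemption rules come from the hunks above, and the optional copy function follows the 0-on-success convention described in the kerneldoc.

static struct trace_user_buf_info example_tinfo;

/* Optional @copy_func: adapt strncpy_from_user() to the 0/non-zero contract */
static int example_copy_str(char *dst, const char __user *src,
			    size_t size, void *data)
{
	long ret = strncpy_from_user(dst, src, size);

	return ret < 0 ? -EFAULT : 0;
}

static int example_setup(void)
{
	/* One buffer per CPU, sized for the largest read that will be asked for */
	return trace_user_fault_init(&example_tinfo, 256);
}

static void example_record(const char __user *uptr, size_t len)
{
	char *buf;

	/* The returned buffer is per CPU; keep preemption off while using it */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(&example_tinfo, uptr, len,
				    example_copy_str, NULL);
	if (!buf)
		return;	/* fault, or len larger than the size passed to init */

	/* ... reserve a ring buffer event and copy buf into it here ... */
}

static void example_teardown(void)
{
	trace_user_fault_destroy(&example_tinfo);
}
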
@@ -7389,13 +7593,12 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
struct trace_array *tr = filp->private_data;
ssize_t written = -ENODEV;
unsigned long ip;
- size_t size;
char *buf;
if (tracing_disabled)
return -EINVAL;
- if (!(tr->trace_flags & TRACE_ITER_MARKERS))
+ if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
return -EINVAL;
if ((ssize_t)cnt < 0)
@@ -7407,13 +7610,10 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
/* Must have preemption disabled while having access to the buffer */
guard(preempt_notrace)();
- buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
+ buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
if (!buf)
return -EFAULT;
- if (cnt > size)
- cnt = size;
-
/* The selftests expect this function to be the IP address */
ip = _THIS_IP_;
@@ -7442,7 +7642,7 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
size_t size;
/* cnt includes both the entry->id and the data behind it. */
- size = struct_size(entry, buf, cnt - sizeof(entry->id));
+ size = struct_offset(entry, id) + cnt;
buffer = tr->array_buffer.buffer;
@@ -7473,30 +7673,29 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
{
struct trace_array *tr = filp->private_data;
ssize_t written = -ENODEV;
- size_t size;
char *buf;
if (tracing_disabled)
return -EINVAL;
- if (!(tr->trace_flags & TRACE_ITER_MARKERS))
+ if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
return -EINVAL;
/* The marker must at least have a tag id */
if (cnt < sizeof(unsigned int))
return -EINVAL;
+ /* raw write is all or nothing */
+ if (cnt > TRACE_MARKER_MAX_SIZE)
+ return -EINVAL;
+
/* Must have preemption disabled while having access to the buffer */
guard(preempt_notrace)();
- buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
+ buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
if (!buf)
return -EFAULT;
- /* raw write is all or nothing */
- if (cnt > size)
- return -EINVAL;
-
/* The global trace_marker_raw can go to multiple instances */
if (tr == &global_trace) {
guard(rcu)();
@@ -7516,20 +7715,26 @@ static int tracing_mark_open(struct inode *inode, struct file *filp)
{
int ret;
- ret = trace_user_fault_buffer_enable();
- if (ret < 0)
- return ret;
+ scoped_guard(mutex, &trace_user_buffer_mutex) {
+ if (!trace_user_buffer) {
+ ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
+ if (ret < 0)
+ return ret;
+ } else {
+ trace_user_buffer->ref++;
+ }
+ }
stream_open(inode, filp);
ret = tracing_open_generic_tr(inode, filp);
if (ret < 0)
- trace_user_fault_buffer_disable();
+ user_buffer_put(&trace_user_buffer);
return ret;
}
static int tracing_mark_release(struct inode *inode, struct file *file)
{
- trace_user_fault_buffer_disable();
+ user_buffer_put(&trace_user_buffer);
return tracing_release_generic_tr(inode, file);
}
@@ -7917,6 +8122,14 @@ static const struct file_operations tracing_entries_fops = {
.release = tracing_release_generic_tr,
};
+static const struct file_operations tracing_syscall_buf_fops = {
+ .open = tracing_open_generic_tr,
+ .read = tracing_syscall_buf_read,
+ .write = tracing_syscall_buf_write,
+ .llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
+};
+
static const struct file_operations tracing_buffer_meta_fops = {
.open = tracing_buffer_meta_open,
.read = seq_read,
@@ -8781,8 +8994,18 @@ static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
put_snapshot_map(iter->tr);
}
+static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+ /*
+ * Trace buffer mappings require the complete buffer including
+ * the meta page. Partial mappings are not supported.
+ */
+ return -EINVAL;
+}
+
static const struct vm_operations_struct tracing_buffers_vmops = {
.close = tracing_buffers_mmap_close,
+ .may_split = tracing_buffers_may_split,
};
static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -8791,8 +9014,8 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
struct trace_iterator *iter = &info->iter;
int ret = 0;
- /* A memmap'ed buffer is not supported for user space mmap */
- if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
+ /* Memmap'ed and backup buffers are not supported for user space mmap */
+ if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
return -ENODEV;
ret = get_snapshot_map(iter->tr);
@@ -9305,7 +9528,7 @@ trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
get_tr_index(tr_index, &tr, &index);
- if (tr->trace_flags & (1 << index))
+ if (tr->trace_flags & (1ULL << index))
buf = "1\n";
else
buf = "0\n";
@@ -9334,7 +9557,7 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
- ret = set_tracer_flag(tr, 1 << index, val);
+ ret = set_tracer_flag(tr, 1ULL << index, val);
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -9407,39 +9630,19 @@ create_trace_option_file(struct trace_array *tr,
topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
t_options, topt, &trace_options_fops);
-
}
-static void
-create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
+static int
+create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
+ struct tracer_flags *flags)
{
struct trace_option_dentry *topts;
struct trace_options *tr_topts;
- struct tracer_flags *flags;
struct tracer_opt *opts;
int cnt;
- int i;
-
- if (!tracer)
- return;
-
- flags = tracer->flags;
if (!flags || !flags->opts)
- return;
-
- /*
- * If this is an instance, only create flags for tracers
- * the instance may have.
- */
- if (!trace_ok_for_array(tracer, tr))
- return;
-
- for (i = 0; i < tr->nr_topts; i++) {
- /* Make sure there's no duplicate flags. */
- if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
- return;
- }
+ return 0;
opts = flags->opts;
@@ -9448,13 +9651,13 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
if (!topts)
- return;
+ return 0;
tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
GFP_KERNEL);
if (!tr_topts) {
kfree(topts);
- return;
+ return -ENOMEM;
}
tr->topts = tr_topts;
@@ -9469,6 +9672,97 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
"Failed to create trace option: %s",
opts[cnt].name);
}
+ return 0;
+}
+
+static int get_global_flags_val(struct tracer *tracer)
+{
+ struct tracers *t;
+
+ list_for_each_entry(t, &global_trace.tracers, list) {
+ if (t->tracer != tracer)
+ continue;
+ if (!t->flags)
+ return -1;
+ return t->flags->val;
+ }
+ return -1;
+}
+
+static int add_tracer_options(struct trace_array *tr, struct tracers *t)
+{
+ struct tracer *tracer = t->tracer;
+ struct tracer_flags *flags = t->flags ?: tracer->flags;
+
+ if (!flags)
+ return 0;
+
+ /* Only add tracer options after update_tracer_options finishes */
+ if (!tracer_options_updated)
+ return 0;
+
+ return create_trace_option_files(tr, tracer, flags);
+}
+
+static int add_tracer(struct trace_array *tr, struct tracer *tracer)
+{
+ struct tracer_flags *flags;
+ struct tracers *t;
+ int ret;
+
+ /* Only enable if the directory has been created already. */
+ if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
+ return 0;
+
+ /*
+ * If this is an instance, only create flags for tracers
+ * the instance may have.
+ */
+ if (!trace_ok_for_array(tracer, tr))
+ return 0;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return -ENOMEM;
+
+ t->tracer = tracer;
+ t->flags = NULL;
+ list_add(&t->list, &tr->tracers);
+
+ flags = tracer->flags;
+ if (!flags) {
+ if (!tracer->default_flags)
+ return 0;
+
+ /*
+ * If the tracer defines default flags, it means the flags are
+ * per trace instance.
+ */
+ flags = kmalloc(sizeof(*flags), GFP_KERNEL);
+ if (!flags)
+ return -ENOMEM;
+
+ *flags = *tracer->default_flags;
+ flags->trace = tracer;
+
+ t->flags = flags;
+
+ /* If this is an instance, inherit the global_trace flags */
+ if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
+ int val = get_global_flags_val(tracer);
+ if (!WARN_ON_ONCE(val < 0))
+ flags->val = val;
+ }
+ }
+
+ ret = add_tracer_options(tr, t);
+ if (ret < 0) {
+ list_del(&t->list);
+ kfree(t->flags);
+ kfree(t);
+ }
+
+ return ret;
}
static struct dentry *
@@ -9498,8 +9792,9 @@ static void create_trace_options_dir(struct trace_array *tr)
for (i = 0; trace_options[i]; i++) {
if (top_level ||
- !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
+ !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
create_trace_option_core_file(tr, trace_options[i], i);
+ }
}
}
@@ -9820,7 +10115,7 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size
struct trace_scratch *tscratch;
unsigned int scratch_size = 0;
- rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+ rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
buf->tr = tr;
@@ -9918,19 +10213,39 @@ static void init_trace_flags_index(struct trace_array *tr)
tr->trace_flags_index[i] = i;
}
-static void __update_tracer_options(struct trace_array *tr)
+static int __update_tracer(struct trace_array *tr)
{
struct tracer *t;
+ int ret = 0;
- for (t = trace_types; t; t = t->next)
- add_tracer_options(tr, t);
+ for (t = trace_types; t && !ret; t = t->next)
+ ret = add_tracer(tr, t);
+
+ return ret;
}
-static void update_tracer_options(struct trace_array *tr)
+static __init int __update_tracer_options(struct trace_array *tr)
{
+ struct tracers *t;
+ int ret = 0;
+
+ list_for_each_entry(t, &tr->tracers, list) {
+ ret = add_tracer_options(tr, t);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+static __init void update_tracer_options(void)
+{
+ struct trace_array *tr;
+
guard(mutex)(&trace_types_lock);
tracer_options_updated = true;
- __update_tracer_options(tr);
+ list_for_each_entry(tr, &ftrace_trace_arrays, list)
+ __update_tracer_options(tr);
}
/* Must have trace_types_lock held */
@@ -9975,9 +10290,13 @@ static int trace_array_create_dir(struct trace_array *tr)
}
init_tracer_tracefs(tr, tr->dir);
- __update_tracer_options(tr);
-
- return ret;
+ ret = __update_tracer(tr);
+ if (ret) {
+ event_trace_del_tracer(tr);
+ tracefs_remove(tr->dir);
+ return ret;
+ }
+ return 0;
}
static struct trace_array *
@@ -10019,16 +10338,20 @@ trace_array_create_systems(const char *name, const char *systems,
raw_spin_lock_init(&tr->start_lock);
+ tr->syscall_buf_sz = global_trace.syscall_buf_sz;
+
tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
spin_lock_init(&tr->snapshot_trigger_lock);
#endif
tr->current_trace = &nop_trace;
+ tr->current_trace_flags = nop_trace.flags;
INIT_LIST_HEAD(&tr->systems);
INIT_LIST_HEAD(&tr->events);
INIT_LIST_HEAD(&tr->hist_vars);
INIT_LIST_HEAD(&tr->err_log);
+ INIT_LIST_HEAD(&tr->tracers);
INIT_LIST_HEAD(&tr->marker_list);
#ifdef CONFIG_MODULES
@@ -10183,7 +10506,7 @@ static int __remove_instance(struct trace_array *tr)
/* Disable all the flags that were enabled coming in */
for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
if ((1 << i) & ZEROED_TRACE_FLAGS)
- set_tracer_flag(tr, 1 << i, 0);
+ set_tracer_flag(tr, 1ULL << i, 0);
}
if (printk_trace == tr)
@@ -10201,11 +10524,14 @@ static int __remove_instance(struct trace_array *tr)
free_percpu(tr->last_func_repeats);
free_trace_buffers(tr);
clear_tracing_err_log(tr);
+ free_tracers(tr);
if (tr->range_name) {
reserve_mem_release_by_name(tr->range_name);
kfree(tr->range_name);
}
+ if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
+ vfree((void *)tr->range_addr_start);
for (i = 0; i < tr->nr_topts; i++) {
kfree(tr->topts[i].topts);
@@ -10335,6 +10661,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
tr, &buffer_subbuf_size_fops);
+ trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
+ tr, &tracing_syscall_buf_fops);
+
create_trace_options_dir(tr);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -10620,7 +10949,7 @@ static __init void tracer_init_tracefs_work_func(struct work_struct *work)
create_trace_instances(NULL);
- update_tracer_options(&global_trace);
+ update_tracer_options();
}
static __init int tracer_init_tracefs(void)
@@ -10640,7 +10969,8 @@ static __init int tracer_init_tracefs(void)
tracer_init_tracefs_work_func(NULL);
}
- rv_init_interface();
+ if (rv_init_interface())
+ pr_err("RV: Error while creating the RV interface\n");
return 0;
}
@@ -10773,10 +11103,10 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
/* While dumping, do not allow the buffer to be enable */
tracer_tracing_disable(tr);
- old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
+ old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
/* don't look at user memory in panic mode */
- tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+ tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
if (dump_mode == DUMP_ORIG)
iter.cpu_file = raw_smp_processor_id();
@@ -11008,6 +11338,42 @@ __init static void do_allocate_snapshot(const char *name)
static inline void do_allocate_snapshot(const char *name) { }
#endif
+__init static int backup_instance_area(const char *backup,
+ unsigned long *addr, phys_addr_t *size)
+{
+ struct trace_array *backup_tr;
+ void *allocated_vaddr = NULL;
+
+ backup_tr = trace_array_get_by_name(backup, NULL);
+ if (!backup_tr) {
+ pr_warn("Tracing: Instance %s is not found.\n", backup);
+ return -ENOENT;
+ }
+
+ if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
+ pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
+ trace_array_put(backup_tr);
+ return -EINVAL;
+ }
+
+ *size = backup_tr->range_addr_size;
+
+ allocated_vaddr = vzalloc(*size);
+ if (!allocated_vaddr) {
+ pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
+ backup, (unsigned long)*size);
+ trace_array_put(backup_tr);
+ return -ENOMEM;
+ }
+
+ memcpy(allocated_vaddr,
+ (void *)backup_tr->range_addr_start, (size_t)*size);
+ *addr = (unsigned long)allocated_vaddr;
+
+ trace_array_put(backup_tr);
+ return 0;
+}
+
__init static void enable_instances(void)
{
struct trace_array *tr;
@@ -11030,11 +11396,15 @@ __init static void enable_instances(void)
char *flag_delim;
char *addr_delim;
char *rname __free(kfree) = NULL;
+ char *backup;
tok = strsep(&curr_str, ",");
- flag_delim = strchr(tok, '^');
- addr_delim = strchr(tok, '@');
+ name = strsep(&tok, "=");
+ backup = tok;
+
+ flag_delim = strchr(name, '^');
+ addr_delim = strchr(name, '@');
if (addr_delim)
*addr_delim++ = '\0';
@@ -11042,7 +11412,10 @@ __init static void enable_instances(void)
if (flag_delim)
*flag_delim++ = '\0';
- name = tok;
+ if (backup) {
+ if (backup_instance_area(backup, &addr, &size) < 0)
+ continue;
+ }
if (flag_delim) {
char *flag;
@@ -11138,7 +11511,13 @@ __init static void enable_instances(void)
tr->ref++;
}
- if (start) {
+ /*
+ * Backup buffers can be freed but need vfree().
+ */
+ if (backup)
+ tr->flags |= TRACE_ARRAY_FL_VMALLOC;
+
+ if (start || backup) {
tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
tr->range_name = no_free_ptr(rname);
}
@@ -11232,6 +11611,7 @@ __init static int tracer_alloc_buffers(void)
* just a bootstrap of current_trace anyway.
*/
global_trace.current_trace = &nop_trace;
+ global_trace.current_trace_flags = nop_trace.flags;
global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -11245,10 +11625,7 @@ __init static int tracer_alloc_buffers(void)
init_trace_flags_index(&global_trace);
- register_tracer(&nop_trace);
-
- /* Function tracing may start here (via kernel command line) */
- init_function_trace();
+ INIT_LIST_HEAD(&global_trace.tracers);
/* All seems OK, enable tracing */
tracing_disabled = 0;
@@ -11260,6 +11637,8 @@ __init static int tracer_alloc_buffers(void)
global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
+ global_trace.syscall_buf_sz = syscall_buf_size;
+
INIT_LIST_HEAD(&global_trace.systems);
INIT_LIST_HEAD(&global_trace.events);
INIT_LIST_HEAD(&global_trace.hist_vars);
@@ -11267,6 +11646,11 @@ __init static int tracer_alloc_buffers(void)
list_add(&global_trace.marker_list, &marker_copies);
list_add(&global_trace.list, &ftrace_trace_arrays);
+ register_tracer(&nop_trace);
+
+ /* Function tracing may start here (via kernel command line) */
+ init_function_trace();
+
apply_trace_boot_options();
register_snapshot_cmd();
@@ -11290,7 +11674,7 @@ out_free_buffer_mask:
#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
-__init struct trace_array *trace_get_global_array(void)
+struct trace_array *trace_get_global_array(void)
{
return &global_trace;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 85eabb454bee..b6d42fe06115 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,6 +22,7 @@
#include <linux/ctype.h>
#include <linux/once_lite.h>
#include <linux/ftrace_regs.h>
+#include <linux/llist.h>
#include "pid_list.h"
@@ -131,6 +132,8 @@ enum trace_type {
#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
#define HIST_STACKTRACE_SKIP 5
+#define SYSCALL_FAULT_USER_MAX 165
+
/*
* syscalls are special, and need special handling, this is why
* they are not included in trace_entries.h
@@ -216,7 +219,7 @@ struct array_buffer {
int cpu;
};
-#define TRACE_FLAGS_MAX_SIZE 32
+#define TRACE_FLAGS_MAX_SIZE 64
struct trace_options {
struct tracer *tracer;
@@ -390,7 +393,8 @@ struct trace_array {
int buffer_percent;
unsigned int n_err_log_entries;
struct tracer *current_trace;
- unsigned int trace_flags;
+ struct tracer_flags *current_trace_flags;
+ u64 trace_flags;
unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE];
unsigned int flags;
raw_spinlock_t start_lock;
@@ -404,6 +408,7 @@ struct trace_array {
struct list_head systems;
struct list_head events;
struct list_head marker_list;
+ struct list_head tracers;
struct trace_event_file *trace_marker_file;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
/* one per_cpu trace_pipe can be opened by only one user */
@@ -430,6 +435,7 @@ struct trace_array {
int function_enabled;
#endif
int no_filter_buffering_ref;
+ unsigned int syscall_buf_sz;
struct list_head hist_vars;
#ifdef CONFIG_TRACER_SNAPSHOT
struct cond_snapshot *cond_snapshot;
@@ -448,6 +454,7 @@ enum {
TRACE_ARRAY_FL_LAST_BOOT = BIT(2),
TRACE_ARRAY_FL_MOD_INIT = BIT(3),
TRACE_ARRAY_FL_MEMMAP = BIT(4),
+ TRACE_ARRAY_FL_VMALLOC = BIT(5),
};
#ifdef CONFIG_MODULES
@@ -631,9 +638,10 @@ struct tracer {
u32 old_flags, u32 bit, int set);
/* Return 0 if OK with change, else return non-zero */
int (*flag_changed)(struct trace_array *tr,
- u32 mask, int set);
+ u64 mask, int set);
struct tracer *next;
struct tracer_flags *flags;
+ struct tracer_flags *default_flags;
int enabled;
bool print_max;
bool allow_instances;
@@ -937,8 +945,6 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
-extern void ftrace_graph_sleep_time_control(bool enable);
-
#ifdef CONFIG_FUNCTION_PROFILER
extern void ftrace_graph_graph_time_control(bool enable);
#else
@@ -958,7 +964,8 @@ extern int __trace_graph_entry(struct trace_array *tr,
extern int __trace_graph_retaddr_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned int trace_ctx,
- unsigned long retaddr);
+ unsigned long retaddr,
+ struct ftrace_regs *fregs);
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx,
@@ -1109,7 +1116,8 @@ static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftra
#endif /* CONFIG_DYNAMIC_FTRACE */
extern unsigned int fgraph_max_depth;
-extern bool fgraph_sleep_time;
+extern int fgraph_no_sleep_time;
+extern bool fprofile_no_sleep_time;
static inline bool
ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace)
@@ -1154,11 +1162,6 @@ struct ftrace_func_command {
char *params, int enable);
};
extern bool ftrace_filter_param __initdata;
-static inline int ftrace_trace_task(struct trace_array *tr)
-{
- return this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid) !=
- FTRACE_PID_IGNORE;
-}
extern int ftrace_is_dead(void);
int ftrace_create_function_files(struct trace_array *tr,
struct dentry *parent);
@@ -1176,10 +1179,6 @@ void ftrace_clear_pids(struct trace_array *tr);
int init_function_trace(void);
void ftrace_pid_follow_fork(struct trace_array *tr, bool enable);
#else
-static inline int ftrace_trace_task(struct trace_array *tr)
-{
- return 1;
-}
static inline int ftrace_is_dead(void) { return 0; }
static inline int
ftrace_create_function_files(struct trace_array *tr,
@@ -1345,11 +1344,11 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
# define FUNCTION_FLAGS \
C(FUNCTION, "function-trace"), \
C(FUNC_FORK, "function-fork"),
-# define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION
+# define FUNCTION_DEFAULT_FLAGS TRACE_ITER(FUNCTION)
#else
# define FUNCTION_FLAGS
# define FUNCTION_DEFAULT_FLAGS 0UL
-# define TRACE_ITER_FUNC_FORK 0UL
+# define TRACE_ITER_FUNC_FORK_BIT -1
#endif
#ifdef CONFIG_STACKTRACE
@@ -1359,6 +1358,24 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
# define STACK_FLAGS
#endif
+#ifdef CONFIG_FUNCTION_PROFILER
+# define PROFILER_FLAGS \
+ C(PROF_TEXT_OFFSET, "prof-text-offset"),
+# ifdef CONFIG_FUNCTION_GRAPH_TRACER
+# define FPROFILE_FLAGS \
+ C(GRAPH_TIME, "graph-time"),
+# define FPROFILE_DEFAULT_FLAGS TRACE_ITER(GRAPH_TIME)
+# else
+# define FPROFILE_FLAGS
+# define FPROFILE_DEFAULT_FLAGS 0UL
+# endif
+#else
+# define PROFILER_FLAGS
+# define FPROFILE_FLAGS
+# define FPROFILE_DEFAULT_FLAGS 0UL
+# define TRACE_ITER_PROF_TEXT_OFFSET_BIT -1
+#endif
+
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
@@ -1391,13 +1408,15 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(MARKERS, "markers"), \
C(EVENT_FORK, "event-fork"), \
C(TRACE_PRINTK, "trace_printk_dest"), \
- C(COPY_MARKER, "copy_trace_marker"),\
+ C(COPY_MARKER, "copy_trace_marker"), \
C(PAUSE_ON_TRACE, "pause-on-trace"), \
C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \
FUNCTION_FLAGS \
FGRAPH_FLAGS \
STACK_FLAGS \
- BRANCH_FLAGS
+ BRANCH_FLAGS \
+ PROFILER_FLAGS \
+ FPROFILE_FLAGS
/*
* By defining C, we can make TRACE_FLAGS a list of bit names
@@ -1413,20 +1432,17 @@ enum trace_iterator_bits {
};
/*
- * By redefining C, we can make TRACE_FLAGS a list of masks that
- * use the bits as defined above.
+ * And use TRACE_ITER(flag) to define the bit masks.
*/
-#undef C
-#define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT)
-
-enum trace_iterator_flags { TRACE_FLAGS };
+#define TRACE_ITER(flag) \
+ (TRACE_ITER_##flag##_BIT < 0 ? 0 : 1ULL << (TRACE_ITER_##flag##_BIT))
/*
* TRACE_ITER_SYM_MASK masks the options in trace_flags that
* control the output of kernel symbols.
*/
#define TRACE_ITER_SYM_MASK \
- (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
+ (TRACE_ITER(PRINT_PARENT)|TRACE_ITER(SYM_OFFSET)|TRACE_ITER(SYM_ADDR))
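
Aside (not from the patch): with the TRACE_ITER() macro above, a flag whose subsystem is compiled out defines its _BIT constant as -1 (see TRACE_ITER_FUNC_FORK_BIT earlier in this header), so the mask constant-folds to 0ULL and any test against it vanishes at compile time. A hedged illustration, not an actual call site:

	/*
	 * With CONFIG_FUNCTION_TRACER=n, TRACE_ITER_FUNC_FORK_BIT is -1, so
	 * TRACE_ITER(FUNC_FORK) is 0 and the compiler drops this branch;
	 * otherwise it is the usual 1ULL << bit test against the now
	 * 64-bit trace_flags.
	 */
	if (tr->trace_flags & TRACE_ITER(FUNC_FORK))
		ftrace_pid_follow_fork(tr, true);
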
extern struct tracer nop_trace;
@@ -1435,7 +1451,7 @@ extern int enable_branch_tracing(struct trace_array *tr);
extern void disable_branch_tracing(void);
static inline int trace_branch_enable(struct trace_array *tr)
{
- if (tr->trace_flags & TRACE_ITER_BRANCH)
+ if (tr->trace_flags & TRACE_ITER(BRANCH))
return enable_branch_tracing(tr);
return 0;
}
@@ -1531,6 +1547,23 @@ void trace_buffered_event_enable(void);
void early_enable_events(struct trace_array *tr, char *buf, bool disable_first);
+struct trace_user_buf;
+struct trace_user_buf_info {
+ struct trace_user_buf __percpu *tbuf;
+ size_t size;
+ int ref;
+};
+
+typedef int (*trace_user_buf_copy)(char *dst, const char __user *src,
+ size_t size, void *data);
+int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size);
+int trace_user_fault_get(struct trace_user_buf_info *tinfo);
+int trace_user_fault_put(struct trace_user_buf_info *tinfo);
+void trace_user_fault_destroy(struct trace_user_buf_info *tinfo);
+char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
+ const char __user *ptr, size_t size,
+ trace_user_buf_copy copy_func, void *data);
+
static inline void
__trace_event_discard_commit(struct trace_buffer *buffer,
struct ring_buffer_event *event)
@@ -1752,13 +1785,13 @@ extern void clear_event_triggers(struct trace_array *tr);
enum {
EVENT_TRIGGER_FL_PROBE = BIT(0),
+ EVENT_TRIGGER_FL_COUNT = BIT(1),
};
struct event_trigger_data {
unsigned long count;
int ref;
int flags;
- const struct event_trigger_ops *ops;
struct event_command *cmd_ops;
struct event_filter __rcu *filter;
char *filter_str;
@@ -1769,6 +1802,7 @@ struct event_trigger_data {
char *name;
struct list_head named_list;
struct event_trigger_data *named_data;
+ struct llist_node llist;
};
/* Avoid typos */
@@ -1783,6 +1817,10 @@ struct enable_trigger_data {
bool hist;
};
+bool event_trigger_count(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event);
+
extern int event_enable_trigger_print(struct seq_file *m,
struct event_trigger_data *data);
extern void event_enable_trigger_free(struct event_trigger_data *data);
@@ -1846,64 +1884,6 @@ extern void event_file_get(struct trace_event_file *file);
extern void event_file_put(struct trace_event_file *file);
/**
- * struct event_trigger_ops - callbacks for trace event triggers
- *
- * The methods in this structure provide per-event trigger hooks for
- * various trigger operations.
- *
- * The @init and @free methods are used during trigger setup and
- * teardown, typically called from an event_command's @parse()
- * function implementation.
- *
- * The @print method is used to print the trigger spec.
- *
- * The @trigger method is the function that actually implements the
- * trigger and is called in the context of the triggering event
- * whenever that event occurs.
- *
- * All the methods below, except for @init() and @free(), must be
- * implemented.
- *
- * @trigger: The trigger 'probe' function called when the triggering
- * event occurs. The data passed into this callback is the data
- * that was supplied to the event_command @reg() function that
- * registered the trigger (see struct event_command) along with
- * the trace record, rec.
- *
- * @init: An optional initialization function called for the trigger
- * when the trigger is registered (via the event_command reg()
- * function). This can be used to perform per-trigger
- * initialization such as incrementing a per-trigger reference
- * count, for instance. This is usually implemented by the
- * generic utility function @event_trigger_init() (see
- * trace_event_triggers.c).
- *
- * @free: An optional de-initialization function called for the
- * trigger when the trigger is unregistered (via the
- * event_command @reg() function). This can be used to perform
- * per-trigger de-initialization such as decrementing a
- * per-trigger reference count and freeing corresponding trigger
- * data, for instance. This is usually implemented by the
- * generic utility function @event_trigger_free() (see
- * trace_event_triggers.c).
- *
- * @print: The callback function invoked to have the trigger print
- * itself. This is usually implemented by a wrapper function
- * that calls the generic utility function @event_trigger_print()
- * (see trace_event_triggers.c).
- */
-struct event_trigger_ops {
- void (*trigger)(struct event_trigger_data *data,
- struct trace_buffer *buffer,
- void *rec,
- struct ring_buffer_event *rbe);
- int (*init)(struct event_trigger_data *data);
- void (*free)(struct event_trigger_data *data);
- int (*print)(struct seq_file *m,
- struct event_trigger_data *data);
-};
-
-/**
* struct event_command - callbacks and data members for event commands
*
* Event commands are invoked by users by writing the command name
@@ -1952,7 +1932,7 @@ struct event_trigger_ops {
*
* @reg: Adds the trigger to the list of triggers associated with the
* event, and enables the event trigger itself, after
- * initializing it (via the event_trigger_ops @init() function).
+ * initializing it (via the event_command @init() function).
* This is also where commands can use the @trigger_type value to
* make the decision as to whether or not multiple instances of
* the trigger should be allowed. This is usually implemented by
@@ -1961,7 +1941,7 @@ struct event_trigger_ops {
*
* @unreg: Removes the trigger from the list of triggers associated
* with the event, and disables the event trigger itself, after
- * initializing it (via the event_trigger_ops @free() function).
+ * initializing it (via the event_command @free() function).
* This is usually implemented by the generic utility function
* @unregister_trigger() (see trace_event_triggers.c).
*
@@ -1975,12 +1955,41 @@ struct event_trigger_ops {
* ignored. This is usually implemented by the generic utility
* function @set_trigger_filter() (see trace_event_triggers.c).
*
- * @get_trigger_ops: The callback function invoked to retrieve the
- * event_trigger_ops implementation associated with the command.
- * This callback function allows a single event_command to
- * support multiple trigger implementations via different sets of
- * event_trigger_ops, depending on the value of the @param
- * string.
+ * All the methods below, except for @init(), @free() and @count_func(),
+ * must be implemented.
+ *
+ * @trigger: The trigger 'probe' function called when the triggering
+ * event occurs. The data passed into this callback is the data
+ * that was supplied to the event_command @reg() function that
+ * registered the trigger (see struct event_command) along with
+ * the trace record, rec.
+ *
+ * @count_func: If defined and a numeric parameter is passed to the
+ * trigger, then this function will be called before @trigger
+ * is called. If this function returns false, then @trigger is not
+ * executed.
+ *
+ * @init: An optional initialization function called for the trigger
+ * when the trigger is registered (via the event_command reg()
+ * function). This can be used to perform per-trigger
+ * initialization such as incrementing a per-trigger reference
+ * count, for instance. This is usually implemented by the
+ * generic utility function @event_trigger_init() (see
+ * trace_event_triggers.c).
+ *
+ * @free: An optional de-initialization function called for the
+ * trigger when the trigger is unregistered (via the
+ * event_command @reg() function). This can be used to perform
+ * per-trigger de-initialization such as decrementing a
+ * per-trigger reference count and freeing corresponding trigger
+ * data, for instance. This is usually implemented by the
+ * generic utility function @event_trigger_free() (see
+ * trace_event_triggers.c).
+ *
+ * @print: The callback function invoked to have the trigger print
+ * itself. This is usually implemented by a wrapper function
+ * that calls the generic utility function @event_trigger_print()
+ * (see trace_event_triggers.c).
*/
struct event_command {
struct list_head list;
@@ -2001,7 +2010,18 @@ struct event_command {
int (*set_filter)(char *filter_str,
struct event_trigger_data *data,
struct trace_event_file *file);
- const struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
+ void (*trigger)(struct event_trigger_data *data,
+ struct trace_buffer *buffer,
+ void *rec,
+ struct ring_buffer_event *rbe);
+ bool (*count_func)(struct event_trigger_data *data,
+ struct trace_buffer *buffer,
+ void *rec,
+ struct ring_buffer_event *rbe);
+ int (*init)(struct event_trigger_data *data);
+ void (*free)(struct event_trigger_data *data);
+ int (*print)(struct seq_file *m,
+ struct event_trigger_data *data);
};
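
A hedged sketch (not in this patch) of what a command looks like now that the former event_trigger_ops callbacks live directly in event_command. The example_* names are hypothetical, the remaining members (.name, .trigger_type, .parse, .reg, .unreg, .set_filter) are elided, and event_trigger_init/free/count are the generic helpers mentioned in the comment block above:

static void example_trigger(struct event_trigger_data *data,
			    struct trace_buffer *buffer, void *rec,
			    struct ring_buffer_event *rbe)
{
	/* act on the triggering event */
}

static int example_trigger_print(struct seq_file *m,
				 struct event_trigger_data *data)
{
	seq_puts(m, "example\n");
	return 0;
}

static struct event_command example_cmd = {
	/* ... .name, .trigger_type, .parse, .reg, .unreg, .set_filter ... */
	.trigger	= example_trigger,	/* was event_trigger_ops.trigger */
	.count_func	= event_trigger_count,	/* optional: gates .trigger on a :N count */
	.init		= event_trigger_init,	/* was event_trigger_ops.init */
	.free		= event_trigger_free,	/* was event_trigger_ops.free */
	.print		= example_trigger_print,/* was event_trigger_ops.print */
};
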
/**
@@ -2022,7 +2042,7 @@ struct event_command {
* either committed or discarded. At that point, if any commands
* have deferred their triggers, those commands are finally
* invoked following the close of the current event. In other
- * words, if the event_trigger_ops @func() probe implementation
+ * words, if the event_command @func() probe implementation
* itself logs to the trace buffer, this flag should be set,
* otherwise it can be left unspecified.
*
@@ -2064,8 +2084,8 @@ extern const char *__stop___tracepoint_str[];
void trace_printk_control(bool enabled);
void trace_printk_start_comm(void);
-int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
-int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
+int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set);
+int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled);
/* Used from boot time tracer */
extern int trace_set_options(struct trace_array *tr, char *option);
@@ -2248,4 +2268,25 @@ static inline int rv_init_interface(void)
*/
#define FTRACE_TRAMPOLINE_MARKER ((unsigned long) INT_MAX)
+/*
+ * This is used to get the address of the args array based on
+ * the type of the entry.
+ */
+#define FGRAPH_ENTRY_ARGS(e) \
+ ({ \
+ unsigned long *_args; \
+ struct ftrace_graph_ent_entry *_e = e; \
+ \
+ if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && \
+ e->ent.type == TRACE_GRAPH_RETADDR_ENT) { \
+ struct fgraph_retaddr_ent_entry *_re; \
+ \
+ _re = (typeof(_re))_e; \
+ _args = _re->args; \
+ } else { \
+ _args = _e->args; \
+ } \
+ _args; \
+ })
+
#endif /* _LINUX_KERNEL_TRACE_H */
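
A hedged usage note for FGRAPH_ENTRY_ARGS() above (not from the patch): an output path that already holds a funcgraph entry record can pull the args array without caring whether it is a plain or a retaddr entry. The iterator below is assumed context, not a real call site:

	struct ftrace_graph_ent_entry *field = (void *)iter->ent;
	unsigned long *args = FGRAPH_ENTRY_ARGS(field);

	/*
	 * args now points at the recorded argument words for either a
	 * TRACE_GRAPH_ENT or a TRACE_GRAPH_RETADDR_ENT record.
	 */
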
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index d06854bd32b3..c4dfbc293bae 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -144,9 +144,16 @@ static int create_dyn_event(const char *raw_command)
if (!ret || ret != -ECANCELED)
break;
}
- mutex_unlock(&dyn_event_ops_mutex);
- if (ret == -ECANCELED)
+ if (ret == -ECANCELED) {
+ static const char *err_msg[] = {"No matching dynamic event type"};
+
+ /* Wrong dynamic event. Leave an error message. */
+ tracing_log_err(NULL, "dynevent", raw_command, err_msg,
+ 0, 0);
ret = -EINVAL;
+ }
+
+ mutex_unlock(&dyn_event_ops_mutex);
return ret;
}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index de294ae2c5c5..f6a8d29c0d76 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -80,11 +80,11 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ent, graph_ent )
__field_packed( unsigned long, graph_ent, func )
- __field_packed( unsigned int, graph_ent, depth )
+ __field_packed( unsigned long, graph_ent, depth )
__dynamic_array(unsigned long, args )
),
- F_printk("--> %ps (%u)", (void *)__entry->func, __entry->depth)
+ F_printk("--> %ps (%lu)", (void *)__entry->func, __entry->depth)
);
#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
@@ -95,13 +95,14 @@ FTRACE_ENTRY_PACKED(fgraph_retaddr_entry, fgraph_retaddr_ent_entry,
TRACE_GRAPH_RETADDR_ENT,
F_STRUCT(
- __field_struct( struct fgraph_retaddr_ent, graph_ent )
- __field_packed( unsigned long, graph_ent, func )
- __field_packed( unsigned int, graph_ent, depth )
- __field_packed( unsigned long, graph_ent, retaddr )
+ __field_struct( struct fgraph_retaddr_ent, graph_rent )
+ __field_packed( unsigned long, graph_rent.ent, func )
+ __field_packed( unsigned long, graph_rent.ent, depth )
+ __field_packed( unsigned long, graph_rent, retaddr )
+ __dynamic_array(unsigned long, args )
),
- F_printk("--> %ps (%u) <- %ps", (void *)__entry->func, __entry->depth,
+ F_printk("--> %ps (%lu) <- %ps", (void *)__entry->func, __entry->depth,
(void *)__entry->retaddr)
);
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index a1d402124836..3ee39715d5e4 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -61,6 +61,9 @@ static void trace_event_probe_cleanup(struct trace_eprobe *ep)
kfree(ep);
}
+DEFINE_FREE(trace_event_probe_cleanup, struct trace_eprobe *,
+ if (!IS_ERR_OR_NULL(_T)) trace_event_probe_cleanup(_T))
+
static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
{
return container_of(ev, struct trace_eprobe, devent);
@@ -197,10 +200,10 @@ static struct trace_eprobe *alloc_event_probe(const char *group,
struct trace_event_call *event,
int nargs)
{
- struct trace_eprobe *ep;
+ struct trace_eprobe *ep __free(trace_event_probe_cleanup) = NULL;
const char *event_name;
const char *sys_name;
- int ret = -ENOMEM;
+ int ret;
if (!event)
return ERR_PTR(-ENODEV);
@@ -211,25 +214,22 @@ static struct trace_eprobe *alloc_event_probe(const char *group,
ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
if (!ep) {
trace_event_put_ref(event);
- goto error;
+ return ERR_PTR(-ENOMEM);
}
ep->event = event;
ep->event_name = kstrdup(event_name, GFP_KERNEL);
if (!ep->event_name)
- goto error;
+ return ERR_PTR(-ENOMEM);
ep->event_system = kstrdup(sys_name, GFP_KERNEL);
if (!ep->event_system)
- goto error;
+ return ERR_PTR(-ENOMEM);
ret = trace_probe_init(&ep->tp, this_event, group, false, nargs);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
- return ep;
-error:
- trace_event_probe_cleanup(ep);
- return ERR_PTR(ret);
+ return_ptr(ep);
}
static int eprobe_event_define_fields(struct trace_event_call *event_call)
@@ -484,13 +484,6 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
__eprobe_trace_func(edata, rec);
}
-static const struct event_trigger_ops eprobe_trigger_ops = {
- .trigger = eprobe_trigger_func,
- .print = eprobe_trigger_print,
- .init = eprobe_trigger_init,
- .free = eprobe_trigger_free,
-};
-
static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
char *glob, char *cmd,
@@ -513,12 +506,6 @@ static void eprobe_trigger_unreg_func(char *glob,
}
-static const struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
- char *param)
-{
- return &eprobe_trigger_ops;
-}
-
static struct event_command event_trigger_cmd = {
.name = "eprobe",
.trigger_type = ETT_EVENT_EPROBE,
@@ -527,8 +514,11 @@ static struct event_command event_trigger_cmd = {
.reg = eprobe_trigger_reg_func,
.unreg = eprobe_trigger_unreg_func,
.unreg_all = NULL,
- .get_trigger_ops = eprobe_trigger_get_ops,
.set_filter = NULL,
+ .trigger = eprobe_trigger_func,
+ .print = eprobe_trigger_print,
+ .init = eprobe_trigger_init,
+ .free = eprobe_trigger_free,
};
static struct event_trigger_data *
@@ -548,7 +538,6 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
trigger->flags = EVENT_TRIGGER_FL_PROBE;
trigger->count = -1;
- trigger->ops = &eprobe_trigger_ops;
/*
* EVENT PROBE triggers are not registered as commands with
@@ -801,25 +790,6 @@ find_and_get_event(const char *system, const char *event_name)
return NULL;
}
-static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
-{
- struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL;
- int ret;
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
- ctx->event = ep->event;
- ctx->flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT;
-
- ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], ctx);
- /* Handle symbols "@" */
- if (!ret)
- ret = traceprobe_update_arg(&ep->tp.args[i]);
-
- return ret;
-}
-
static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
{
struct event_filter *dummy = NULL;
@@ -856,13 +826,10 @@ static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const ch
ret = create_event_filter(top_trace_array(), ep->event, ep->filter_str,
true, &dummy);
free_event_filter(dummy);
- if (ret)
- goto error;
-
- return 0;
-error:
- kfree(ep->filter_str);
- ep->filter_str = NULL;
+ if (ret) {
+ kfree(ep->filter_str);
+ ep->filter_str = NULL;
+ }
return ret;
}
@@ -874,31 +841,33 @@ static int __trace_eprobe_create(int argc, const char *argv[])
* Fetch args (no space):
* <name>=$<field>[:TYPE]
*/
+ struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL;
+ struct trace_eprobe *ep __free(trace_event_probe_cleanup) = NULL;
+ const char *trlog __free(trace_probe_log_clear) = NULL;
const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
const char *sys_event = NULL, *sys_name = NULL;
struct trace_event_call *event_call;
char *buf1 __free(kfree) = NULL;
char *buf2 __free(kfree) = NULL;
char *gbuf __free(kfree) = NULL;
- struct trace_eprobe *ep = NULL;
int ret = 0, filter_idx = 0;
int i, filter_cnt;
if (argc < 2 || argv[0][0] != 'e')
return -ECANCELED;
- trace_probe_log_init("event_probe", argc, argv);
+ trlog = trace_probe_log_init("event_probe", argc, argv);
event = strchr(&argv[0][1], ':');
if (event) {
gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
if (!gbuf)
- goto mem_error;
+ return -ENOMEM;
event++;
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
- goto parse_error;
+ return -EINVAL;
}
trace_probe_log_set_index(1);
@@ -906,18 +875,18 @@ static int __trace_eprobe_create(int argc, const char *argv[])
buf2 = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
if (!buf2)
- goto mem_error;
+ return -ENOMEM;
ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
if (ret || !sys_event || !sys_name) {
trace_probe_log_err(0, NO_EVENT_INFO);
- goto parse_error;
+ return -EINVAL;
}
if (!event) {
buf1 = kstrdup(sys_event, GFP_KERNEL);
if (!buf1)
- goto mem_error;
+ return -ENOMEM;
event = buf1;
}
@@ -933,8 +902,7 @@ static int __trace_eprobe_create(int argc, const char *argv[])
if (argc - 2 > MAX_TRACE_ARGS) {
trace_probe_log_set_index(2);
trace_probe_log_err(0, TOO_MANY_ARGS);
- ret = -E2BIG;
- goto error;
+ return -E2BIG;
}
scoped_guard(mutex, &event_mutex) {
@@ -948,29 +916,39 @@ static int __trace_eprobe_create(int argc, const char *argv[])
trace_probe_log_err(0, BAD_ATTACH_EVENT);
/* This must return -ENOMEM or missing event, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
- ep = NULL;
- goto error;
+ return ret;
}
if (filter_idx) {
trace_probe_log_set_index(filter_idx);
ret = trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx);
if (ret)
- goto parse_error;
+ return -EINVAL;
} else
ep->filter_str = NULL;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->event = ep->event;
+ ctx->flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT;
+
argc -= 2; argv += 2;
/* parse arguments */
for (i = 0; i < argc; i++) {
trace_probe_log_set_index(i + 2);
- ret = trace_eprobe_tp_update_arg(ep, argv, i);
+
+ ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], ctx);
+ /* Handle symbols "@" */
+ if (!ret)
+ ret = traceprobe_update_arg(&ep->tp.args[i]);
if (ret)
- goto error;
+ return ret;
}
ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
if (ret < 0)
- goto error;
+ return ret;
+
init_trace_eprobe_call(ep);
scoped_guard(mutex, &event_mutex) {
ret = trace_probe_register_event_call(&ep->tp);
@@ -979,25 +957,16 @@ static int __trace_eprobe_create(int argc, const char *argv[])
trace_probe_log_set_index(0);
trace_probe_log_err(0, EVENT_EXIST);
}
- goto error;
+ return ret;
}
ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
if (ret < 0) {
trace_probe_unregister_event_call(&ep->tp);
- goto error;
+ return ret;
}
+ /* To avoid freeing the registered eprobe event, clear ep. */
+ ep = NULL;
}
- trace_probe_log_clear();
- return ret;
-
-mem_error:
- ret = -ENOMEM;
- goto error;
-parse_error:
- ret = -EINVAL;
-error:
- trace_probe_log_clear();
- trace_event_probe_cleanup(ep);
return ret;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e00da4182deb..9b07ad9eb284 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -845,13 +845,13 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
if (soft_disable)
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
- if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
+ if (tr->trace_flags & TRACE_ITER(RECORD_CMD)) {
cmd = true;
tracing_start_cmdline_record();
set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
- if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ if (tr->trace_flags & TRACE_ITER(RECORD_TGID)) {
tgid = true;
tracing_start_tgid_record();
set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1d536219b624..289bdea98776 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3272,14 +3272,16 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data,
var = create_var(hist_data, file, field_name, val->size, val->type);
if (IS_ERR(var)) {
hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name));
- kfree(val);
+ destroy_hist_field(val, 0);
ret = PTR_ERR(var);
goto err;
}
field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL);
if (!field_var) {
- kfree(val);
+ destroy_hist_field(val, 0);
+ kfree_const(var->type);
+ kfree(var->var.name);
kfree(var);
ret = -ENOMEM;
goto err;
@@ -5694,7 +5696,7 @@ static void hist_trigger_show(struct seq_file *m,
seq_puts(m, "\n\n");
seq_puts(m, "# event histogram\n#\n# trigger info: ");
- data->ops->print(m, data);
+ data->cmd_ops->print(m, data);
seq_puts(m, "#\n\n");
hist_data = data->private_data;
@@ -6016,7 +6018,7 @@ static void hist_trigger_debug_show(struct seq_file *m,
seq_puts(m, "\n\n");
seq_puts(m, "# event histogram\n#\n# trigger info: ");
- data->ops->print(m, data);
+ data->cmd_ops->print(m, data);
seq_puts(m, "#\n\n");
hist_data = data->private_data;
@@ -6326,20 +6328,21 @@ static void event_hist_trigger_free(struct event_trigger_data *data)
free_hist_pad();
}
-static const struct event_trigger_ops event_hist_trigger_ops = {
- .trigger = event_hist_trigger,
- .print = event_hist_trigger_print,
- .init = event_hist_trigger_init,
- .free = event_hist_trigger_free,
-};
-
static int event_hist_trigger_named_init(struct event_trigger_data *data)
{
+ int ret;
+
data->ref++;
save_named_trigger(data->named_data->name, data);
- return event_hist_trigger_init(data->named_data);
+ ret = event_hist_trigger_init(data->named_data);
+ if (ret < 0) {
+ kfree(data->cmd_ops);
+ data->cmd_ops = &trigger_hist_cmd;
+ }
+
+ return ret;
}
static void event_hist_trigger_named_free(struct event_trigger_data *data)
@@ -6351,24 +6354,14 @@ static void event_hist_trigger_named_free(struct event_trigger_data *data)
data->ref--;
if (!data->ref) {
+ struct event_command *cmd_ops = data->cmd_ops;
+
del_named_trigger(data);
trigger_data_free(data);
+ kfree(cmd_ops);
}
}
-static const struct event_trigger_ops event_hist_trigger_named_ops = {
- .trigger = event_hist_trigger,
- .print = event_hist_trigger_print,
- .init = event_hist_trigger_named_init,
- .free = event_hist_trigger_named_free,
-};
-
-static const struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd,
- char *param)
-{
- return &event_hist_trigger_ops;
-}
-
static void hist_clear(struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
@@ -6562,13 +6555,24 @@ static int hist_register_trigger(char *glob,
data->paused = true;
if (named_data) {
+ struct event_command *cmd_ops;
+
data->private_data = named_data->private_data;
set_named_trigger_data(data, named_data);
- data->ops = &event_hist_trigger_named_ops;
+ /* Copy the command ops and update some of the functions */
+ cmd_ops = kmalloc(sizeof(*cmd_ops), GFP_KERNEL);
+ if (!cmd_ops) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ *cmd_ops = *data->cmd_ops;
+ cmd_ops->init = event_hist_trigger_named_init;
+ cmd_ops->free = event_hist_trigger_named_free;
+ data->cmd_ops = cmd_ops;
}
- if (data->ops->init) {
- ret = data->ops->init(data);
+ if (data->cmd_ops->init) {
+ ret = data->cmd_ops->init(data);
if (ret < 0)
goto out;
}
@@ -6682,8 +6686,8 @@ static void hist_unregister_trigger(char *glob,
}
}
- if (test && test->ops->free)
- test->ops->free(test);
+ if (test && test->cmd_ops->free)
+ test->cmd_ops->free(test);
if (hist_data->enable_timestamps) {
if (!hist_data->remove || test)
@@ -6735,8 +6739,8 @@ static void hist_unreg_all(struct trace_event_file *file)
update_cond_flag(file);
if (hist_data->enable_timestamps)
tracing_set_filter_buffering(file->tr, false);
- if (test->ops->free)
- test->ops->free(test);
+ if (test->cmd_ops->free)
+ test->cmd_ops->free(test);
}
}
}
@@ -6912,8 +6916,11 @@ static struct event_command trigger_hist_cmd = {
.reg = hist_register_trigger,
.unreg = hist_unregister_trigger,
.unreg_all = hist_unreg_all,
- .get_trigger_ops = event_hist_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = event_hist_trigger,
+ .print = event_hist_trigger_print,
+ .init = event_hist_trigger_init,
+ .free = event_hist_trigger_free,
};
__init int register_trigger_hist_cmd(void)
@@ -6945,66 +6952,6 @@ hist_enable_trigger(struct event_trigger_data *data,
}
}
-static void
-hist_enable_count_trigger(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *event)
-{
- if (!data->count)
- return;
-
- if (data->count != -1)
- (data->count)--;
-
- hist_enable_trigger(data, buffer, rec, event);
-}
-
-static const struct event_trigger_ops hist_enable_trigger_ops = {
- .trigger = hist_enable_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops hist_enable_count_trigger_ops = {
- .trigger = hist_enable_count_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops hist_disable_trigger_ops = {
- .trigger = hist_enable_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops hist_disable_count_trigger_ops = {
- .trigger = hist_enable_count_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops *
-hist_enable_get_trigger_ops(char *cmd, char *param)
-{
- const struct event_trigger_ops *ops;
- bool enable;
-
- enable = (strcmp(cmd, ENABLE_HIST_STR) == 0);
-
- if (enable)
- ops = param ? &hist_enable_count_trigger_ops :
- &hist_enable_trigger_ops;
- else
- ops = param ? &hist_disable_count_trigger_ops :
- &hist_disable_trigger_ops;
-
- return ops;
-}
-
static void hist_enable_unreg_all(struct trace_event_file *file)
{
struct event_trigger_data *test, *n;
@@ -7014,8 +6961,8 @@ static void hist_enable_unreg_all(struct trace_event_file *file)
list_del_rcu(&test->list);
update_cond_flag(file);
trace_event_trigger_enable_disable(file, 0);
- if (test->ops->free)
- test->ops->free(test);
+ if (test->cmd_ops->free)
+ test->cmd_ops->free(test);
}
}
}
@@ -7027,8 +6974,12 @@ static struct event_command trigger_hist_enable_cmd = {
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
- .get_trigger_ops = hist_enable_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = hist_enable_trigger,
+ .count_func = event_trigger_count,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
};
static struct event_command trigger_hist_disable_cmd = {
@@ -7038,8 +6989,12 @@ static struct event_command trigger_hist_disable_cmd = {
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
- .get_trigger_ops = hist_enable_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = hist_enable_trigger,
+ .count_func = event_trigger_count,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
};
static __init void unregister_trigger_hist_enable_disable_cmds(void)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index f24ee61f8884..2f19bbe73d27 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -359,7 +359,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
fmt = synth_field_fmt(se->fields[i]->type);
/* parameter types */
- if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
+ if (tr && tr->trace_flags & TRACE_ITER(VERBOSE))
trace_seq_printf(s, "%s ", fmt);
snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index cbfc306c0159..96aad82b1628 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -6,6 +6,7 @@
*/
#include <linux/security.h>
+#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@@ -17,15 +18,77 @@
static LIST_HEAD(trigger_commands);
static DEFINE_MUTEX(trigger_cmd_mutex);
+static struct task_struct *trigger_kthread;
+static struct llist_head trigger_data_free_list;
+static DEFINE_MUTEX(trigger_data_kthread_mutex);
+
+/* Bulk garbage collection of event_trigger_data elements */
+static int trigger_kthread_fn(void *ignore)
+{
+ struct event_trigger_data *data, *tmp;
+ struct llist_node *llnodes;
+
+ /* Once this task starts, it lives forever */
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (llist_empty(&trigger_data_free_list))
+ schedule();
+
+ __set_current_state(TASK_RUNNING);
+
+ llnodes = llist_del_all(&trigger_data_free_list);
+
+ /* make sure current triggers exit before free */
+ tracepoint_synchronize_unregister();
+
+ llist_for_each_entry_safe(data, tmp, llnodes, llist)
+ kfree(data);
+ }
+
+ return 0;
+}
+
void trigger_data_free(struct event_trigger_data *data)
{
if (data->cmd_ops->set_filter)
data->cmd_ops->set_filter(NULL, data, NULL);
- /* make sure current triggers exit before free */
- tracepoint_synchronize_unregister();
+ if (unlikely(!trigger_kthread)) {
+ guard(mutex)(&trigger_data_kthread_mutex);
+ /* Check again after taking mutex */
+ if (!trigger_kthread) {
+ struct task_struct *kthread;
+
+ kthread = kthread_create(trigger_kthread_fn, NULL,
+ "trigger_data_free");
+ if (!IS_ERR(kthread))
+ WRITE_ONCE(trigger_kthread, kthread);
+ }
+ }
- kfree(data);
+ if (!trigger_kthread) {
+ /* Do it the slow way */
+ tracepoint_synchronize_unregister();
+ kfree(data);
+ return;
+ }
+
+ llist_add(&data->llist, &trigger_data_free_list);
+ wake_up_process(trigger_kthread);
+}
+
+static inline void data_ops_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+{
+ const struct event_command *cmd_ops = data->cmd_ops;
+
+ if (data->flags & EVENT_TRIGGER_FL_COUNT) {
+ if (!cmd_ops->count_func(data, buffer, rec, event))
+ return;
+ }
+
+ cmd_ops->trigger(data, buffer, rec, event);
}
/**
@@ -70,7 +133,7 @@ event_triggers_call(struct trace_event_file *file,
if (data->paused)
continue;
if (!rec) {
- data->ops->trigger(data, buffer, rec, event);
+ data_ops_trigger(data, buffer, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -80,7 +143,7 @@ event_triggers_call(struct trace_event_file *file,
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->trigger(data, buffer, rec, event);
+ data_ops_trigger(data, buffer, rec, event);
}
return tt;
}
@@ -122,7 +185,7 @@ event_triggers_post_call(struct trace_event_file *file,
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->trigger(data, NULL, NULL, NULL);
+ data_ops_trigger(data, NULL, NULL, NULL);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -191,7 +254,7 @@ static int trigger_show(struct seq_file *m, void *v)
}
data = list_entry(v, struct event_trigger_data, list);
- data->ops->print(m, data);
+ data->cmd_ops->print(m, data);
return 0;
}
@@ -245,7 +308,8 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
char *command, *next;
struct event_command *p;
- next = buff = skip_spaces(buff);
+ next = buff = strim(buff);
+
command = strsep(&next, ": \t");
if (next) {
next = skip_spaces(next);
@@ -282,8 +346,6 @@ static ssize_t event_trigger_regex_write(struct file *file,
if (IS_ERR(buf))
return PTR_ERR(buf);
- strim(buf);
-
guard(mutex)(&event_mutex);
event_file = event_file_file(file);
@@ -300,13 +362,9 @@ static ssize_t event_trigger_regex_write(struct file *file,
static int event_trigger_regex_release(struct inode *inode, struct file *file)
{
- mutex_lock(&event_mutex);
-
if (file->f_mode & FMODE_READ)
seq_release(inode, file);
- mutex_unlock(&event_mutex);
-
return 0;
}
@@ -378,7 +436,37 @@ __init int unregister_event_command(struct event_command *cmd)
}
/**
- * event_trigger_print - Generic event_trigger_ops @print implementation
+ * event_trigger_count - Optional count function for event triggers
+ * @data: Trigger-specific data
+ * @buffer: The ring buffer that the event is being written to
+ * @rec: The trace entry for the event, NULL for unconditional invocation
+ * @event: The event meta data in the ring buffer
+ *
+ * Triggers that take a count parameter which doesn't do anything
+ * special can use this function as their .count_func field.
+ *
+ * This simply counts down the @data->count field.
+ *
+ * If @data->count is greater than zero, it is decremented.
+ *
+ * Returns false if @data->count is zero, otherwise true.
+ */
+bool event_trigger_count(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+{
+ if (!data->count)
+ return false;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ return true;
+}
+
+/**
+ * event_trigger_print - Generic event_command @print implementation
* @name: The name of the event trigger
* @m: The seq_file being printed to
* @data: Trigger-specific data
@@ -413,7 +501,7 @@ event_trigger_print(const char *name, struct seq_file *m,
}
/**
- * event_trigger_init - Generic event_trigger_ops @init implementation
+ * event_trigger_init - Generic event_command @init implementation
* @data: Trigger-specific data
*
* Common implementation of event trigger initialization.
@@ -430,7 +518,7 @@ int event_trigger_init(struct event_trigger_data *data)
}
/**
- * event_trigger_free - Generic event_trigger_ops @free implementation
+ * event_trigger_free - Generic event_command @free implementation
* @data: Trigger-specific data
*
* Common implementation of event trigger de-initialization.
@@ -492,8 +580,8 @@ clear_event_triggers(struct trace_array *tr)
list_for_each_entry_safe(data, n, &file->triggers, list) {
trace_event_trigger_enable_disable(file, 0);
list_del_rcu(&data->list);
- if (data->ops->free)
- data->ops->free(data);
+ if (data->cmd_ops->free)
+ data->cmd_ops->free(data);
}
}
}
@@ -556,8 +644,8 @@ static int register_trigger(char *glob,
return -EEXIST;
}
- if (data->ops->init) {
- ret = data->ops->init(data);
+ if (data->cmd_ops->init) {
+ ret = data->cmd_ops->init(data);
if (ret < 0)
return ret;
}
@@ -595,8 +683,8 @@ static bool try_unregister_trigger(char *glob,
}
if (data) {
- if (data->ops->free)
- data->ops->free(data);
+ if (data->cmd_ops->free)
+ data->cmd_ops->free(data);
return true;
}
@@ -807,9 +895,13 @@ int event_trigger_separate_filter(char *param_and_filter, char **param,
* @private_data: User data to associate with the event trigger
*
* Allocate an event_trigger_data instance and initialize it. The
- * @cmd_ops are used along with the @cmd and @param to get the
- * trigger_ops to assign to the event_trigger_data. @private_data can
- * also be passed in and associated with the event_trigger_data.
+ * @cmd_ops defines how the trigger will operate. If @param is set
+ * and @cmd_ops->count_func is non-NULL, then data->count is set from
+ * @param and, before the trigger is executed, @cmd_ops->count_func()
+ * is called. If that function returns false, the @cmd_ops->trigger()
+ * function will not be called.
+ * @private_data can also be passed in and associated with the
+ * event_trigger_data.
*
* Use trigger_data_free() to free an event_trigger_data object.
*
@@ -821,18 +913,16 @@ struct event_trigger_data *trigger_data_alloc(struct event_command *cmd_ops,
void *private_data)
{
struct event_trigger_data *trigger_data;
- const struct event_trigger_ops *trigger_ops;
-
- trigger_ops = cmd_ops->get_trigger_ops(cmd, param);
trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
if (!trigger_data)
return NULL;
trigger_data->count = -1;
- trigger_data->ops = trigger_ops;
trigger_data->cmd_ops = cmd_ops;
trigger_data->private_data = private_data;
+ if (param && cmd_ops->count_func)
+ trigger_data->flags |= EVENT_TRIGGER_FL_COUNT;
INIT_LIST_HEAD(&trigger_data->list);
INIT_LIST_HEAD(&trigger_data->named_list);
@@ -1271,31 +1361,28 @@ traceon_trigger(struct event_trigger_data *data,
tracing_on();
}
-static void
-traceon_count_trigger(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *event)
+static bool
+traceon_count_func(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
{
struct trace_event_file *file = data->private_data;
if (file) {
if (tracer_tracing_is_on(file->tr))
- return;
+ return false;
} else {
if (tracing_is_on())
- return;
+ return false;
}
if (!data->count)
- return;
+ return false;
if (data->count != -1)
(data->count)--;
- if (file)
- tracer_tracing_on(file->tr);
- else
- tracing_on();
+ return true;
}
static void
@@ -1319,31 +1406,28 @@ traceoff_trigger(struct event_trigger_data *data,
tracing_off();
}
-static void
-traceoff_count_trigger(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *event)
+static bool
+traceoff_count_func(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
{
struct trace_event_file *file = data->private_data;
if (file) {
if (!tracer_tracing_is_on(file->tr))
- return;
+ return false;
} else {
if (!tracing_is_on())
- return;
+ return false;
}
if (!data->count)
- return;
+ return false;
if (data->count != -1)
(data->count)--;
- if (file)
- tracer_tracing_off(file->tr);
- else
- tracing_off();
+ return true;
}
static int
@@ -1360,58 +1444,18 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_data *data)
data->filter_str);
}
-static const struct event_trigger_ops traceon_trigger_ops = {
- .trigger = traceon_trigger,
- .print = traceon_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops traceon_count_trigger_ops = {
- .trigger = traceon_count_trigger,
- .print = traceon_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops traceoff_trigger_ops = {
- .trigger = traceoff_trigger,
- .print = traceoff_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops traceoff_count_trigger_ops = {
- .trigger = traceoff_count_trigger,
- .print = traceoff_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops *
-onoff_get_trigger_ops(char *cmd, char *param)
-{
- const struct event_trigger_ops *ops;
-
- /* we register both traceon and traceoff to this callback */
- if (strcmp(cmd, "traceon") == 0)
- ops = param ? &traceon_count_trigger_ops :
- &traceon_trigger_ops;
- else
- ops = param ? &traceoff_count_trigger_ops :
- &traceoff_trigger_ops;
-
- return ops;
-}
-
static struct event_command trigger_traceon_cmd = {
.name = "traceon",
.trigger_type = ETT_TRACE_ONOFF,
.parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
- .get_trigger_ops = onoff_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = traceon_trigger,
+ .count_func = traceon_count_func,
+ .print = traceon_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
};
static struct event_command trigger_traceoff_cmd = {
@@ -1421,8 +1465,12 @@ static struct event_command trigger_traceoff_cmd = {
.parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
- .get_trigger_ops = onoff_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = traceoff_trigger,
+ .count_func = traceoff_count_func,
+ .print = traceoff_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
};
#ifdef CONFIG_TRACER_SNAPSHOT
@@ -1439,20 +1487,6 @@ snapshot_trigger(struct event_trigger_data *data,
tracing_snapshot();
}
-static void
-snapshot_count_trigger(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *event)
-{
- if (!data->count)
- return;
-
- if (data->count != -1)
- (data->count)--;
-
- snapshot_trigger(data, buffer, rec, event);
-}
-
static int
register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
@@ -1484,34 +1518,18 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_data *data)
data->filter_str);
}
-static const struct event_trigger_ops snapshot_trigger_ops = {
- .trigger = snapshot_trigger,
- .print = snapshot_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops snapshot_count_trigger_ops = {
- .trigger = snapshot_count_trigger,
- .print = snapshot_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops *
-snapshot_get_trigger_ops(char *cmd, char *param)
-{
- return param ? &snapshot_count_trigger_ops : &snapshot_trigger_ops;
-}
-
static struct event_command trigger_snapshot_cmd = {
.name = "snapshot",
.trigger_type = ETT_SNAPSHOT,
.parse = event_trigger_parse,
.reg = register_snapshot_trigger,
.unreg = unregister_snapshot_trigger,
- .get_trigger_ops = snapshot_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = snapshot_trigger,
+ .count_func = event_trigger_count,
+ .print = snapshot_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
};
static __init int register_trigger_snapshot_cmd(void)
@@ -1558,20 +1576,6 @@ stacktrace_trigger(struct event_trigger_data *data,
trace_dump_stack(STACK_SKIP);
}
-static void
-stacktrace_count_trigger(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *event)
-{
- if (!data->count)
- return;
-
- if (data->count != -1)
- (data->count)--;
-
- stacktrace_trigger(data, buffer, rec, event);
-}
-
static int
stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
{
@@ -1579,26 +1583,6 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
data->filter_str);
}
-static const struct event_trigger_ops stacktrace_trigger_ops = {
- .trigger = stacktrace_trigger,
- .print = stacktrace_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops stacktrace_count_trigger_ops = {
- .trigger = stacktrace_count_trigger,
- .print = stacktrace_trigger_print,
- .init = event_trigger_init,
- .free = event_trigger_free,
-};
-
-static const struct event_trigger_ops *
-stacktrace_get_trigger_ops(char *cmd, char *param)
-{
- return param ? &stacktrace_count_trigger_ops : &stacktrace_trigger_ops;
-}
-
static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
@@ -1606,8 +1590,12 @@ static struct event_command trigger_stacktrace_cmd = {
.parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
- .get_trigger_ops = stacktrace_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = stacktrace_trigger,
+ .count_func = event_trigger_count,
+ .print = stacktrace_trigger_print,
+ .init = event_trigger_init,
+ .free = event_trigger_free,
};
static __init int register_trigger_stacktrace_cmd(void)
@@ -1642,24 +1630,24 @@ event_enable_trigger(struct event_trigger_data *data,
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
}
-static void
-event_enable_count_trigger(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *event)
+static bool
+event_enable_count_func(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
if (!data->count)
- return;
+ return false;
/* Skip if the event is in a state we want to switch to */
if (enable_data->enable == !(enable_data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
- return;
+ return false;
if (data->count != -1)
(data->count)--;
- event_enable_trigger(data, buffer, rec, event);
+ return true;
}
int event_enable_trigger_print(struct seq_file *m,
@@ -1704,34 +1692,6 @@ void event_enable_trigger_free(struct event_trigger_data *data)
}
}
-static const struct event_trigger_ops event_enable_trigger_ops = {
- .trigger = event_enable_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops event_enable_count_trigger_ops = {
- .trigger = event_enable_count_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops event_disable_trigger_ops = {
- .trigger = event_enable_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
-static const struct event_trigger_ops event_disable_count_trigger_ops = {
- .trigger = event_enable_count_trigger,
- .print = event_enable_trigger_print,
- .init = event_trigger_init,
- .free = event_enable_trigger_free,
-};
-
int event_enable_trigger_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
char *glob, char *cmd, char *param_and_filter)
@@ -1861,8 +1821,8 @@ int event_enable_register_trigger(char *glob,
}
}
- if (data->ops->init) {
- ret = data->ops->init(data);
+ if (data->cmd_ops->init) {
+ ret = data->cmd_ops->init(data);
if (ret < 0)
return ret;
}
@@ -1902,30 +1862,8 @@ void event_enable_unregister_trigger(char *glob,
}
}
- if (data && data->ops->free)
- data->ops->free(data);
-}
-
-static const struct event_trigger_ops *
-event_enable_get_trigger_ops(char *cmd, char *param)
-{
- const struct event_trigger_ops *ops;
- bool enable;
-
-#ifdef CONFIG_HIST_TRIGGERS
- enable = ((strcmp(cmd, ENABLE_EVENT_STR) == 0) ||
- (strcmp(cmd, ENABLE_HIST_STR) == 0));
-#else
- enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
-#endif
- if (enable)
- ops = param ? &event_enable_count_trigger_ops :
- &event_enable_trigger_ops;
- else
- ops = param ? &event_disable_count_trigger_ops :
- &event_disable_trigger_ops;
-
- return ops;
+ if (data && data->cmd_ops->free)
+ data->cmd_ops->free(data);
}
static struct event_command trigger_enable_cmd = {
@@ -1934,8 +1872,12 @@ static struct event_command trigger_enable_cmd = {
.parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
- .get_trigger_ops = event_enable_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = event_enable_trigger,
+ .count_func = event_enable_count_func,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
};
static struct event_command trigger_disable_cmd = {
@@ -1944,8 +1886,12 @@ static struct event_command trigger_disable_cmd = {
.parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
- .get_trigger_ops = event_enable_get_trigger_ops,
.set_filter = set_trigger_filter,
+ .trigger = event_enable_trigger,
+ .count_func = event_enable_count_func,
+ .print = event_enable_trigger_print,
+ .init = event_trigger_init,
+ .free = event_enable_trigger_free,
};
static __init void unregister_trigger_enable_disable_cmds(void)
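
The trigger_data_free() change above replaces a per-free tracepoint_synchronize_unregister() with a batched scheme: each event_trigger_data is pushed onto a lock-free llist and a lazily created "trigger_data_free" kthread drains the whole list, synchronizes once, and frees the batch. A minimal userspace analogue of that deferred, batched free pattern is sketched below, assuming nothing beyond POSIX threads; the lock-free llist becomes a mutex-protected list, the kthread a pthread, and the grace period a sleep. None of the names are kernel APIs.

/*
 * Userspace analogue of the deferred, batched free used by
 * trigger_data_free() above.  Producers push nodes onto a shared list
 * and wake a reaper thread; the reaper takes the whole batch, waits
 * out a (simulated) grace period once, and then frees everything.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct node {
	struct node *next;
	int id;
};

static struct node *free_list;
static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t free_cond = PTHREAD_COND_INITIALIZER;

static void defer_free(struct node *n)
{
	pthread_mutex_lock(&free_lock);
	n->next = free_list;
	free_list = n;
	pthread_cond_signal(&free_cond);	/* wake the reaper */
	pthread_mutex_unlock(&free_lock);
}

static void *reaper(void *unused)
{
	(void)unused;
	for (;;) {
		struct node *batch;

		pthread_mutex_lock(&free_lock);
		while (!free_list)
			pthread_cond_wait(&free_cond, &free_lock);
		batch = free_list;		/* take the whole batch */
		free_list = NULL;
		pthread_mutex_unlock(&free_lock);

		usleep(1000);			/* stand-in for the one synchronize per batch */

		while (batch) {
			struct node *next = batch->next;

			printf("freeing node %d\n", batch->id);
			free(batch);
			batch = next;
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, reaper, NULL);
	for (int i = 0; i < 5; i++) {
		struct node *n = malloc(sizeof(*n));

		n->id = i;
		defer_free(n);
	}
	sleep(1);
	return 0;
}
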
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index c428dafe7496..b15854c75d4f 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1449,12 +1449,7 @@ static struct trace_event_functions user_event_funcs = {
static int user_event_set_call_visible(struct user_event *user, bool visible)
{
- int ret;
- const struct cred *old_cred;
- struct cred *cred;
-
- cred = prepare_creds();
-
+ CLASS(prepare_creds, cred)();
if (!cred)
return -ENOMEM;
@@ -1469,17 +1464,12 @@ static int user_event_set_call_visible(struct user_event *user, bool visible)
*/
cred->fsuid = GLOBAL_ROOT_UID;
- old_cred = override_creds(cred);
-
- if (visible)
- ret = trace_add_event_call(&user->call);
- else
- ret = trace_remove_event_call(&user->call);
-
- revert_creds(old_cred);
- put_cred(cred);
+ scoped_with_creds(cred) {
+ if (visible)
+ return trace_add_event_call(&user->call);
- return ret;
+ return trace_remove_event_call(&user->call);
+ }
}
static int destroy_user_event(struct user_event *user)
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index ad9d6347b5fa..262c0556e4af 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -106,13 +106,14 @@ static struct tracepoint_user *__tracepoint_user_init(const char *name, struct t
if (!tuser->name)
return NULL;
+ /* Register tracepoint if it is loaded. */
if (tpoint) {
+ tuser->tpoint = tpoint;
ret = tracepoint_user_register(tuser);
if (ret)
return ERR_PTR(ret);
}
- tuser->tpoint = tpoint;
tuser->refcount = 1;
INIT_LIST_HEAD(&tuser->list);
list_add(&tuser->list, &tracepoint_user_list);
@@ -631,7 +632,7 @@ print_fentry_event(struct trace_iterator *iter, int flags,
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
- if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+ if (!seq_print_ip_sym_offset(s, field->ip, flags))
goto out;
trace_seq_putc(s, ')');
@@ -661,12 +662,12 @@ print_fexit_event(struct trace_iterator *iter, int flags,
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
- if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+ if (!seq_print_ip_sym_offset(s, field->ret_ip, flags))
goto out;
trace_seq_puts(s, " <- ");
- if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+ if (!seq_print_ip_sym_no_offset(s, field->func, flags))
goto out;
trace_seq_putc(s, ')');
@@ -1513,6 +1514,10 @@ static int disable_trace_fprobe(struct trace_event_call *call,
if (!trace_probe_is_enabled(tp)) {
list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
unregister_fprobe(&tf->fp);
+ if (tf->tuser) {
+ tracepoint_user_put(tf->tuser);
+ tf->tuser = NULL;
+ }
}
}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index d17c18934445..c12795c2fb39 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -154,11 +154,11 @@ static int function_trace_init(struct trace_array *tr)
if (!tr->ops)
return -ENOMEM;
- func = select_trace_function(func_flags.val);
+ func = select_trace_function(tr->current_trace_flags->val);
if (!func)
return -EINVAL;
- if (!handle_func_repeats(tr, func_flags.val))
+ if (!handle_func_repeats(tr, tr->current_trace_flags->val))
return -ENOMEM;
ftrace_init_array_ops(tr, func);
@@ -459,14 +459,14 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
u32 new_flags;
/* Do nothing if already set. */
- if (!!set == !!(func_flags.val & bit))
+ if (!!set == !!(tr->current_trace_flags->val & bit))
return 0;
/* We can change this flag only when not running. */
if (tr->current_trace != &function_trace)
return 0;
- new_flags = (func_flags.val & ~bit) | (set ? bit : 0);
+ new_flags = (tr->current_trace_flags->val & ~bit) | (set ? bit : 0);
func = select_trace_function(new_flags);
if (!func)
return -EINVAL;
@@ -491,7 +491,7 @@ static struct tracer function_trace __tracer_data =
.init = function_trace_init,
.reset = function_trace_reset,
.start = function_trace_start,
- .flags = &func_flags,
+ .default_flags = &func_flags,
.set_flag = func_set_flag,
.allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a7f4b9a47a71..b1e9c9913309 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -16,9 +16,12 @@
#include "trace.h"
#include "trace_output.h"
-/* When set, irq functions will be ignored */
+/* When set, irq functions might be ignored */
static int ftrace_graph_skip_irqs;
+/* Do not record function time when task is sleeping */
+int fgraph_no_sleep_time;
+
struct fgraph_cpu_data {
pid_t last_pid;
int depth;
@@ -33,14 +36,19 @@ struct fgraph_ent_args {
unsigned long args[FTRACE_REGS_MAX_ARGS];
};
+struct fgraph_retaddr_ent_args {
+ struct fgraph_retaddr_ent_entry ent;
+ /* Force args[] to have FTRACE_REGS_MAX_ARGS entries */
+ unsigned long args[FTRACE_REGS_MAX_ARGS];
+};
+
struct fgraph_data {
struct fgraph_cpu_data __percpu *cpu_data;
/* Place to preserve last processed entry. */
union {
struct fgraph_ent_args ent;
- /* TODO allow retaddr to have args */
- struct fgraph_retaddr_ent_entry rent;
+ struct fgraph_retaddr_ent_args rent;
};
struct ftrace_graph_ret_entry ret;
int failed;
@@ -85,11 +93,6 @@ static struct tracer_opt trace_opts[] = {
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
-#ifdef CONFIG_FUNCTION_PROFILER
- /* Include time within nested functions */
- { TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
-#endif
-
{ } /* Empty entry */
};
@@ -97,13 +100,13 @@ static struct tracer_flags tracer_flags = {
/* Don't display overruns, proc, or tail by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS |
- TRACE_GRAPH_SLEEP_TIME | TRACE_GRAPH_GRAPH_TIME,
+ TRACE_GRAPH_SLEEP_TIME,
.opts = trace_opts
};
-static bool tracer_flags_is_set(u32 flags)
+static bool tracer_flags_is_set(struct trace_array *tr, u32 flags)
{
- return (tracer_flags.val & flags) == flags;
+ return (tr->current_trace_flags->val & flags) == flags;
}
/*
@@ -162,20 +165,32 @@ int __trace_graph_entry(struct trace_array *tr,
int __trace_graph_retaddr_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned int trace_ctx,
- unsigned long retaddr)
+ unsigned long retaddr,
+ struct ftrace_regs *fregs)
{
struct ring_buffer_event *event;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct fgraph_retaddr_ent_entry *entry;
+ int size;
+
+ /* If fregs is defined, add room for FTRACE_REGS_MAX_ARGS long-sized words */
+ size = sizeof(*entry) + (FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long));
event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RETADDR_ENT,
- sizeof(*entry), trace_ctx);
+ size, trace_ctx);
if (!event)
return 0;
entry = ring_buffer_event_data(event);
- entry->graph_ent.func = trace->func;
- entry->graph_ent.depth = trace->depth;
- entry->graph_ent.retaddr = retaddr;
+ entry->graph_rent.ent = *trace;
+ entry->graph_rent.retaddr = retaddr;
+
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ if (fregs) {
+ for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
+ entry->args[i] = ftrace_regs_get_argument(fregs, i);
+ }
+#endif
+
trace_buffer_unlock_commit_nostack(buffer, event);
return 1;
@@ -184,17 +199,21 @@ int __trace_graph_retaddr_entry(struct trace_array *tr,
int __trace_graph_retaddr_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned int trace_ctx,
- unsigned long retaddr)
+ unsigned long retaddr,
+ struct ftrace_regs *fregs)
{
return 1;
}
#endif
-static inline int ftrace_graph_ignore_irqs(void)
+static inline int ftrace_graph_ignore_irqs(struct trace_array *tr)
{
if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
return 0;
+ if (tracer_flags_is_set(tr, TRACE_GRAPH_PRINT_IRQS))
+ return 0;
+
return in_hardirq();
}
@@ -232,22 +251,20 @@ static int graph_entry(struct ftrace_graph_ent *trace,
return 1;
}
- if (!ftrace_trace_task(tr))
- return 0;
-
if (ftrace_graph_ignore_func(gops, trace))
return 0;
- if (ftrace_graph_ignore_irqs())
+ if (ftrace_graph_ignore_irqs(tr))
return 0;
- if (fgraph_sleep_time) {
- /* Only need to record the calltime */
- ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime));
- } else {
+ if (fgraph_no_sleep_time &&
+ !tracer_flags_is_set(tr, TRACE_GRAPH_SLEEP_TIME)) {
ftimes = fgraph_reserve_data(gops->idx, sizeof(*ftimes));
if (ftimes)
ftimes->sleeptime = current->ftrace_sleeptime;
+ } else {
+ /* Only need to record the calltime */
+ ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime));
}
if (!ftimes)
return 0;
@@ -263,9 +280,10 @@ static int graph_entry(struct ftrace_graph_ent *trace,
trace_ctx = tracing_gen_ctx();
if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
- tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
+ tracer_flags_is_set(tr, TRACE_GRAPH_PRINT_RETADDR)) {
unsigned long retaddr = ftrace_graph_top_ret_addr(current);
- ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
+ ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx,
+ retaddr, fregs);
} else {
ret = __graph_entry(tr, trace, trace_ctx, fregs);
}
@@ -333,11 +351,15 @@ void __trace_graph_return(struct trace_array *tr,
trace_buffer_unlock_commit_nostack(buffer, event);
}
-static void handle_nosleeptime(struct ftrace_graph_ret *trace,
+static void handle_nosleeptime(struct trace_array *tr,
+ struct ftrace_graph_ret *trace,
struct fgraph_times *ftimes,
int size)
{
- if (fgraph_sleep_time || size < sizeof(*ftimes))
+ if (size < sizeof(*ftimes))
+ return;
+
+ if (!fgraph_no_sleep_time || tracer_flags_is_set(tr, TRACE_GRAPH_SLEEP_TIME))
return;
ftimes->calltime += current->ftrace_sleeptime - ftimes->sleeptime;
@@ -366,7 +388,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
if (!ftimes)
return;
- handle_nosleeptime(trace, ftimes, size);
+ handle_nosleeptime(tr, trace, ftimes, size);
calltime = ftimes->calltime;
@@ -379,6 +401,7 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
struct ftrace_regs *fregs)
{
struct fgraph_times *ftimes;
+ struct trace_array *tr;
int size;
ftrace_graph_addr_finish(gops, trace);
@@ -392,7 +415,8 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
if (!ftimes)
return;
- handle_nosleeptime(trace, ftimes, size);
+ tr = gops->private;
+ handle_nosleeptime(tr, trace, ftimes, size);
if (tracing_thresh &&
(trace_clock_local() - ftimes->calltime < tracing_thresh))
@@ -441,7 +465,7 @@ static int graph_trace_init(struct trace_array *tr)
{
int ret;
- if (tracer_flags_is_set(TRACE_GRAPH_ARGS))
+ if (tracer_flags_is_set(tr, TRACE_GRAPH_ARGS))
tr->gops->entryfunc = trace_graph_entry_args;
else
tr->gops->entryfunc = trace_graph_entry;
@@ -451,6 +475,12 @@ static int graph_trace_init(struct trace_array *tr)
else
tr->gops->retfunc = trace_graph_return;
+ if (!tracer_flags_is_set(tr, TRACE_GRAPH_PRINT_IRQS))
+ ftrace_graph_skip_irqs++;
+
+ if (!tracer_flags_is_set(tr, TRACE_GRAPH_SLEEP_TIME))
+ fgraph_no_sleep_time++;
+
/* Make gops functions visible before we start tracing */
smp_mb();
@@ -468,10 +498,6 @@ static int ftrace_graph_trace_args(struct trace_array *tr, int set)
{
trace_func_graph_ent_t entry;
- /* Do nothing if the current tracer is not this tracer */
- if (tr->current_trace != &graph_trace)
- return 0;
-
if (set)
entry = trace_graph_entry_args;
else
@@ -492,6 +518,16 @@ static int ftrace_graph_trace_args(struct trace_array *tr, int set)
static void graph_trace_reset(struct trace_array *tr)
{
+ if (!tracer_flags_is_set(tr, TRACE_GRAPH_PRINT_IRQS))
+ ftrace_graph_skip_irqs--;
+ if (WARN_ON_ONCE(ftrace_graph_skip_irqs < 0))
+ ftrace_graph_skip_irqs = 0;
+
+ if (!tracer_flags_is_set(tr, TRACE_GRAPH_SLEEP_TIME))
+ fgraph_no_sleep_time--;
+ if (WARN_ON_ONCE(fgraph_no_sleep_time < 0))
+ fgraph_no_sleep_time = 0;
+
tracing_stop_cmdline_record();
unregister_ftrace_graph(tr->gops);
}
@@ -634,13 +670,9 @@ get_return_for_leaf(struct trace_iterator *iter,
* Save current and next entries for later reference
* if the output fails.
*/
- if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) {
- data->rent = *(struct fgraph_retaddr_ent_entry *)curr;
- } else {
- int size = min((int)sizeof(data->ent), (int)iter->ent_size);
+ int size = min_t(int, sizeof(data->rent), iter->ent_size);
- memcpy(&data->ent, curr, size);
- }
+ memcpy(&data->rent, curr, size);
/*
* If the next event is not a return type, then
* we only care about what type it is. Otherwise we can
@@ -703,7 +735,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
addr >= (unsigned long)__irqentry_text_end)
return;
- if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
/* Absolute time */
if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
print_graph_abs_time(iter->ts, s);
@@ -723,7 +755,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
}
/* Latency format */
- if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
+ if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
print_graph_lat_fmt(s, ent);
}
@@ -777,7 +809,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
struct trace_seq *s, u32 flags)
{
if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
- !(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
+ !(tr->trace_flags & TRACE_ITER(CONTEXT_INFO)))
return;
/* No real data, just filling the column with spaces */
@@ -818,7 +850,7 @@ static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_e
trace_seq_puts(s, " /*");
trace_seq_puts(s, " <-");
- seq_print_ip_sym(s, entry->graph_ent.retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
+ seq_print_ip_sym_offset(s, entry->graph_rent.retaddr, trace_flags);
if (comment)
trace_seq_puts(s, " */");
@@ -964,7 +996,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
trace_seq_printf(s, "%ps", (void *)ret_func);
if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) {
- print_function_args(s, entry->args, ret_func);
+ print_function_args(s, FGRAPH_ENTRY_ARGS(entry), ret_func);
trace_seq_putc(s, ';');
} else
trace_seq_puts(s, "();");
@@ -1016,7 +1048,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args);
if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long))
- print_function_args(s, entry->args, func);
+ print_function_args(s, FGRAPH_ENTRY_ARGS(entry), func);
else
trace_seq_puts(s, "()");
@@ -1054,7 +1086,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
/* Interrupt */
print_graph_irq(iter, addr, type, cpu, ent->pid, flags);
- if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
+ if (!(tr->trace_flags & TRACE_ITER(CONTEXT_INFO)))
return;
/* Absolute time */
@@ -1076,7 +1108,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
}
/* Latency format */
- if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
+ if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
print_graph_lat_fmt(s, ent);
return;
@@ -1198,11 +1230,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
/*
* print_graph_entry() may consume the current event,
* thus @field may become invalid, so we need to save it.
- * sizeof(struct ftrace_graph_ent_entry) is very small,
- * it can be safely saved at the stack.
+ * This function is shared by ftrace_graph_ent_entry and
+ * fgraph_retaddr_ent_entry. The latter is slightly larger,
+ * but still small enough to be safely saved on the stack.
*/
struct ftrace_graph_ent_entry *entry;
- u8 save_buf[sizeof(*entry) + FTRACE_REGS_MAX_ARGS * sizeof(long)];
+ struct fgraph_retaddr_ent_entry *rentry;
+ u8 save_buf[sizeof(*rentry) + FTRACE_REGS_MAX_ARGS * sizeof(long)];
/* The ent_size is expected to be as big as the entry */
if (iter->ent_size > sizeof(save_buf))
@@ -1431,12 +1466,17 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
}
#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
case TRACE_GRAPH_RETADDR_ENT: {
- struct fgraph_retaddr_ent_entry saved;
+ /*
+ * ftrace_graph_ent_entry and fgraph_retaddr_ent_entry have
+ * similar functions and memory layouts. The only difference
+ * is that the latter one has an extra retaddr member, so
+ * they can share most of the logic.
+ */
struct fgraph_retaddr_ent_entry *rfield;
trace_assign_type(rfield, entry);
- saved = *rfield;
- return print_graph_entry((struct ftrace_graph_ent_entry *)&saved, s, iter, flags);
+ return print_graph_entry((struct ftrace_graph_ent_entry *)rfield,
+ s, iter, flags);
}
#endif
case TRACE_GRAPH_RET: {
@@ -1459,7 +1499,8 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
static enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
- return print_graph_function_flags(iter, tracer_flags.val);
+ struct trace_array *tr = iter->tr;
+ return print_graph_function_flags(iter, tr->current_trace_flags->val);
}
static enum print_line_t
@@ -1495,7 +1536,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
static void __print_graph_headers_flags(struct trace_array *tr,
struct seq_file *s, u32 flags)
{
- int lat = tr->trace_flags & TRACE_ITER_LATENCY_FMT;
+ int lat = tr->trace_flags & TRACE_ITER(LATENCY_FMT);
if (lat)
print_lat_header(s, flags);
@@ -1535,7 +1576,10 @@ static void __print_graph_headers_flags(struct trace_array *tr,
static void print_graph_headers(struct seq_file *s)
{
- print_graph_headers_flags(s, tracer_flags.val);
+ struct trace_iterator *iter = s->private;
+ struct trace_array *tr = iter->tr;
+
+ print_graph_headers_flags(s, tr->current_trace_flags->val);
}
void print_graph_headers_flags(struct seq_file *s, u32 flags)
@@ -1543,10 +1587,10 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
struct trace_iterator *iter = s->private;
struct trace_array *tr = iter->tr;
- if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
+ if (!(tr->trace_flags & TRACE_ITER(CONTEXT_INFO)))
return;
- if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) {
+ if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return;
@@ -1613,17 +1657,56 @@ void graph_trace_close(struct trace_iterator *iter)
static int
func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
- if (bit == TRACE_GRAPH_PRINT_IRQS)
- ftrace_graph_skip_irqs = !set;
+/*
+ * The function profiler gets updated even if function graph
+ * isn't the current tracer. Handle it separately.
+ */
+#ifdef CONFIG_FUNCTION_PROFILER
+ if (bit == TRACE_GRAPH_SLEEP_TIME && (tr->flags & TRACE_ARRAY_FL_GLOBAL) &&
+ !!set == fprofile_no_sleep_time) {
+ if (set) {
+ fgraph_no_sleep_time--;
+ if (WARN_ON_ONCE(fgraph_no_sleep_time < 0))
+ fgraph_no_sleep_time = 0;
+ fprofile_no_sleep_time = false;
+ } else {
+ fgraph_no_sleep_time++;
+ fprofile_no_sleep_time = true;
+ }
+ }
+#endif
+
+ /* Do nothing if the current tracer is not this tracer */
+ if (tr->current_trace != &graph_trace)
+ return 0;
- if (bit == TRACE_GRAPH_SLEEP_TIME)
- ftrace_graph_sleep_time_control(set);
+ /* Do nothing if already set. */
+ if (!!set == !!(tr->current_trace_flags->val & bit))
+ return 0;
+
+ switch (bit) {
+ case TRACE_GRAPH_SLEEP_TIME:
+ if (set) {
+ fgraph_no_sleep_time--;
+ if (WARN_ON_ONCE(fgraph_no_sleep_time < 0))
+ fgraph_no_sleep_time = 0;
+ } else {
+ fgraph_no_sleep_time++;
+ }
+ break;
- if (bit == TRACE_GRAPH_GRAPH_TIME)
- ftrace_graph_graph_time_control(set);
+ case TRACE_GRAPH_PRINT_IRQS:
+ if (set)
+ ftrace_graph_skip_irqs--;
+ else
+ ftrace_graph_skip_irqs++;
+ if (WARN_ON_ONCE(ftrace_graph_skip_irqs < 0))
+ ftrace_graph_skip_irqs = 0;
+ break;
- if (bit == TRACE_GRAPH_ARGS)
+ case TRACE_GRAPH_ARGS:
return ftrace_graph_trace_args(tr, set);
+ }
return 0;
}
@@ -1660,7 +1743,7 @@ static struct tracer graph_trace __tracer_data = {
.reset = graph_trace_reset,
.print_line = print_graph_function,
.print_header = print_graph_headers,
- .flags = &tracer_flags,
+ .default_flags = &tracer_flags,
.set_flag = func_graph_set_flag,
.allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
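
graph_trace_init()/graph_trace_reset() and func_graph_set_flag() above turn ftrace_graph_skip_irqs and the new fgraph_no_sleep_time into counters rather than booleans, so several trace instances can each request the behaviour independently while the fast path only checks for non-zero. A minimal stand-alone sketch of that counted-toggle pattern follows; the names are illustrative, not kernel code.

#include <stdio.h>

/* One global counter per behaviour; each instance that wants it takes
 * a reference at init and drops it at reset. */
static int skip_irqs_count;

static void instance_init(int want_skip_irqs)
{
	if (want_skip_irqs)
		skip_irqs_count++;
}

static void instance_reset(int want_skip_irqs)
{
	if (want_skip_irqs && skip_irqs_count > 0)
		skip_irqs_count--;
}

static int should_skip_irqs(void)
{
	return skip_irqs_count != 0;	/* the only check on the hot path */
}

int main(void)
{
	instance_init(1);			/* instance A wants IRQs skipped */
	instance_init(0);			/* instance B does not */
	printf("skip irqs: %d\n", should_skip_irqs());	/* 1 */
	instance_reset(1);			/* instance A goes away */
	printf("skip irqs: %d\n", should_skip_irqs());	/* 0 */
	return 0;
}
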
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 4c45c49b06c8..17673905907c 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int irqsoff_display_graph(struct trace_array *tr, int set);
-# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH)
+# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER(DISPLAY_GRAPH))
#else
static inline int irqsoff_display_graph(struct trace_array *tr, int set)
{
@@ -485,8 +485,8 @@ static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
{
int ret;
- /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
- if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION)))
+ /* 'set' is set if TRACE_ITER(FUNCTION) is about to be set */
+ if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER(FUNCTION))))
return 0;
if (graph)
@@ -515,7 +515,7 @@ static void unregister_irqsoff_function(struct trace_array *tr, int graph)
static int irqsoff_function_set(struct trace_array *tr, u32 mask, int set)
{
- if (!(mask & TRACE_ITER_FUNCTION))
+ if (!(mask & TRACE_ITER(FUNCTION)))
return 0;
if (set)
@@ -536,7 +536,7 @@ static inline int irqsoff_function_set(struct trace_array *tr, u32 mask, int set
}
#endif /* CONFIG_FUNCTION_TRACER */
-static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
+static int irqsoff_flag_changed(struct trace_array *tr, u64 mask, int set)
{
struct tracer *tracer = tr->current_trace;
@@ -544,7 +544,7 @@ static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
return 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (mask & TRACE_ITER_DISPLAY_GRAPH)
+ if (mask & TRACE_ITER(DISPLAY_GRAPH))
return irqsoff_display_graph(tr, set);
#endif
@@ -582,10 +582,10 @@ static int __irqsoff_tracer_init(struct trace_array *tr)
save_flags = tr->trace_flags;
/* non overwrite screws up the latency tracers */
- set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
- set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
+ set_tracer_flag(tr, TRACE_ITER(OVERWRITE), 1);
+ set_tracer_flag(tr, TRACE_ITER(LATENCY_FMT), 1);
/* without pause, we will produce garbage if another latency occurs */
- set_tracer_flag(tr, TRACE_ITER_PAUSE_ON_TRACE, 1);
+ set_tracer_flag(tr, TRACE_ITER(PAUSE_ON_TRACE), 1);
tr->max_latency = 0;
irqsoff_trace = tr;
@@ -605,15 +605,15 @@ static int __irqsoff_tracer_init(struct trace_array *tr)
static void __irqsoff_tracer_reset(struct trace_array *tr)
{
- int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
- int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
- int pause_flag = save_flags & TRACE_ITER_PAUSE_ON_TRACE;
+ int lat_flag = save_flags & TRACE_ITER(LATENCY_FMT);
+ int overwrite_flag = save_flags & TRACE_ITER(OVERWRITE);
+ int pause_flag = save_flags & TRACE_ITER(PAUSE_ON_TRACE);
stop_irqsoff_tracer(tr, is_graph(tr));
- set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
- set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
- set_tracer_flag(tr, TRACE_ITER_PAUSE_ON_TRACE, pause_flag);
+ set_tracer_flag(tr, TRACE_ITER(LATENCY_FMT), lat_flag);
+ set_tracer_flag(tr, TRACE_ITER(OVERWRITE), overwrite_flag);
+ set_tracer_flag(tr, TRACE_ITER(PAUSE_ON_TRACE), pause_flag);
ftrace_reset_array_ops(tr);
irqsoff_busy = false;
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 896ff78b8349..b30795f34079 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -31,7 +31,7 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file)
old_userobj = tr->trace_flags;
/* don't look at user memory in panic mode */
- tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+ tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
kdb_printf("Dumping ftrace buffer:\n");
if (skip_entries)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ee8171b19bee..9953506370a5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1584,7 +1584,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
- if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+ if (!seq_print_ip_sym_offset(s, field->ip, flags))
goto out;
trace_seq_putc(s, ')');
@@ -1614,12 +1614,12 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
- if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+ if (!seq_print_ip_sym_offset(s, field->ret_ip, flags))
goto out;
trace_seq_puts(s, " <- ");
- if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+ if (!seq_print_ip_sym_no_offset(s, field->func, flags))
goto out;
trace_seq_putc(s, ')');
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 97db0b0ccf3e..cc2d3306bb60 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -420,7 +420,7 @@ static int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
}
mmap_read_unlock(mm);
}
- if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+ if (ret && ((sym_flags & TRACE_ITER(SYM_ADDR)) || !file))
trace_seq_printf(s, " <" IP_FMT ">", ip);
return !trace_seq_has_overflowed(s);
}
@@ -433,9 +433,9 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
goto out;
}
- trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
+ trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER(SYM_OFFSET));
- if (sym_flags & TRACE_ITER_SYM_ADDR)
+ if (sym_flags & TRACE_ITER(SYM_ADDR))
trace_seq_printf(s, " <" IP_FMT ">", ip);
out:
@@ -569,7 +569,7 @@ static int
lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
{
struct trace_array *tr = iter->tr;
- unsigned long verbose = tr->trace_flags & TRACE_ITER_VERBOSE;
+ unsigned long verbose = tr->trace_flags & TRACE_ITER(VERBOSE);
unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
unsigned long long abs_ts = iter->ts - iter->array_buffer->time_start;
unsigned long long rel_ts = next_ts - iter->ts;
@@ -636,7 +636,7 @@ int trace_print_context(struct trace_iterator *iter)
trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);
- if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ if (tr->trace_flags & TRACE_ITER(RECORD_TGID)) {
unsigned int tgid = trace_find_tgid(entry->pid);
if (!tgid)
@@ -647,7 +647,7 @@ int trace_print_context(struct trace_iterator *iter)
trace_seq_printf(s, "[%03d] ", iter->cpu);
- if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
+ if (tr->trace_flags & TRACE_ITER(IRQ_INFO))
trace_print_lat_fmt(s, entry);
trace_print_time(s, iter, iter->ts);
@@ -661,7 +661,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
struct trace_entry *entry, *next_entry;
struct trace_array *tr = iter->tr;
struct trace_seq *s = &iter->seq;
- unsigned long verbose = (tr->trace_flags & TRACE_ITER_VERBOSE);
+ unsigned long verbose = (tr->trace_flags & TRACE_ITER(VERBOSE));
u64 next_ts;
next_entry = trace_find_next_entry(iter, NULL, &next_ts);
@@ -950,7 +950,9 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
int offset;
int len;
int ret;
+ int i;
void *pos;
+ char *str;
list_for_each_entry_reverse(field, head, link) {
trace_seq_printf(&iter->seq, " %s=", field->name);
@@ -977,8 +979,29 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
trace_seq_puts(&iter->seq, "<OVERFLOW>");
break;
}
- pos = (void *)iter->ent + offset;
- trace_seq_printf(&iter->seq, "%.*s", len, (char *)pos);
+ str = (char *)iter->ent + offset;
+ /* Check if there are any non-printable characters */
+ for (i = 0; i < len; i++) {
+ if (str[i] && !(isascii(str[i]) && isprint(str[i])))
+ break;
+ }
+ if (i < len) {
+ for (i = 0; i < len; i++) {
+ if (isascii(str[i]) && isprint(str[i]))
+ trace_seq_putc(&iter->seq, str[i]);
+ else
+ trace_seq_putc(&iter->seq, '.');
+ }
+ trace_seq_puts(&iter->seq, " (");
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_putc(&iter->seq, ':');
+ trace_seq_printf(&iter->seq, "%02x", str[i]);
+ }
+ trace_seq_putc(&iter->seq, ')');
+ } else {
+ trace_seq_printf(&iter->seq, "%.*s", len, str);
+ }
break;
case FILTER_PTR_STRING:
if (!iter->fmt_size)
@@ -1127,7 +1150,7 @@ static void print_fn_trace(struct trace_seq *s, unsigned long ip,
if (args)
print_function_args(s, args, ip);
- if ((flags & TRACE_ITER_PRINT_PARENT) && parent_ip) {
+ if ((flags & TRACE_ITER(PRINT_PARENT)) && parent_ip) {
trace_seq_puts(s, " <-");
seq_print_ip_sym(s, parent_ip, flags);
}
@@ -1417,7 +1440,7 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
trace_seq_puts(s, "<user stack trace>\n");
- if (tr->trace_flags & TRACE_ITER_SYM_USEROBJ) {
+ if (tr->trace_flags & TRACE_ITER(SYM_USEROBJ)) {
struct task_struct *task;
/*
* we do the lookup on the thread group leader,
@@ -1467,12 +1490,12 @@ trace_hwlat_print(struct trace_iterator *iter, int flags,
trace_assign_type(field, entry);
- trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld count:%d",
+ trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ptSp count:%d",
field->seqnum,
field->duration,
field->outer_duration,
- (long long)field->timestamp.tv_sec,
- field->timestamp.tv_nsec, field->count);
+ &field->timestamp,
+ field->count);
if (field->nmi_count) {
/*
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 2e305364f2a9..99b676733d46 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -16,6 +16,17 @@ extern int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
unsigned long sym_flags);
+static inline int seq_print_ip_sym_offset(struct trace_seq *s, unsigned long ip,
+ unsigned long sym_flags)
+{
+ return seq_print_ip_sym(s, ip, sym_flags | TRACE_ITER(SYM_OFFSET));
+}
+static inline int seq_print_ip_sym_no_offset(struct trace_seq *s, unsigned long ip,
+ unsigned long sym_flags)
+{
+ return seq_print_ip_sym(s, ip, sym_flags & ~TRACE_ITER(SYM_OFFSET));
+}
+
extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
extern int trace_print_context(struct trace_iterator *iter);
extern int trace_print_lat_context(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 5cbdc423afeb..bb67f6a2136c 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -156,7 +156,7 @@ fail:
static struct trace_probe_log trace_probe_log;
extern struct mutex dyn_event_ops_mutex;
-void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
+const char *trace_probe_log_init(const char *subsystem, int argc, const char **argv)
{
lockdep_assert_held(&dyn_event_ops_mutex);
@@ -164,6 +164,7 @@ void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
trace_probe_log.argc = argc;
trace_probe_log.argv = argv;
trace_probe_log.index = 0;
+ return subsystem;
}
void trace_probe_log_clear(void)
@@ -214,7 +215,7 @@ void __trace_probe_log_err(int offset, int err_type)
p = command;
for (i = 0; i < trace_probe_log.argc; i++) {
len = strlen(trace_probe_log.argv[i]);
- strcpy(p, trace_probe_log.argv[i]);
+ memcpy(p, trace_probe_log.argv[i], len);
p[len] = ' ';
p += len + 1;
}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 08b5bda24da2..9fc56c937130 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -578,11 +578,13 @@ struct trace_probe_log {
int index;
};
-void trace_probe_log_init(const char *subsystem, int argc, const char **argv);
+const char *trace_probe_log_init(const char *subsystem, int argc, const char **argv);
void trace_probe_log_set_index(int index);
void trace_probe_log_clear(void);
void __trace_probe_log_err(int offset, int err);
+DEFINE_FREE(trace_probe_log_clear, const char *, if (_T) trace_probe_log_clear())
+
#define trace_probe_log_err(offs, err) \
__trace_probe_log_err(offs, TP_ERR_##err)
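
trace_probe_log_init() now returns the subsystem string, and the DEFINE_FREE() above pairs it with a cleanup helper, so callers can bind the log lifetime to a local variable with __free() and have trace_probe_log_clear() run automatically on every exit path. The self-contained sketch below mimics that pattern in plain C with a simplified DEFINE_FREE() macro; the names and the macro body are illustrative, not the kernel's linux/cleanup.h.

#include <stdio.h>

/* Simplified stand-ins for the linux/cleanup.h macros. */
#define DEFINE_FREE(name, type, free_expr) \
	static void __free_##name(type *p) { type _T = *p; free_expr; }
#define __free(name) __attribute__((cleanup(__free_##name)))

static void example_log_clear(void)
{
	puts("log cleared");
}

/* Same shape as the kernel declaration above, just with local names. */
DEFINE_FREE(example_log_clear, const char *, if (_T) example_log_clear())

static const char *example_log_init(const char *subsystem)
{
	printf("log init for %s\n", subsystem);
	return subsystem;	/* returned only so __free() has something to bind to */
}

static int create_probe(int argc)
{
	const char *log __free(example_log_clear) = example_log_init("demo");

	if (argc < 2)
		return -1;	/* the log is still cleared on this early return */

	printf("parsed %d args under %s\n", argc, log);
	return 0;
}

int main(void)
{
	create_probe(1);
	create_probe(3);
	return 0;
}
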
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e3f2e4f56faa..8faa73d3bba1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -41,7 +41,7 @@ static void stop_func_tracer(struct trace_array *tr, int graph);
static int save_flags;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH)
+# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER(DISPLAY_GRAPH))
#else
# define is_graph(tr) false
#endif
@@ -247,8 +247,8 @@ static int register_wakeup_function(struct trace_array *tr, int graph, int set)
{
int ret;
- /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
- if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION)))
+ /* 'set' is set if TRACE_ITER(FUNCTION) is about to be set */
+ if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER(FUNCTION))))
return 0;
if (graph)
@@ -277,7 +277,7 @@ static void unregister_wakeup_function(struct trace_array *tr, int graph)
static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
{
- if (!(mask & TRACE_ITER_FUNCTION))
+ if (!(mask & TRACE_ITER(FUNCTION)))
return 0;
if (set)
@@ -324,7 +324,7 @@ __trace_function(struct trace_array *tr,
trace_function(tr, ip, parent_ip, trace_ctx, NULL);
}
-static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
+static int wakeup_flag_changed(struct trace_array *tr, u64 mask, int set)
{
struct tracer *tracer = tr->current_trace;
@@ -332,7 +332,7 @@ static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
return 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (mask & TRACE_ITER_DISPLAY_GRAPH)
+ if (mask & TRACE_ITER(DISPLAY_GRAPH))
return wakeup_display_graph(tr, set);
#endif
@@ -681,8 +681,8 @@ static int __wakeup_tracer_init(struct trace_array *tr)
save_flags = tr->trace_flags;
/* non overwrite screws up the latency tracers */
- set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
- set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
+ set_tracer_flag(tr, TRACE_ITER(OVERWRITE), 1);
+ set_tracer_flag(tr, TRACE_ITER(LATENCY_FMT), 1);
tr->max_latency = 0;
wakeup_trace = tr;
@@ -725,15 +725,15 @@ static int wakeup_dl_tracer_init(struct trace_array *tr)
static void wakeup_tracer_reset(struct trace_array *tr)
{
- int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
- int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
+ int lat_flag = save_flags & TRACE_ITER(LATENCY_FMT);
+ int overwrite_flag = save_flags & TRACE_ITER(OVERWRITE);
stop_wakeup_tracer(tr);
/* make sure we put back any tasks we are tracing */
wakeup_reset(tr);
- set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
- set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+ set_tracer_flag(tr, TRACE_ITER(LATENCY_FMT), lat_flag);
+ set_tracer_flag(tr, TRACE_ITER(OVERWRITE), overwrite_flag);
ftrace_reset_array_ops(tr);
wakeup_busy = false;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 0f932b22f9ec..e96d0063cbcf 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
+#include <linux/kernel_stat.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -123,6 +124,119 @@ const char *get_syscall_name(int syscall)
return entry->name;
}
+/* Added to user strings or arrays when max limit is reached */
+#define EXTRA "..."
+
+static void get_dynamic_len_ptr(struct syscall_trace_enter *trace,
+ struct syscall_metadata *entry,
+ int *offset_p, int *len_p, unsigned char **ptr_p)
+{
+ unsigned char *ptr;
+ int offset = *offset_p;
+ int val;
+
+ /* This arg points to a user space string */
+ ptr = (void *)trace->args + sizeof(long) * entry->nb_args + offset;
+ val = *(int *)ptr;
+
+ /* The value is a dynamic string (len << 16 | offset) */
+ ptr = (void *)trace + (val & 0xffff);
+ *len_p = val >> 16;
+ offset += 4;
+
+ *ptr_p = ptr;
+ *offset_p = offset;
+}
+
+static enum print_line_t
+sys_enter_openat_print(struct syscall_trace_enter *trace, struct syscall_metadata *entry,
+ struct trace_seq *s, struct trace_event *event)
+{
+ unsigned char *ptr;
+ int offset = 0;
+ int bits, len;
+ bool done = false;
+ static const struct trace_print_flags __flags[] =
+ {
+ { O_TMPFILE, "O_TMPFILE" },
+ { O_WRONLY, "O_WRONLY" },
+ { O_RDWR, "O_RDWR" },
+ { O_CREAT, "O_CREAT" },
+ { O_EXCL, "O_EXCL" },
+ { O_NOCTTY, "O_NOCTTY" },
+ { O_TRUNC, "O_TRUNC" },
+ { O_APPEND, "O_APPEND" },
+ { O_NONBLOCK, "O_NONBLOCK" },
+ { O_DSYNC, "O_DSYNC" },
+ { O_DIRECT, "O_DIRECT" },
+ { O_LARGEFILE, "O_LARGEFILE" },
+ { O_DIRECTORY, "O_DIRECTORY" },
+ { O_NOFOLLOW, "O_NOFOLLOW" },
+ { O_NOATIME, "O_NOATIME" },
+ { O_CLOEXEC, "O_CLOEXEC" },
+ { -1, NULL }
+ };
+
+ trace_seq_printf(s, "%s(", entry->name);
+
+ for (int i = 0; !done && i < entry->nb_args; i++) {
+
+ if (trace_seq_has_overflowed(s))
+ goto end;
+
+ if (i)
+ trace_seq_puts(s, ", ");
+
+ switch (i) {
+ case 2:
+ bits = trace->args[2];
+
+ trace_seq_puts(s, "flags: ");
+
+ /* No need to show mode when not creating the file */
+ if (!(bits & (O_CREAT|O_TMPFILE)))
+ done = true;
+
+ if (!(bits & O_ACCMODE)) {
+ if (!bits) {
+ trace_seq_puts(s, "O_RDONLY");
+ continue;
+ }
+ trace_seq_puts(s, "O_RDONLY|");
+ }
+
+ trace_print_flags_seq(s, "|", bits, __flags);
+ /*
+ * trace_print_flags_seq() adds a '\0' to the buffer,
+ * but we still need to append more to the seq.
+ */
+ if (!trace_seq_has_overflowed(s))
+ trace_seq_pop(s);
+
+ continue;
+ case 3:
+ trace_seq_printf(s, "%s: 0%03o", entry->args[i],
+ (unsigned int)trace->args[i]);
+ continue;
+ }
+
+ trace_seq_printf(s, "%s: %lu", entry->args[i],
+ trace->args[i]);
+
+ if (!(BIT(i) & entry->user_mask))
+ continue;
+
+ get_dynamic_len_ptr(trace, entry, &offset, &len, &ptr);
+ trace_seq_printf(s, " \"%.*s\"", len, ptr);
+ }
+
+ trace_seq_putc(s, ')');
+end:
+ trace_seq_putc(s, '\n');
+
+ return trace_handle_return(s);
+}
+
static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_event *event)
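
get_dynamic_len_ptr() above describes the copied user-space data with a single 32-bit value: the length in the upper 16 bits and the offset of the data from the start of the trace entry in the lower 16 bits. A small stand-alone sketch of that decoding follows; the fake entry layout and all names here are made up for illustration.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void decode_dynamic(const void *entry, uint32_t val)
{
	uint16_t offset = val & 0xffff;		/* lower 16 bits: offset from entry start */
	uint16_t len = val >> 16;		/* upper 16 bits: length of the copied data */
	const unsigned char *data = (const unsigned char *)entry + offset;

	printf("len=%u data=\"%.*s\"\n", (unsigned)len, (int)len, (const char *)data);
}

int main(void)
{
	/* Fake "entry": 8 bytes of header followed by the copied string. */
	unsigned char entry[32] = { 0 };
	const char *payload = "/etc/passwd";
	uint16_t off = 8, len = (uint16_t)strlen(payload);

	memcpy(entry + off, payload, len);
	decode_dynamic(entry, ((uint32_t)len << 16) | off);
	return 0;
}
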
@@ -132,7 +246,9 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_entry *ent = iter->ent;
struct syscall_trace_enter *trace;
struct syscall_metadata *entry;
- int i, syscall;
+ int i, syscall, val, len;
+ unsigned char *ptr;
+ int offset = 0;
trace = (typeof(trace))ent;
syscall = trace->nr;
@@ -146,9 +262,20 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
goto end;
}
+ switch (entry->syscall_nr) {
+ case __NR_openat:
+ if (!tr || !(tr->trace_flags & TRACE_ITER(VERBOSE)))
+ return sys_enter_openat_print(trace, entry, s, event);
+ break;
+ default:
+ break;
+ }
+
trace_seq_printf(s, "%s(", entry->name);
for (i = 0; i < entry->nb_args; i++) {
+ bool printable = false;
+ char *str;
if (trace_seq_has_overflowed(s))
goto end;
@@ -157,7 +284,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
trace_seq_puts(s, ", ");
/* parameter types */
- if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
+ if (tr && tr->trace_flags & TRACE_ITER(VERBOSE))
trace_seq_printf(s, "%s ", entry->types[i]);
/* parameter values */
@@ -167,6 +294,48 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
else
trace_seq_printf(s, "%s: 0x%lx", entry->args[i],
trace->args[i]);
+
+ if (!(BIT(i) & entry->user_mask))
+ continue;
+
+ get_dynamic_len_ptr(trace, entry, &offset, &len, &ptr);
+
+ if (entry->user_arg_size < 0 || entry->user_arg_is_str) {
+ trace_seq_printf(s, " \"%.*s\"", len, ptr);
+ continue;
+ }
+
+ val = trace->args[entry->user_arg_size];
+
+ str = ptr;
+ trace_seq_puts(s, " (");
+ for (int x = 0; x < len; x++, ptr++) {
+ if (isascii(*ptr) && isprint(*ptr))
+ printable = true;
+ if (x)
+ trace_seq_putc(s, ':');
+ trace_seq_printf(s, "%02x", *ptr);
+ }
+ if (len < val)
+ trace_seq_printf(s, ", %s", EXTRA);
+
+ trace_seq_putc(s, ')');
+
+ /* If nothing is printable, don't bother printing anything */
+ if (!printable)
+ continue;
+
+ trace_seq_puts(s, " \"");
+ for (int x = 0; x < len; x++) {
+ if (isascii(str[x]) && isprint(str[x]))
+ trace_seq_putc(s, str[x]);
+ else
+ trace_seq_putc(s, '.');
+ }
+ if (len < val)
+ trace_seq_printf(s, "\"%s", EXTRA);
+ else
+ trace_seq_putc(s, '"');
}
trace_seq_putc(s, ')');
@@ -212,26 +381,107 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
.size = sizeof(_type), .align = __alignof__(_type), \
.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }
+/* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+
+static int __init
+sys_enter_openat_print_fmt(struct syscall_metadata *entry, char *buf, int len)
+{
+ int pos = 0;
+
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "\"dfd: 0x%%08lx, filename: 0x%%08lx \\\"%%s\\\", flags: %%s%%s, mode: 0%%03o\",");
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ " ((unsigned long)(REC->dfd)),");
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ " ((unsigned long)(REC->filename)),");
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ " __get_str(__filename_val),");
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ " (REC->flags & ~3) && !(REC->flags & 3) ? \"O_RDONLY|\" : \"\", ");
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ " REC->flags ? __print_flags(REC->flags, \"|\", ");
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_WRONLY\" }, ", O_WRONLY);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_RDWR\" }, ", O_RDWR);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_CREAT\" }, ", O_CREAT);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_EXCL\" }, ", O_EXCL);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_NOCTTY\" }, ", O_NOCTTY);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_TRUNC\" }, ", O_TRUNC);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_APPEND\" }, ", O_APPEND);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_NONBLOCK\" }, ", O_NONBLOCK);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_DSYNC\" }, ", O_DSYNC);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_DIRECT\" }, ", O_DIRECT);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_LARGEFILE\" }, ", O_LARGEFILE);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_DIRECTORY\" }, ", O_DIRECTORY);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_NOFOLLOW\" }, ", O_NOFOLLOW);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_NOATIME\" }, ", O_NOATIME);
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ "{ 0x%x, \"O_CLOEXEC\" }) : \"O_RDONLY\", ", O_CLOEXEC);
+
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ " ((unsigned long)(REC->mode))");
+ return pos;
+}
+
static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
+ bool is_string = entry->user_arg_is_str;
int i;
int pos = 0;
- /* When len=0, we just calculate the needed length */
-#define LEN_OR_ZERO (len ? len - pos : 0)
+ switch (entry->syscall_nr) {
+ case __NR_openat:
+ return sys_enter_openat_print_fmt(entry, buf, len);
+ default:
+ break;
+ }
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < entry->nb_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
- entry->args[i], sizeof(unsigned long),
- i == entry->nb_args - 1 ? "" : ", ");
+ if (i)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", ");
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx",
+ entry->args[i], sizeof(unsigned long));
+
+ if (!(BIT(i) & entry->user_mask))
+ continue;
+
+ /* Add the format for the user space string or array */
+ if (entry->user_arg_size < 0 || is_string)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " (%%s)");
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < entry->nb_args; i++) {
pos += snprintf(buf + pos, LEN_OR_ZERO,
", ((unsigned long)(REC->%s))", entry->args[i]);
+ if (!(BIT(i) & entry->user_mask))
+ continue;
+ /* The user space data for arg has name __<arg>_val */
+ if (entry->user_arg_size < 0 || is_string) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
+ entry->args[i]);
+ } else {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", __print_dynamic_array(__%s_val, 1)",
+ entry->args[i]);
+ }
}
#undef LEN_OR_ZERO
@@ -277,8 +527,11 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
struct syscall_trace_enter trace;
struct syscall_metadata *meta = call->data;
+ unsigned long mask;
+ char *arg;
int offset = offsetof(typeof(trace), args);
int ret = 0;
+ int len;
int i;
for (i = 0; i < meta->nb_args; i++) {
@@ -291,9 +544,320 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
offset += sizeof(unsigned long);
}
+ if (ret || !meta->user_mask)
+ return ret;
+
+ mask = meta->user_mask;
+
+ while (mask) {
+ int idx = ffs(mask) - 1;
+ mask &= ~BIT(idx);
+
+ /*
+ * User space data is faulted into a temporary buffer and then
+ * added as a dynamic string or array to the end of the event.
+ * The user space data name for the arg pointer is
+ * "__<arg>_val".
+ */
+ len = strlen(meta->args[idx]) + sizeof("___val");
+ arg = kmalloc(len, GFP_KERNEL);
+ if (WARN_ON_ONCE(!arg)) {
+ meta->user_mask = 0;
+ return -ENOMEM;
+ }
+
+ snprintf(arg, len, "__%s_val", meta->args[idx]);
+
+ ret = trace_define_field(call, "__data_loc char[]",
+ arg, offset, sizeof(int), 0,
+ FILTER_OTHER);
+ if (ret) {
+ kfree(arg);
+ break;
+ }
+ offset += 4;
+ }
return ret;
}
+/*
+ * Create a per CPU temporary buffer to copy user space pointers into.
+ *
+ * SYSCALL_FAULT_USER_MAX is the maximum amount to copy from user space
+ * (defined in kernel/trace/trace.h).
+ *
+ * SYSCALL_FAULT_ARG_SZ is that amount plus the nul terminating byte and
+ * the possibly appended EXTRA marker (4 bytes).
+ *
+ * SYSCALL_FAULT_BUF_SZ is the size of the per CPU buffer that user space
+ * memory is copied into. It holds up to 3 args, as at most 3 args may be
+ * copied from any one system call.
+ */
+#define SYSCALL_FAULT_ARG_SZ (SYSCALL_FAULT_USER_MAX + 1 + 4)
+#define SYSCALL_FAULT_MAX_CNT 3
+#define SYSCALL_FAULT_BUF_SZ (SYSCALL_FAULT_ARG_SZ * SYSCALL_FAULT_MAX_CNT)
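A quick sketch of how these sizes compose, writing SYSCALL_FAULT_USER_MAX as N
(its actual value lives in kernel/trace/trace.h and is not shown in this hunk):

    SYSCALL_FAULT_ARG_SZ = N + 1 (nul byte) + 4 (room for the EXTRA "..." marker)
    SYSCALL_FAULT_BUF_SZ = 3 * SYSCALL_FAULT_ARG_SZ   (one slot per copied arg)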
+
+/* Use the tracing per CPU buffer infrastructure to copy from user space */
+struct syscall_user_buffer {
+ struct trace_user_buf_info buf;
+ struct rcu_head rcu;
+};
+
+static struct syscall_user_buffer *syscall_buffer;
+
+static int syscall_fault_buffer_enable(void)
+{
+ struct syscall_user_buffer *sbuf;
+ int ret;
+
+ lockdep_assert_held(&syscall_trace_lock);
+
+ if (syscall_buffer) {
+ trace_user_fault_get(&syscall_buffer->buf);
+ return 0;
+ }
+
+ sbuf = kmalloc(sizeof(*sbuf), GFP_KERNEL);
+ if (!sbuf)
+ return -ENOMEM;
+
+ ret = trace_user_fault_init(&sbuf->buf, SYSCALL_FAULT_BUF_SZ);
+ if (ret < 0) {
+ kfree(sbuf);
+ return ret;
+ }
+
+ WRITE_ONCE(syscall_buffer, sbuf);
+
+ return 0;
+}
+
+static void rcu_free_syscall_buffer(struct rcu_head *rcu)
+{
+ struct syscall_user_buffer *sbuf =
+ container_of(rcu, struct syscall_user_buffer, rcu);
+
+ trace_user_fault_destroy(&sbuf->buf);
+ kfree(sbuf);
+}
+
+
+static void syscall_fault_buffer_disable(void)
+{
+ struct syscall_user_buffer *sbuf = syscall_buffer;
+
+ lockdep_assert_held(&syscall_trace_lock);
+
+ if (trace_user_fault_put(&sbuf->buf))
+ return;
+
+ WRITE_ONCE(syscall_buffer, NULL);
+ call_rcu_tasks_trace(&sbuf->rcu, rcu_free_syscall_buffer);
+}
+
+struct syscall_args {
+ char *ptr_array[SYSCALL_FAULT_MAX_CNT];
+ int read[SYSCALL_FAULT_MAX_CNT];
+ int uargs;
+};
+
+static int syscall_copy_user(char *buf, const char __user *ptr,
+ size_t size, void *data)
+{
+ struct syscall_args *args = data;
+ int ret;
+
+ for (int i = 0; i < args->uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+ ptr = (char __user *)args->ptr_array[i];
+ ret = strncpy_from_user(buf, ptr, size);
+ args->read[i] = ret;
+ }
+ return 0;
+}
+
+static int syscall_copy_user_array(char *buf, const char __user *ptr,
+ size_t size, void *data)
+{
+ struct syscall_args *args = data;
+ int ret;
+
+ for (int i = 0; i < args->uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+ ptr = (char __user *)args->ptr_array[i];
+ ret = __copy_from_user(buf, ptr, size);
+ args->read[i] = ret ? -1 : size;
+ }
+ return 0;
+}
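The two callbacks above rely on different return conventions, which is why the
array variant maps its result through `ret ? -1 : size`:

	/*
	 * strncpy_from_user(dst, src, n)  -> bytes copied, or -EFAULT
	 * __copy_from_user(dst, src, n)   -> bytes NOT copied (0 on success)
	 *
	 * So args->read[i] ends up holding the string length for the string
	 * case, and either 'size' or -1 (fault) for the fixed-size array case.
	 */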
+
+static char *sys_fault_user(unsigned int buf_size,
+ struct syscall_metadata *sys_data,
+ struct syscall_user_buffer *sbuf,
+ unsigned long *args,
+ unsigned int data_size[SYSCALL_FAULT_MAX_CNT])
+{
+ trace_user_buf_copy syscall_copy = syscall_copy_user;
+ unsigned long mask = sys_data->user_mask;
+ unsigned long size = SYSCALL_FAULT_ARG_SZ - 1;
+ struct syscall_args sargs;
+ bool array = false;
+ char *buffer;
+ char *buf;
+ int ret;
+ int i = 0;
+
+	/* The EXTRA marker is appended to the user data in the buffer */
+ BUILD_BUG_ON(SYSCALL_FAULT_USER_MAX + sizeof(EXTRA) >=
+ SYSCALL_FAULT_ARG_SZ);
+
+	/*
+	 * If this system call event has a size argument, use it to
+	 * determine how much user space memory to read, and read it
+	 * as an array rather than a string.
+	 */
+ if (sys_data->user_arg_size >= 0) {
+ array = true;
+ size = args[sys_data->user_arg_size];
+ if (size > SYSCALL_FAULT_ARG_SZ - 1)
+ size = SYSCALL_FAULT_ARG_SZ - 1;
+ syscall_copy = syscall_copy_user_array;
+ }
+
+ while (mask) {
+ int idx = ffs(mask) - 1;
+ mask &= ~BIT(idx);
+
+ if (WARN_ON_ONCE(i == SYSCALL_FAULT_MAX_CNT))
+ break;
+
+ /* Get the pointer to user space memory to read */
+ sargs.ptr_array[i++] = (char *)args[idx];
+ }
+
+ sargs.uargs = i;
+
+ /* Clear the values that are not used */
+ for (; i < SYSCALL_FAULT_MAX_CNT; i++) {
+ data_size[i] = -1; /* Denotes no pointer */
+ }
+
+ /* A zero size means do not even try */
+ if (!buf_size)
+ return NULL;
+
+ buffer = trace_user_fault_read(&sbuf->buf, NULL, size,
+ syscall_copy, &sargs);
+ if (!buffer)
+ return NULL;
+
+ buf = buffer;
+ for (i = 0; i < sargs.uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+
+ ret = sargs.read[i];
+ if (ret < 0)
+ continue;
+ buf[ret] = '\0';
+
+ /* For strings, replace any non-printable characters with '.' */
+ if (!array) {
+ for (int x = 0; x < ret; x++) {
+ if (!isprint(buf[x]))
+ buf[x] = '.';
+ }
+
+ size = min(buf_size, SYSCALL_FAULT_USER_MAX);
+
+ /*
+ * If the text was truncated due to our max limit,
+ * add "..." to the string.
+ */
+ if (ret > size) {
+ strscpy(buf + size, EXTRA, sizeof(EXTRA));
+ ret = size + sizeof(EXTRA);
+ } else {
+ buf[ret++] = '\0';
+ }
+ } else {
+ ret = min((unsigned int)ret, buf_size);
+ }
+ data_size[i] = ret;
+ }
+
+ return buffer;
+}
+
+static int
+syscall_get_data(struct syscall_metadata *sys_data, unsigned long *args,
+ char **buffer, int *size, int *user_sizes, int *uargs,
+ int buf_size)
+{
+ struct syscall_user_buffer *sbuf;
+ int i;
+
+	/* If the syscall_buffer is NULL, tracing is being shut down */
+ sbuf = READ_ONCE(syscall_buffer);
+ if (!sbuf)
+ return -1;
+
+ *buffer = sys_fault_user(buf_size, sys_data, sbuf, args, user_sizes);
+	/*
+	 * user_sizes[] holds the amount of user data to append.
+	 * Add 4 bytes per entry for the meta field that stores the
+	 * offset and size of the user memory at the end of the event.
+	 */
+ for (i = 0; i < SYSCALL_FAULT_MAX_CNT; i++) {
+ if (user_sizes[i] < 0)
+ break;
+ *size += user_sizes[i] + 4;
+ }
+	/* Save the number of user space arguments read for this syscall */
+ *uargs = i;
+ return 0;
+}
+
+static void syscall_put_data(struct syscall_metadata *sys_data,
+ struct syscall_trace_enter *entry,
+ char *buffer, int size, int *user_sizes, int uargs)
+{
+ char *buf = buffer;
+ void *ptr;
+ int val;
+
+ /*
+ * Set the pointer to point to the meta data of the event
+ * that has information about the stored user space memory.
+ */
+ ptr = (void *)entry->args + sizeof(unsigned long) * sys_data->nb_args;
+
+ /*
+ * The meta data will store the offset of the user data from
+ * the beginning of the event. That is after the static arguments
+ * and the meta data fields.
+ */
+ val = (ptr - (void *)entry) + 4 * uargs;
+
+ for (int i = 0; i < uargs; i++) {
+
+ if (i)
+ val += user_sizes[i - 1];
+
+ /* Store the offset and the size into the meta data */
+ *(int *)ptr = val | (user_sizes[i] << 16);
+
+ /* Skip the meta data */
+ ptr += 4;
+ }
+
+ for (int i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+		/* Nothing to do if the user data was empty or the read faulted */
+ if (!user_sizes[i])
+ continue;
+
+ memcpy(ptr, buf, user_sizes[i]);
+ ptr += user_sizes[i];
+ }
+}
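The meta word written above follows the usual __data_loc layout: the offset of
the user data from the start of the event in the low 16 bits and its length in
the high 16 bits. A minimal sketch of how a consumer (such as the
get_dynamic_len_ptr() helper used by the print functions) could unpack one of
these words; the helper name and signature here are illustrative, not part of
this patch:

	static void unpack_user_meta(void *event, int meta, int *len, char **ptr)
	{
		*len = meta >> 16;			/* length of the copied user data */
		*ptr = (char *)event + (meta & 0xffff);	/* offset from start of event */
	}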
+
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
@@ -302,15 +866,18 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct trace_event_buffer fbuffer;
unsigned long args[6];
+ char *user_ptr;
+ int user_sizes[SYSCALL_FAULT_MAX_CNT] = {};
int syscall_nr;
- int size;
+ int size = 0;
+ int uargs = 0;
+ bool mayfault;
/*
* Syscall probe called with preemption enabled, but the ring
* buffer and per-cpu data require preemption to be disabled.
*/
might_fault();
- guard(preempt_notrace)();
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
@@ -327,7 +894,20 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
if (!sys_data)
return;
- size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+ /* Check if this syscall event faults in user space memory */
+ mayfault = sys_data->user_mask != 0;
+
+ guard(preempt_notrace)();
+
+ syscall_get_arguments(current, regs, args);
+
+ if (mayfault) {
+ if (syscall_get_data(sys_data, args, &user_ptr,
+ &size, user_sizes, &uargs, tr->syscall_buf_sz) < 0)
+ return;
+ }
+
+ size += sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
if (!entry)
@@ -335,9 +915,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry = ring_buffer_event_data(fbuffer.event);
entry->nr = syscall_nr;
- syscall_get_arguments(current, regs, args);
+
memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
+ if (mayfault)
+ syscall_put_data(sys_data, entry, user_ptr, size, user_sizes, uargs);
+
trace_event_buffer_commit(&fbuffer);
}
@@ -386,39 +969,50 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
static int reg_event_syscall_enter(struct trace_event_file *file,
struct trace_event_call *call)
{
+ struct syscall_metadata *sys_data = call->data;
struct trace_array *tr = file->tr;
int ret = 0;
int num;
- num = ((struct syscall_metadata *)call->data)->syscall_nr;
+ num = sys_data->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
- mutex_lock(&syscall_trace_lock);
- if (!tr->sys_refcount_enter)
+ guard(mutex)(&syscall_trace_lock);
+ if (sys_data->user_mask) {
+ ret = syscall_fault_buffer_enable();
+ if (ret < 0)
+ return ret;
+ }
+ if (!tr->sys_refcount_enter) {
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
- if (!ret) {
- WRITE_ONCE(tr->enter_syscall_files[num], file);
- tr->sys_refcount_enter++;
+ if (ret < 0) {
+ if (sys_data->user_mask)
+ syscall_fault_buffer_disable();
+ return ret;
+ }
}
- mutex_unlock(&syscall_trace_lock);
- return ret;
+ WRITE_ONCE(tr->enter_syscall_files[num], file);
+ tr->sys_refcount_enter++;
+ return 0;
}
static void unreg_event_syscall_enter(struct trace_event_file *file,
struct trace_event_call *call)
{
+ struct syscall_metadata *sys_data = call->data;
struct trace_array *tr = file->tr;
int num;
- num = ((struct syscall_metadata *)call->data)->syscall_nr;
+ num = sys_data->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
- mutex_lock(&syscall_trace_lock);
+ guard(mutex)(&syscall_trace_lock);
tr->sys_refcount_enter--;
WRITE_ONCE(tr->enter_syscall_files[num], NULL);
if (!tr->sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
- mutex_unlock(&syscall_trace_lock);
+ if (sys_data->user_mask)
+ syscall_fault_buffer_disable();
}
static int reg_event_syscall_exit(struct trace_event_file *file,
@@ -459,6 +1053,215 @@ static void unreg_event_syscall_exit(struct trace_event_file *file,
mutex_unlock(&syscall_trace_lock);
}
+/*
+ * For system calls that reference user space memory that can be
+ * recorded into the event, set the system call meta data's user_mask
+ * to a bitmask of the "args" indices that point to the user space
+ * memory to retrieve.
+ */
+static void check_faultable_syscall(struct trace_event_call *call, int nr)
+{
+ struct syscall_metadata *sys_data = call->data;
+ unsigned long mask;
+
+ /* Only work on entry */
+ if (sys_data->enter_event != call)
+ return;
+
+ sys_data->user_arg_size = -1;
+
+ switch (nr) {
+ /* user arg 1 with size arg at 2 */
+ case __NR_write:
+#ifdef __NR_mq_timedsend
+ case __NR_mq_timedsend:
+#endif
+ case __NR_pwrite64:
+ sys_data->user_mask = BIT(1);
+ sys_data->user_arg_size = 2;
+ break;
+ /* user arg 0 with size arg at 1 as string */
+ case __NR_setdomainname:
+ case __NR_sethostname:
+ sys_data->user_mask = BIT(0);
+ sys_data->user_arg_size = 1;
+ sys_data->user_arg_is_str = 1;
+ break;
+#ifdef __NR_kexec_file_load
+ /* user arg 4 with size arg at 3 as string */
+ case __NR_kexec_file_load:
+ sys_data->user_mask = BIT(4);
+ sys_data->user_arg_size = 3;
+ sys_data->user_arg_is_str = 1;
+ break;
+#endif
+ /* user arg at position 0 */
+#ifdef __NR_access
+ case __NR_access:
+#endif
+ case __NR_acct:
+ case __NR_chdir:
+#ifdef __NR_chown
+ case __NR_chown:
+#endif
+#ifdef __NR_chmod
+ case __NR_chmod:
+#endif
+ case __NR_chroot:
+#ifdef __NR_creat
+ case __NR_creat:
+#endif
+ case __NR_delete_module:
+ case __NR_execve:
+ case __NR_fsopen:
+#ifdef __NR_lchown
+ case __NR_lchown:
+#endif
+#ifdef __NR_open
+ case __NR_open:
+#endif
+ case __NR_memfd_create:
+#ifdef __NR_mkdir
+ case __NR_mkdir:
+#endif
+#ifdef __NR_mknod
+ case __NR_mknod:
+#endif
+ case __NR_mq_open:
+ case __NR_mq_unlink:
+#ifdef __NR_readlink
+ case __NR_readlink:
+#endif
+#ifdef __NR_rmdir
+ case __NR_rmdir:
+#endif
+ case __NR_shmdt:
+#ifdef __NR_statfs
+ case __NR_statfs:
+#endif
+ case __NR_swapon:
+ case __NR_swapoff:
+#ifdef __NR_truncate
+ case __NR_truncate:
+#endif
+#ifdef __NR_unlink
+ case __NR_unlink:
+#endif
+ case __NR_umount2:
+#ifdef __NR_utime
+ case __NR_utime:
+#endif
+#ifdef __NR_utimes
+ case __NR_utimes:
+#endif
+ sys_data->user_mask = BIT(0);
+ break;
+ /* user arg at position 1 */
+ case __NR_execveat:
+ case __NR_faccessat:
+ case __NR_faccessat2:
+ case __NR_finit_module:
+ case __NR_fchmodat:
+ case __NR_fchmodat2:
+ case __NR_fchownat:
+ case __NR_fgetxattr:
+ case __NR_flistxattr:
+ case __NR_fsetxattr:
+ case __NR_fspick:
+ case __NR_fremovexattr:
+#ifdef __NR_futimesat
+ case __NR_futimesat:
+#endif
+ case __NR_inotify_add_watch:
+ case __NR_mkdirat:
+ case __NR_mknodat:
+ case __NR_mount_setattr:
+ case __NR_name_to_handle_at:
+#ifdef __NR_newfstatat
+ case __NR_newfstatat:
+#endif
+ case __NR_openat:
+ case __NR_openat2:
+ case __NR_open_tree:
+ case __NR_open_tree_attr:
+ case __NR_readlinkat:
+ case __NR_quotactl:
+ case __NR_syslog:
+ case __NR_statx:
+ case __NR_unlinkat:
+#ifdef __NR_utimensat
+ case __NR_utimensat:
+#endif
+ sys_data->user_mask = BIT(1);
+ break;
+ /* user arg at position 2 */
+ case __NR_init_module:
+ case __NR_fsconfig:
+ sys_data->user_mask = BIT(2);
+ break;
+ /* user arg at position 4 */
+ case __NR_fanotify_mark:
+ sys_data->user_mask = BIT(4);
+ break;
+ /* 2 user args, 0 and 1 */
+ case __NR_add_key:
+ case __NR_getxattr:
+ case __NR_lgetxattr:
+ case __NR_lremovexattr:
+#ifdef __NR_link
+ case __NR_link:
+#endif
+ case __NR_listxattr:
+ case __NR_llistxattr:
+ case __NR_lsetxattr:
+ case __NR_pivot_root:
+ case __NR_removexattr:
+#ifdef __NR_rename
+ case __NR_rename:
+#endif
+ case __NR_request_key:
+ case __NR_setxattr:
+#ifdef __NR_symlink
+ case __NR_symlink:
+#endif
+ sys_data->user_mask = BIT(0) | BIT(1);
+ break;
+ /* 2 user args, 0 and 2 */
+ case __NR_symlinkat:
+ sys_data->user_mask = BIT(0) | BIT(2);
+ break;
+ /* 2 user args, 1 and 3 */
+ case __NR_getxattrat:
+ case __NR_linkat:
+ case __NR_listxattrat:
+ case __NR_move_mount:
+#ifdef __NR_renameat
+ case __NR_renameat:
+#endif
+ case __NR_renameat2:
+ case __NR_removexattrat:
+ case __NR_setxattrat:
+ sys_data->user_mask = BIT(1) | BIT(3);
+ break;
+	case __NR_mount: /* dev_name, dir_name and type */
+ sys_data->user_mask = BIT(0) | BIT(1) | BIT(2);
+ break;
+ default:
+ sys_data->user_mask = 0;
+ return;
+ }
+
+ if (sys_data->user_arg_size < 0)
+ return;
+
+ /*
+ * The user_arg_size can only be used when the system call
+ * is reading only a single address from user space.
+ */
+ mask = sys_data->user_mask;
+ if (WARN_ON(mask & (mask - 1)))
+ sys_data->user_arg_size = -1;
+}
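As a concrete reading of the table above, sethostname() maps as follows (taken
directly from its case labels):

	/*
	 * sethostname(name, len):
	 *   user_mask       = BIT(0)  - args[0] is the user pointer to record
	 *   user_arg_size   = 1       - args[1] bounds how many bytes to read
	 *   user_arg_is_str = 1       - record the data as a string, not an array
	 */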
+
static int __init init_syscall_trace(struct trace_event_call *call)
{
int id;
@@ -471,6 +1274,8 @@ static int __init init_syscall_trace(struct trace_event_call *call)
return -ENOSYS;
}
+ check_faultable_syscall(call, num);
+
if (set_syscall_print_fmt(call) < 0)
return -ENOMEM;
@@ -598,9 +1403,14 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
struct hlist_head *head;
unsigned long args[6];
bool valid_prog_array;
+ bool mayfault;
+ char *user_ptr;
+ int user_sizes[SYSCALL_FAULT_MAX_CNT] = {};
+ int buf_size = CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT;
int syscall_nr;
int rctx;
- int size;
+ int size = 0;
+ int uargs = 0;
/*
* Syscall probe called with preemption enabled, but the ring
@@ -619,13 +1429,24 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
if (!sys_data)
return;
+ syscall_get_arguments(current, regs, args);
+
+ /* Check if this syscall event faults in user space memory */
+ mayfault = sys_data->user_mask != 0;
+
+ if (mayfault) {
+ if (syscall_get_data(sys_data, args, &user_ptr,
+ &size, user_sizes, &uargs, buf_size) < 0)
+ return;
+ }
+
head = this_cpu_ptr(sys_data->enter_event->perf_events);
valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
if (!valid_prog_array && hlist_empty(head))
return;
/* get the size after alignment with the u32 buffer size field */
- size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
+ size += sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
@@ -634,9 +1455,11 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
return;
rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, args);
memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
+ if (mayfault)
+ syscall_put_data(sys_data, rec, user_ptr, size, user_sizes, uargs);
+
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
hlist_empty(head)) {
@@ -651,36 +1474,46 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
static int perf_sysenter_enable(struct trace_event_call *call)
{
- int ret = 0;
+ struct syscall_metadata *sys_data = call->data;
int num;
+ int ret;
- num = ((struct syscall_metadata *)call->data)->syscall_nr;
+ num = sys_data->syscall_nr;
- mutex_lock(&syscall_trace_lock);
- if (!sys_perf_refcount_enter)
+ guard(mutex)(&syscall_trace_lock);
+ if (sys_data->user_mask) {
+ ret = syscall_fault_buffer_enable();
+ if (ret < 0)
+ return ret;
+ }
+ if (!sys_perf_refcount_enter) {
ret = register_trace_sys_enter(perf_syscall_enter, NULL);
- if (ret) {
- pr_info("event trace: Could not activate syscall entry trace point");
- } else {
- set_bit(num, enabled_perf_enter_syscalls);
- sys_perf_refcount_enter++;
+ if (ret) {
+ pr_info("event trace: Could not activate syscall entry trace point");
+ if (sys_data->user_mask)
+ syscall_fault_buffer_disable();
+ return ret;
+ }
}
- mutex_unlock(&syscall_trace_lock);
- return ret;
+ set_bit(num, enabled_perf_enter_syscalls);
+ sys_perf_refcount_enter++;
+ return 0;
}
static void perf_sysenter_disable(struct trace_event_call *call)
{
+ struct syscall_metadata *sys_data = call->data;
int num;
- num = ((struct syscall_metadata *)call->data)->syscall_nr;
+ num = sys_data->syscall_nr;
- mutex_lock(&syscall_trace_lock);
+ guard(mutex)(&syscall_trace_lock);
sys_perf_refcount_enter--;
clear_bit(num, enabled_perf_enter_syscalls);
if (!sys_perf_refcount_enter)
unregister_trace_sys_enter(perf_syscall_enter, NULL);
- mutex_unlock(&syscall_trace_lock);
+ if (sys_data->user_mask)
+ syscall_fault_buffer_disable();
}
static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
@@ -757,22 +1590,21 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
static int perf_sysexit_enable(struct trace_event_call *call)
{
- int ret = 0;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
- mutex_lock(&syscall_trace_lock);
- if (!sys_perf_refcount_exit)
- ret = register_trace_sys_exit(perf_syscall_exit, NULL);
- if (ret) {
- pr_info("event trace: Could not activate syscall exit trace point");
- } else {
- set_bit(num, enabled_perf_exit_syscalls);
- sys_perf_refcount_exit++;
+ guard(mutex)(&syscall_trace_lock);
+ if (!sys_perf_refcount_exit) {
+ int ret = register_trace_sys_exit(perf_syscall_exit, NULL);
+ if (ret) {
+ pr_info("event trace: Could not activate syscall exit trace point");
+ return ret;
+ }
}
- mutex_unlock(&syscall_trace_lock);
- return ret;
+ set_bit(num, enabled_perf_exit_syscalls);
+ sys_perf_refcount_exit++;
+ return 0;
}
static void perf_sysexit_disable(struct trace_event_call *call)
@@ -781,12 +1613,11 @@ static void perf_sysexit_disable(struct trace_event_call *call)
num = ((struct syscall_metadata *)call->data)->syscall_nr;
- mutex_lock(&syscall_trace_lock);
+ guard(mutex)(&syscall_trace_lock);
sys_perf_refcount_exit--;
clear_bit(num, enabled_perf_exit_syscalls);
if (!sys_perf_refcount_exit)
unregister_trace_sys_exit(perf_syscall_exit, NULL);
- mutex_unlock(&syscall_trace_lock);
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 430d09c49462..1b4f32e2b9bd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -533,21 +533,26 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
return ret;
}
+DEFINE_FREE(free_trace_uprobe, struct trace_uprobe *, if (_T) free_trace_uprobe(_T))
+
/*
* Argument syntax:
* - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS]
*/
static int __trace_uprobe_create(int argc, const char **argv)
{
+ struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL;
+ struct trace_uprobe *tu __free(free_trace_uprobe) = NULL;
+ const char *trlog __free(trace_probe_log_clear) = NULL;
const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
- char *arg, *filename, *rctr, *rctr_end, *tmp;
+ struct path path __free(path_put) = {};
unsigned long offset, ref_ctr_offset;
+ char *filename __free(kfree) = NULL;
+ char *arg, *rctr, *rctr_end, *tmp;
char *gbuf __free(kfree) = NULL;
char *buf __free(kfree) = NULL;
enum probe_print_type ptype;
- struct trace_uprobe *tu;
bool is_return = false;
- struct path path;
int i, ret;
ref_ctr_offset = 0;
@@ -565,7 +570,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
if (argc < 2)
return -ECANCELED;
- trace_probe_log_init("trace_uprobe", argc, argv);
+ trlog = trace_probe_log_init("trace_uprobe", argc, argv);
if (argc - 2 > MAX_TRACE_ARGS) {
trace_probe_log_set_index(2);
@@ -585,10 +590,8 @@ static int __trace_uprobe_create(int argc, const char **argv)
/* Find the last occurrence, in case the path contains ':' too. */
arg = strrchr(filename, ':');
- if (!arg || !isdigit(arg[1])) {
- kfree(filename);
+ if (!arg || !isdigit(arg[1]))
return -ECANCELED;
- }
trace_probe_log_set_index(1); /* filename is the 2nd argument */
@@ -596,14 +599,11 @@ static int __trace_uprobe_create(int argc, const char **argv)
ret = kern_path(filename, LOOKUP_FOLLOW, &path);
if (ret) {
trace_probe_log_err(0, FILE_NOT_FOUND);
- kfree(filename);
- trace_probe_log_clear();
return ret;
}
if (!d_is_reg(path.dentry)) {
trace_probe_log_err(0, NO_REGULAR_FILE);
- ret = -EINVAL;
- goto fail_address_parse;
+ return -EINVAL;
}
/* Parse reference counter offset if specified. */
@@ -611,16 +611,14 @@ static int __trace_uprobe_create(int argc, const char **argv)
if (rctr) {
rctr_end = strchr(rctr, ')');
if (!rctr_end) {
- ret = -EINVAL;
rctr_end = rctr + strlen(rctr);
trace_probe_log_err(rctr_end - filename,
REFCNT_OPEN_BRACE);
- goto fail_address_parse;
+ return -EINVAL;
} else if (rctr_end[1] != '\0') {
- ret = -EINVAL;
trace_probe_log_err(rctr_end + 1 - filename,
BAD_REFCNT_SUFFIX);
- goto fail_address_parse;
+ return -EINVAL;
}
*rctr++ = '\0';
@@ -628,7 +626,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
ret = kstrtoul(rctr, 0, &ref_ctr_offset);
if (ret) {
trace_probe_log_err(rctr - filename, BAD_REFCNT);
- goto fail_address_parse;
+ return ret;
}
}
@@ -640,8 +638,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
is_return = true;
} else {
trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX);
- ret = -EINVAL;
- goto fail_address_parse;
+ return -EINVAL;
}
}
@@ -649,7 +646,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
ret = kstrtoul(arg, 0, &offset);
if (ret) {
trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS);
- goto fail_address_parse;
+ return ret;
}
/* setup a probe */
@@ -657,12 +654,12 @@ static int __trace_uprobe_create(int argc, const char **argv)
if (event) {
gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
if (!gbuf)
- goto fail_mem;
+ return -ENOMEM;
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
- goto fail_address_parse;
+ return ret;
}
if (!event) {
@@ -671,7 +668,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
tail = kstrdup(kbasename(filename), GFP_KERNEL);
if (!tail)
- goto fail_mem;
+ return -ENOMEM;
ptr = strpbrk(tail, ".-_");
if (ptr)
@@ -679,7 +676,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
buf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
if (!buf)
- goto fail_mem;
+ return -ENOMEM;
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
event = buf;
kfree(tail);
@@ -693,51 +690,36 @@ static int __trace_uprobe_create(int argc, const char **argv)
ret = PTR_ERR(tu);
/* This must return -ENOMEM otherwise there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
- goto fail_address_parse;
+ return ret;
}
tu->offset = offset;
tu->ref_ctr_offset = ref_ctr_offset;
tu->path = path;
- tu->filename = filename;
+	/* Clear @path so that it will not be freed by path_put() */
+ memset(&path, 0, sizeof(path));
+ tu->filename = no_free_ptr(filename);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER;
/* parse arguments */
for (i = 0; i < argc; i++) {
- struct traceprobe_parse_context *ctx __free(traceprobe_parse_context)
- = kzalloc(sizeof(*ctx), GFP_KERNEL);
-
- if (!ctx) {
- ret = -ENOMEM;
- goto error;
- }
- ctx->flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER;
trace_probe_log_set_index(i + 2);
ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], ctx);
if (ret)
- goto error;
+ return ret;
}
ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
ret = traceprobe_set_print_fmt(&tu->tp, ptype);
if (ret < 0)
- goto error;
+ return ret;
ret = register_trace_uprobe(tu);
if (!ret)
- goto out;
-
-error:
- free_trace_uprobe(tu);
-out:
- trace_probe_log_clear();
- return ret;
-
-fail_mem:
- ret = -ENOMEM;
-
-fail_address_parse:
- trace_probe_log_clear();
- path_put(&path);
- kfree(filename);
+ tu = NULL;
return ret;
}
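The rewrite above leans on the scope-based cleanup helpers from
<linux/cleanup.h> (DEFINE_FREE(), __free(), no_free_ptr()). A minimal
stand-alone sketch of the pattern, using a hypothetical struct foo rather than
the uprobe types:

	#include <linux/cleanup.h>
	#include <linux/slab.h>

	struct foo { int val; };

	static void free_foo(struct foo *f) { kfree(f); }
	DEFINE_FREE(put_foo, struct foo *, if (_T) free_foo(_T))

	static struct foo *installed_foo;

	static int install_foo(int val)
	{
		struct foo *f __free(put_foo) = kzalloc(sizeof(*f), GFP_KERNEL);

		if (!f)
			return -ENOMEM;

		f->val = val;
		if (val < 0)			/* any early return frees f automatically */
			return -EINVAL;

		installed_foo = no_free_ptr(f);	/* disarm the cleanup, transfer ownership */
		return 0;
	}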