summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Shakeel Butt <shakeel.butt@linux.dev> 2024-11-25 09:16:17 -0800
committer Andrew Morton <akpm@linux-foundation.org> 2025-01-13 22:40:34 -0800
commit 9023691d75f29fde884f6e243bcdad6a9dbadb19 (patch)
tree acdf20b2f965aac1c07a16c6437d76d95959d695 /include
parent 66539952627c891d8d73cdd619d1937b6b3f66c7 (diff)
mm: mmap_lock: optimize mmap_lock tracepoints
We are starting to deploy mmap_lock tracepoint monitoring across our fleet, and the early results showed that these tracepoints consume a significant amount of CPU in kernfs_path_from_node when enabled. It seems that the kernel tries to resolve the cgroup path in the fast path of the locking code when the tracepoints are enabled. In addition, some applications see their metrics regress when monitoring is enabled. The cgroup path resolution can be slow and should not be done in the fast path. Most userspace tools, like bpftrace, provide functionality to get the cgroup path from the cgroup id, so let's just trace the cgroup id, and users can use better tools to get the path in the slow path. Link: https://lkml.kernel.org/r/20241125171617.113892-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev> Reviewed-by: Yosry Ahmed <yosryahmed@google.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Roman Gushchin <roman.gushchin@linux.dev> Reviewed-by: Axel Rasmussen <axelrasmussen@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Muchun Song <muchun.song@linux.dev> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/memcontrol.h 22
-rw-r--r-- include/trace/events/mmap_lock.h 32
2 files changed, 37 insertions, 17 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5502aa8e138e..b28180269e75 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1046,6 +1046,23 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
void split_page_memcg(struct page *head, int old_order, int new_order);
+static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *memcg;
+ u64 id;
+
+ if (mem_cgroup_disabled())
+ return 0;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (!memcg)
+ memcg = root_mem_cgroup;
+ id = cgroup_id(memcg->css.cgroup);
+ rcu_read_unlock();
+ return id;
+}
+
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
@@ -1466,6 +1483,11 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
static inline void split_page_memcg(struct page *head, int old_order, int new_order)
{
}
+
+static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
+{
+ return 0;
+}
#endif /* CONFIG_MEMCG */
/*
diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
index bc2e3ad787b3..cf9f9faf8914 100644
--- a/include/trace/events/mmap_lock.h
+++ b/include/trace/events/mmap_lock.h
@@ -5,6 +5,7 @@
#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MMAP_LOCK_H
+#include <linux/memcontrol.h>
#include <linux/tracepoint.h>
#include <linux/types.h>
@@ -12,64 +13,61 @@ struct mm_struct;
DECLARE_EVENT_CLASS(mmap_lock,
- TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
+ TP_PROTO(struct mm_struct *mm, bool write),
- TP_ARGS(mm, memcg_path, write),
+ TP_ARGS(mm, write),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
- __string(memcg_path, memcg_path)
+ __field(u64, memcg_id)
__field(bool, write)
),
TP_fast_assign(
__entry->mm = mm;
- __assign_str(memcg_path);
+ __entry->memcg_id = cgroup_id_from_mm(mm);
__entry->write = write;
),
TP_printk(
- "mm=%p memcg_path=%s write=%s",
- __entry->mm,
- __get_str(memcg_path),
+ "mm=%p memcg_id=%llu write=%s",
+ __entry->mm, __entry->memcg_id,
__entry->write ? "true" : "false"
)
);
#define DEFINE_MMAP_LOCK_EVENT(name) \
DEFINE_EVENT(mmap_lock, name, \
- TP_PROTO(struct mm_struct *mm, const char *memcg_path, \
- bool write), \
- TP_ARGS(mm, memcg_path, write))
+ TP_PROTO(struct mm_struct *mm, bool write), \
+ TP_ARGS(mm, write))
DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking);
DEFINE_MMAP_LOCK_EVENT(mmap_lock_released);
TRACE_EVENT(mmap_lock_acquire_returned,
- TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write,
- bool success),
+ TP_PROTO(struct mm_struct *mm, bool write, bool success),
- TP_ARGS(mm, memcg_path, write, success),
+ TP_ARGS(mm, write, success),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
- __string(memcg_path, memcg_path)
+ __field(u64, memcg_id)
__field(bool, write)
__field(bool, success)
),
TP_fast_assign(
__entry->mm = mm;
- __assign_str(memcg_path);
+ __entry->memcg_id = cgroup_id_from_mm(mm);
__entry->write = write;
__entry->success = success;
),
TP_printk(
- "mm=%p memcg_path=%s write=%s success=%s",
+ "mm=%p memcg_id=%llu write=%s success=%s",
__entry->mm,
- __get_str(memcg_path),
+ __entry->memcg_id,
__entry->write ? "true" : "false",
__entry->success ? "true" : "false"
)