From 1186618a6a35d43a865448472a261184b608d13c Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:36 +0800 Subject: kernel/delayacct: move delayacct sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the delayacct sysctl to its own file, kernel/delayacct.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/delayacct.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 3e03d010bd2e..6b16a6930a19 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -61,9 +61,6 @@ extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); -extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); - extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); -- cgit v1.2.3 From 662ce1dc9caf493c309200edbe38d186f1ea20d0 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 1 Jun 2022 15:55:25 -0700 Subject: delayacct: track delays from write-protect copy Delay accounting does not track the delay of write-protect copy. When tasks trigger many write-protect copys(include COW and unsharing of anonymous pages[1]), it may spend a amount of time waiting for them. To get the delay of tasks in write-protect copy, could help users to evaluate the impact of using KSM or fork() or GUP. Also update tools/accounting/getdelays.c: / # ./getdelays -dl -p 231 print delayacct stats ON listen forever PID 231 CPU count real total virtual total delay total delay average 6247 1859000000 2154070021 1674255063 0.268ms IO count delay total delay average 0 0 0ms SWAP count delay total delay average 0 0 0ms RECLAIM count delay total delay average 0 0 0ms THRASHING count delay total delay average 0 0 0ms COMPACT count delay total delay average 3 72758 0ms WPCOPY count delay total delay average 3635 271567604 0ms [1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages") Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Reviewed-by: David Hildenbrand Reviewed-by: Jiang Xuexin Reviewed-by: Ran Xiaokai Reviewed-by: wangyong Cc: Jonathan Corbet Cc: Balbir Singh Cc: Mike Kravetz Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 5 ++++- include/linux/delayacct.h | 28 +++++++++++++++++++++++++++ include/uapi/linux/taskstats.h | 6 +++++- kernel/delayacct.c | 16 +++++++++++++++ mm/hugetlb.c | 8 ++++++++ mm/memory.c | 8 ++++++++ tools/accounting/getdelays.c | 8 +++++++- 7 files changed, 76 insertions(+), 3 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 197fe319cbec..241d1a87f2cd 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -15,6 +15,7 @@ c) swapping in pages d) memory reclaim e) thrashing page cache f) direct compact +g) write-protect copy and makes these statistics available to userspace through the taskstats interface. @@ -48,7 +49,7 @@ this structure. See for a description of the fields pertaining to delay accounting. It will generally be in the form of counters returning the cumulative delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page -cache, direct compact etc. +cache, direct compact, write-protect copy etc. Taking the difference of two successive readings of a given counter (say cpu_delay_total) for a task will give the delay @@ -117,6 +118,8 @@ Get sum of delays, since system boot, for all pids with tgid 5:: 0 0 0ms COMPACT count delay total delay average 0 0 0ms + WPCOPY count delay total delay average + 0 0 0ms Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6b16a6930a19..58aea2d7385c 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -45,9 +45,13 @@ struct task_delay_info { u64 compact_start; u64 compact_delay; /* wait for memory compact */ + u64 wpcopy_start; + u64 wpcopy_delay; /* wait for write-protect copy */ + u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ u32 compact_count; /* total count of memory compact */ + u32 wpcopy_count; /* total count of write-protect copy */ }; #endif @@ -75,6 +79,8 @@ extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); extern void __delayacct_compact_start(void); extern void __delayacct_compact_end(void); +extern void __delayacct_wpcopy_start(void); +extern void __delayacct_wpcopy_end(void); static inline void delayacct_tsk_init(struct task_struct *tsk) { @@ -191,6 +197,24 @@ static inline void delayacct_compact_end(void) __delayacct_compact_end(); } +static inline void delayacct_wpcopy_start(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_wpcopy_start(); +} + +static inline void delayacct_wpcopy_end(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_wpcopy_end(); +} + #else static inline void delayacct_init(void) {} @@ -225,6 +249,10 @@ static inline void delayacct_compact_start(void) {} static inline void delayacct_compact_end(void) {} +static inline void delayacct_wpcopy_start(void) +{} +static inline void delayacct_wpcopy_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 736154171489..a7f5b11a8f1b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 12 +#define TASKSTATS_VERSION 13 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -194,6 +194,10 @@ struct taskstats { __u64 ac_exe_dev; /* program binary device ID */ __u64 ac_exe_inode; /* program binary inode number */ /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 2c1e18f7c5cf..164ed9ef77a3 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -177,11 +177,14 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; tmp = d->compact_delay_total + tsk->delays->compact_delay; d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; + tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; + d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; d->freepages_count += tsk->delays->freepages_count; d->thrashing_count += tsk->delays->thrashing_count; d->compact_count += tsk->delays->compact_count; + d->wpcopy_count += tsk->delays->wpcopy_count; raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; @@ -249,3 +252,16 @@ void __delayacct_compact_end(void) ¤t->delays->compact_delay, ¤t->delays->compact_count); } + +void __delayacct_wpcopy_start(void) +{ + current->delays->wpcopy_start = local_clock(); +} + +void __delayacct_wpcopy_end(void) +{ + delayacct_end(¤t->delays->lock, + ¤t->delays->wpcopy_start, + ¤t->delays->wpcopy_delay, + ¤t->delays->wpcopy_count); +} diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7c468ac1d069..a57e1be41401 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -5230,6 +5231,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, pte = huge_ptep_get(ptep); old_page = pte_page(pte); + delayacct_wpcopy_start(); + retry_avoidcopy: /* * If no-one else is actually using this page, we're the exclusive @@ -5240,6 +5243,8 @@ retry_avoidcopy: page_move_anon_rmap(old_page, vma); if (likely(!unshare)) set_huge_ptep_writable(vma, haddr, ptep); + + delayacct_wpcopy_end(); return 0; } VM_BUG_ON_PAGE(PageAnon(old_page) && PageAnonExclusive(old_page), @@ -5309,6 +5314,7 @@ retry_avoidcopy: * race occurs while re-acquiring page table * lock, and our job is done. */ + delayacct_wpcopy_end(); return 0; } @@ -5367,6 +5373,8 @@ out_release_old: put_page(old_page); spin_lock(ptl); /* Caller expects lock to be held */ + + delayacct_wpcopy_end(); return ret; } diff --git a/mm/memory.c b/mm/memory.c index 21dadf03f089..7a089145cad4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3090,6 +3090,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) int page_copied = 0; struct mmu_notifier_range range; + delayacct_wpcopy_start(); + if (unlikely(anon_vma_prepare(vma))) goto oom; @@ -3114,6 +3116,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) put_page(new_page); if (old_page) put_page(old_page); + + delayacct_wpcopy_end(); return 0; } } @@ -3220,12 +3224,16 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) free_swap_cache(old_page); put_page(old_page); } + + delayacct_wpcopy_end(); return (page_copied && !unshare) ? VM_FAULT_WRITE : 0; oom_free_new: put_page(new_page); oom: if (old_page) put_page(old_page); + + delayacct_wpcopy_end(); return VM_FAULT_OOM; } diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 11e86739456d..e83e6e47a21e 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -207,6 +207,8 @@ static void print_delayacct(struct taskstats *t) "THRASHING%12s%15s%15s\n" " %15llu%15llu%15llums\n" "COMPACT %12s%15s%15s\n" + " %15llu%15llu%15llums\n" + "WPCOPY %12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -234,7 +236,11 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, - average_ms(t->compact_delay_total, t->compact_count)); + average_ms(t->compact_delay_total, t->compact_count), + "count", "delay total", "delay average", + (unsigned long long)t->wpcopy_count, + (unsigned long long)t->wpcopy_delay_total, + average_ms(t->wpcopy_delay_total, t->wpcopy_count)); } static void task_context_switch_counts(struct taskstats *t) -- cgit v1.2.3