From 658eb5ab916ddc92f294dbce8e3d449470be9f86 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Tue, 3 Dec 2024 16:48:48 +0800 Subject: delayacct: add delay max to record delay peak Introduce the use cases of delay max, which can help quickly detect potential abnormal delays in the system and record the types and specific details of delay spikes. Problem ======== Delay accounting can track the average delay of processes to show system workload. However, when a process experiences a significant delay, maybe a delay spike, which adversely affects performance, getdelays can only display the average system delay over a period of time. Yet, average delay is unhelpful for diagnosing delay peak. It is not even possible to determine which type of delay has spiked, as this information might be masked by the average delay. Solution ========= the 'delay max' can display delay peak since the system's startup, which can record potential abnormal delays over time, including the type of delay and the maximum delay. This is helpful for quickly identifying crash caused by delay. Use case ========= bash# ./getdelays -d -p 244 print delayacct stats ON PID 244 CPU count real total virtual total delay total delay average delay max 68 192000000 213676651 705643 0.010ms 0.306381ms IO count delay total delay average delay max 0 0 0.000ms 0.000000ms SWAP count delay total delay average delay max 0 0 0.000ms 0.000000ms RECLAIM count delay total delay average delay max 0 0 0.000ms 0.000000ms THRASHING count delay total delay average delay max 0 0 0.000ms 0.000000ms COMPACT count delay total delay average delay max 0 0 0.000ms 0.000000ms WPCOPY count delay total delay average delay max 235 15648284 0.067ms 0.263842ms IRQ count delay total delay average delay max 0 0 0.000ms 0.000000ms [wang.yaxin@zte.com.cn: update docs and fix some spelling errors] Link: https://lkml.kernel.org/r/20241213192700771XKZ8H30OtHSeziGqRVMs0@zte.com.cn Link: https://lkml.kernel.org/r/20241203164848805CS62CQPQWG9GLdQj2_BxS@zte.com.cn Co-developed-by: Wang Yong Signed-off-by: Wang Yong Co-developed-by: xu xin Signed-off-by: xu xin Co-developed-by: Wang Yaxin Signed-off-by: Wang Yaxin Signed-off-by: Kun Jiang Cc: Balbir Singh Cc: David Hildenbrand Cc: Fan Yu Cc: Peilin He Cc: tuqiang Cc: Yang Yang Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- include/linux/delayacct.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6639f48dac36..56fbfa2c2ac5 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -29,25 +29,32 @@ struct task_delay_info { * XXX_delay contains the accumulated delay time in nanoseconds. */ u64 blkio_start; + u64 blkio_delay_max; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; + u64 swapin_delay_max; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of swapin */ u64 freepages_start; + u64 freepages_delay_max; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; + u64 thrashing_delay_max; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; + u64 compact_delay_max; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; + u64 wpcopy_delay_max; u64 wpcopy_delay; /* wait for write-protect copy */ + u64 irq_delay_max; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ -- cgit v1.2.3 From f65c64f311ee2f1ddc1eb395ed8b20e6b9d14e85 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Fri, 20 Dec 2024 17:31:05 +0800 Subject: delayacct: add delay min to record delay peak Delay accounting can now calculate the average delay of processes, detect the overall system load, and also record the 'delay max' to identify potential abnormal delays. However, 'delay min' can help us identify another useful delay peak. By comparing the difference between 'delay max' and 'delay min', we can understand the optimization space for latency, providing a reference for the optimization of latency performance. Use case ========= bash-4.4# ./getdelays -d -t 242 print delayacct stats ON TGID 242 CPU count real total virtual total delay total delay average delay max delay min 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms IO count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms SWAP count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms RECLAIM count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms THRASHING count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms COMPACT count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms WPCOPY count delay total delay average delay max delay min 156 11215873 0.072ms 0.207403ms 0.033913ms IRQ count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms Link: https://lkml.kernel.org/r/20241220173105906EOdsPhzjMLYNJJBqgz1ga@zte.com.cn Co-developed-by: Wang Yong Signed-off-by: Wang Yong Co-developed-by: xu xin Signed-off-by: xu xin Signed-off-by: Wang Yaxin Co-developed-by: Kun Jiang Signed-off-by: Kun Jiang Cc: Balbir Singh Cc: David Hildenbrand Cc: Fan Yu Cc: Peilin He Cc: tuqiang Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 32 ++++++++++---------- include/linux/delayacct.h | 7 +++++ include/linux/sched.h | 3 ++ include/uapi/linux/taskstats.h | 8 +++++ kernel/delayacct.c | 32 +++++++++++++++----- kernel/sched/stats.h | 4 +++ tools/accounting/getdelays.c | 42 ++++++++++++++++----------- 7 files changed, 88 insertions(+), 40 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 8a0277428ccf..210c194d4a7b 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -107,22 +107,22 @@ Get sum and peak of delays, since system boot, for all pids with tgid 242:: TGID 242 - CPU count real total virtual total delay total delay average delay max - 239 296000000 307724885 1127792 0.005ms 0.238382ms - IO count delay total delay average delay max - 0 0 0.000ms 0.000000ms - SWAP count delay total delay average delay max - 0 0 0.000ms 0.000000ms - RECLAIM count delay total delay average delay max - 0 0 0.000ms 0.000000ms - THRASHING count delay total delay average delay max - 0 0 0.000ms 0.000000ms - COMPACT count delay total delay average delay max - 0 0 0.000ms 0.000000ms - WPCOPY count delay total delay average delay max - 230 19100476 0.083ms 0.383822ms - IRQ count delay total delay average delay max - 0 0 0.000ms 0.000000ms + CPU count real total virtual total delay total delay average delay max delay min + 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms + IO count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + SWAP count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + RECLAIM count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + THRASHING count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + COMPACT count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + WPCOPY count delay total delay average delay max delay min + 156 11215873 0.072ms 0.207403ms 0.033913ms + IRQ count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 56fbfa2c2ac5..800dcc360db2 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,9 +30,11 @@ struct task_delay_info { */ u64 blkio_start; u64 blkio_delay_max; + u64 blkio_delay_min; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; u64 swapin_delay_max; + u64 swapin_delay_min; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ @@ -40,21 +42,26 @@ struct task_delay_info { u64 freepages_start; u64 freepages_delay_max; + u64 freepages_delay_min; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; u64 thrashing_delay_max; + u64 thrashing_delay_min; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; u64 compact_delay_max; + u64 compact_delay_min; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; u64 wpcopy_delay_max; + u64 wpcopy_delay_min; u64 wpcopy_delay; /* wait for write-protect copy */ u64 irq_delay_max; + u64 irq_delay_min; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a0ae3923b41d..155012467b21 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -401,6 +401,9 @@ struct sched_info { /* Max time spent waiting on a runqueue: */ unsigned long long max_run_delay; + /* Min time spent waiting on a runqueue: */ + unsigned long long min_run_delay; + /* Timestamps: */ /* When did we last run on a CPU? */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index e0d1c6fc9f3b..934e20ef7f79 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -73,6 +73,7 @@ struct taskstats { __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; __u64 cpu_delay_max; + __u64 cpu_delay_min; /* Following four fields atomically updated using task->delays->lock */ @@ -82,11 +83,13 @@ struct taskstats { __u64 blkio_count; __u64 blkio_delay_total; __u64 blkio_delay_max; + __u64 blkio_delay_min; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; __u64 swapin_delay_max; + __u64 swapin_delay_min; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time stolen @@ -170,11 +173,13 @@ struct taskstats { __u64 freepages_count; __u64 freepages_delay_total; __u64 freepages_delay_max; + __u64 freepages_delay_min; /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; __u64 thrashing_delay_max; + __u64 thrashing_delay_min; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -183,6 +188,7 @@ struct taskstats { __u64 compact_count; __u64 compact_delay_total; __u64 compact_delay_max; + __u64 compact_delay_min; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -205,11 +211,13 @@ struct taskstats { __u64 wpcopy_count; __u64 wpcopy_delay_total; __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; __u64 irq_delay_max; + __u64 irq_delay_min; /* v15: add Delay max */ }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 23212a0c88e4..b238eb8c6573 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -95,7 +95,7 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting for a statistic using its timestamps (@start), * accumulator (@total) and @count */ -static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min) { s64 ns = local_clock() - *start; unsigned long flags; @@ -106,6 +106,8 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou (*count)++; if (ns > *max) *max = ns; + if (*min == 0 || ns < *min) + *min = ns; raw_spin_unlock_irqrestore(lock, flags); } } @@ -125,7 +127,8 @@ void __delayacct_blkio_end(struct task_struct *p) &p->delays->blkio_start, &p->delays->blkio_delay, &p->delays->blkio_count, - &p->delays->blkio_delay_max); + &p->delays->blkio_delay_max, + &p->delays->blkio_delay_min); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -157,6 +160,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_count += t1; d->cpu_delay_max = tsk->sched_info.max_run_delay; + d->cpu_delay_min = tsk->sched_info.min_run_delay; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; tmp = (s64)d->cpu_run_virtual_total + t3; @@ -170,24 +174,31 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ raw_spin_lock_irqsave(&tsk->delays->lock, flags); d->blkio_delay_max = tsk->delays->blkio_delay_max; + d->blkio_delay_min = tsk->delays->blkio_delay_min; tmp = d->blkio_delay_total + tsk->delays->blkio_delay; d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; d->swapin_delay_max = tsk->delays->swapin_delay_max; + d->swapin_delay_min = tsk->delays->swapin_delay_min; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; d->freepages_delay_max = tsk->delays->freepages_delay_max; + d->freepages_delay_min = tsk->delays->freepages_delay_min; tmp = d->freepages_delay_total + tsk->delays->freepages_delay; d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; d->thrashing_delay_max = tsk->delays->thrashing_delay_max; + d->thrashing_delay_min = tsk->delays->thrashing_delay_min; tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; d->compact_delay_max = tsk->delays->compact_delay_max; + d->compact_delay_min = tsk->delays->compact_delay_min; tmp = d->compact_delay_total + tsk->delays->compact_delay; d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max; + d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min; tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; d->irq_delay_max = tsk->delays->irq_delay_max; + d->irq_delay_min = tsk->delays->irq_delay_min; tmp = d->irq_delay_total + tsk->delays->irq_delay; d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; @@ -224,7 +235,8 @@ void __delayacct_freepages_end(void) ¤t->delays->freepages_start, ¤t->delays->freepages_delay, ¤t->delays->freepages_count, - ¤t->delays->freepages_delay_max); + ¤t->delays->freepages_delay_max, + ¤t->delays->freepages_delay_min); } void __delayacct_thrashing_start(bool *in_thrashing) @@ -247,7 +259,8 @@ void __delayacct_thrashing_end(bool *in_thrashing) ¤t->delays->thrashing_start, ¤t->delays->thrashing_delay, ¤t->delays->thrashing_count, - ¤t->delays->thrashing_delay_max); + ¤t->delays->thrashing_delay_max, + ¤t->delays->thrashing_delay_min); } void __delayacct_swapin_start(void) @@ -261,7 +274,8 @@ void __delayacct_swapin_end(void) ¤t->delays->swapin_start, ¤t->delays->swapin_delay, ¤t->delays->swapin_count, - ¤t->delays->swapin_delay_max); + ¤t->delays->swapin_delay_max, + ¤t->delays->swapin_delay_min); } void __delayacct_compact_start(void) @@ -275,7 +289,8 @@ void __delayacct_compact_end(void) ¤t->delays->compact_start, ¤t->delays->compact_delay, ¤t->delays->compact_count, - ¤t->delays->compact_delay_max); + ¤t->delays->compact_delay_max, + ¤t->delays->compact_delay_min); } void __delayacct_wpcopy_start(void) @@ -289,7 +304,8 @@ void __delayacct_wpcopy_end(void) ¤t->delays->wpcopy_start, ¤t->delays->wpcopy_delay, ¤t->delays->wpcopy_count, - ¤t->delays->wpcopy_delay_max); + ¤t->delays->wpcopy_delay_max, + ¤t->delays->wpcopy_delay_min); } void __delayacct_irq(struct task_struct *task, u32 delta) @@ -301,6 +317,8 @@ void __delayacct_irq(struct task_struct *task, u32 delta) task->delays->irq_count++; if (delta > task->delays->irq_delay_max) task->delays->irq_delay_max = delta; + if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min)) + task->delays->irq_delay_min = delta; raw_spin_unlock_irqrestore(&task->delays->lock, flags); } diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index ed72435aef51..693537b908a1 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -246,6 +246,8 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) t->sched_info.run_delay += delta; if (delta > t->sched_info.max_run_delay) t->sched_info.max_run_delay = delta; + if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) + t->sched_info.min_run_delay = delta; rq_sched_info_dequeue(rq, delta); } @@ -269,6 +271,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) t->sched_info.pcount++; if (delta > t->sched_info.max_run_delay) t->sched_info.max_run_delay = delta; + if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) + t->sched_info.min_run_delay = delta; rq_sched_info_arrive(rq, delta); } diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index e570bcad185d..100ad3dc091a 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -192,7 +192,7 @@ static int get_family_id(int sd) } #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) -#define delay_max_ms(t) (t / 1000000ULL) +#define delay_ms(t) (t / 1000000ULL) static void print_delayacct(struct taskstats *t) { @@ -213,48 +213,56 @@ static void print_delayacct(struct taskstats *t) "IRQ %15s%15s%15s%15s\n" " %15llu%15llu%15.3fms%13.6fms\n", "count", "real total", "virtual total", - "delay total", "delay average", "delay max", + "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->cpu_count, (unsigned long long)t->cpu_run_real_total, (unsigned long long)t->cpu_run_virtual_total, (unsigned long long)t->cpu_delay_total, average_ms((double)t->cpu_delay_total, t->cpu_count), - delay_max_ms((double)t->cpu_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->cpu_delay_max), + delay_ms((double)t->cpu_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, average_ms((double)t->blkio_delay_total, t->blkio_count), - delay_max_ms((double)t->blkio_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->blkio_delay_max), + delay_ms((double)t->blkio_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, average_ms((double)t->swapin_delay_total, t->swapin_count), - delay_max_ms((double)t->swapin_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->swapin_delay_max), + delay_ms((double)t->swapin_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, average_ms((double)t->freepages_delay_total, t->freepages_count), - delay_max_ms((double)t->freepages_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->freepages_delay_max), + delay_ms((double)t->freepages_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, average_ms((double)t->thrashing_delay_total, t->thrashing_count), - delay_max_ms((double)t->thrashing_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->thrashing_delay_max), + delay_ms((double)t->thrashing_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, average_ms((double)t->compact_delay_total, t->compact_count), - delay_max_ms((double)t->compact_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->compact_delay_max), + delay_ms((double)t->compact_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_delay_total, average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), - delay_max_ms((double)t->wpcopy_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->wpcopy_delay_max), + delay_ms((double)t->wpcopy_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->irq_count, (unsigned long long)t->irq_delay_total, average_ms((double)t->irq_delay_total, t->irq_count), - delay_max_ms((double)t->irq_delay_max)); + delay_ms((double)t->irq_delay_max), + delay_ms((double)t->irq_delay_min)); } static void task_context_switch_counts(struct taskstats *t) -- cgit v1.2.3