From cd64647f043e3fd3569bcf068f47f030198ff93a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Sep 2013 16:43:58 +0800 Subject: hung_task: Change sysctl_hung_task_check_count to 'int' As 'sysctl_hung_task_check_count' is 'unsigned long' when this value is assigned to max_count in check_hung_uninterruptible_tasks(), it's truncated to 'int' type. This causes a minor artifact: if we write 2^32 to sysctl.hung_task_check_count, hung task detection will be effectively disabled. With this fix, it will still truncate the user input to 32 bits, but reading sysctl.hung_task_check_count reflects the actual truncated value. Signed-off-by: Li Zefan Acked-by: Ingo Molnar Link: http://lkml.kernel.org/r/523FFF4E.9050401@huawei.com Signed-off-by: Ingo Molnar --- kernel/hung_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/hung_task.c') diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb126e6b..042252383fd2 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -20,7 +20,7 @@ /* * The number of tasks checked: */ -unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; +int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Limit number of tasks checked in a batch. -- cgit v1.2.3 From 6a716c90a51338009c3bc1f460829afaed8f922d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Oct 2013 18:18:28 +0200 Subject: hung_task debugging: Add tracepoint to report the hang Currently check_hung_task() prints a warning if it detects the problem, but it is not convenient to watch the system logs if user-space wants to be notified about the hang. Add the new trace_sched_process_hang() into check_hung_task(), this way a user-space monitor can easily wait for the hang and potentially resolve a problem. Signed-off-by: Oleg Nesterov Cc: Dave Sullivan Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20131019161828.GA7439@redhat.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 19 +++++++++++++++++++ kernel/hung_task.c | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'kernel/hung_task.c') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 2e7d9947a10d..2a652d124fbb 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + ), + + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) +); +#endif /* CONFIG_DETECT_HUNG_TASK */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 042252383fd2..8807061ca004 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * The number of tasks checked: @@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_count = switch_count; return; } + + trace_sched_process_hang(t); + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; -- cgit v1.2.3 From 8b414521bc5375ae8ba18c083af95d44b8da0d04 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 11 Oct 2013 21:39:26 -0300 Subject: hung_task: add method to reset detector In certain occasions it is possible for a hung task detector positive to be false: continuation from a paused VM, for example. Add a method to reset detection, similar as is done with other kernel watchdogs. Acked-by: Don Zickus Acked-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kernel/pvclock.c | 1 + include/linux/sched.h | 8 ++++++++ kernel/hung_task.c | 11 +++++++++++ 3 files changed, 20 insertions(+) (limited to 'kernel/hung_task.c') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 6279928c0a71..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -48,6 +48,7 @@ void pvclock_touch_watchdogs(void) touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); rcu_cpu_stall_reset(); + reset_hung_task_detector(); } static atomic64_t last_value = ATOMIC64_INIT(0); diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b293..7bb4b4a2a101 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -285,6 +285,14 @@ static inline void lockup_detector_init(void) } #endif +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb126e6b..dfdf51534b3e 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -203,6 +203,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, return ret; } +static atomic_t reset_hung_task = ATOMIC_INIT(0); + +void reset_hung_task_detector(void) +{ + atomic_set(&reset_hung_task, 1); +} +EXPORT_SYMBOL_GPL(reset_hung_task_detector); + /* * kthread which checks for tasks stuck in D state */ @@ -216,6 +224,9 @@ static int watchdog(void *dummy) while (schedule_timeout_interruptible(timeout_jiffies(timeout))) timeout = sysctl_hung_task_timeout_secs; + if (atomic_xchg(&reset_hung_task, 0)) + continue; + check_hung_uninterruptible_tasks(timeout); } -- cgit v1.2.3