From cf4aebc292fac7f34f8345664320e9d4a42ca76c Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Thu, 7 Feb 2013 09:46:59 -0600 Subject: sched: Move sched.h sysctl bits into separate header Move the sysctl-related bits from include/linux/sched.h into a new file: include/linux/sched/sysctl.h. Then update source files requiring access to those bits by including the new header file. Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20130207094659.06dced96@riff.lan Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 97 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 include/linux/sched/sysctl.h (limited to 'include/linux/sched') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h new file mode 100644 index 000000000000..bac914e458ca --- /dev/null +++ b/include/linux/sched/sysctl.h @@ -0,0 +1,97 @@ +#ifndef _SCHED_SYSCTL_H +#define _SCHED_SYSCTL_H + +#ifdef CONFIG_DETECT_HUNG_TASK +extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; +extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; +#endif + +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) + +extern int sysctl_max_map_count; + +extern unsigned int sysctl_sched_latency; +extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_child_runs_first; + +enum sched_tunable_scaling { + SCHED_TUNABLESCALING_NONE, + SCHED_TUNABLESCALING_LOG, + SCHED_TUNABLESCALING_LINEAR, + SCHED_TUNABLESCALING_END, +}; +extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; + +extern unsigned int sysctl_numa_balancing_scan_delay; +extern unsigned int sysctl_numa_balancing_scan_period_min; +extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_scan_period_reset; +extern unsigned int sysctl_numa_balancing_scan_size; +extern unsigned int sysctl_numa_balancing_settle_count; + +#ifdef CONFIG_SCHED_DEBUG +extern unsigned int sysctl_sched_migration_cost; +extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_time_avg; +extern unsigned int sysctl_timer_migration; +extern unsigned int sysctl_sched_shares_window; + +int sched_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); +#endif +#ifdef CONFIG_SCHED_DEBUG +static inline unsigned int get_sysctl_timer_migration(void) +{ + return sysctl_timer_migration; +} +#else +static inline unsigned int get_sysctl_timer_migration(void) +{ + return 1; +} +#endif +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + +#ifdef CONFIG_CFS_BANDWIDTH +extern unsigned int sysctl_sched_cfs_bandwidth_slice; +#endif + +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; +#endif + +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + +int sched_rt_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +#endif /* _SCHED_SYSCTL_H */ -- cgit v1.2.3 From ce0dbbbb30aee6a835511d5be446462388ba9eee Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Thu, 7 Feb 2013 09:47:04 -0600 Subject: sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice Add a /proc/sys/kernel scheduler knob named sched_rr_timeslice_ms that allows global changing of the SCHED_RR timeslice value. User visable value is in milliseconds but is stored as jiffies. Setting to 0 (zero) resets to the default (currently 100ms). Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20130207094704.13751796@riff.lan Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 15 ++++++++++++++- kernel/sched/core.c | 19 +++++++++++++++++++ kernel/sched/rt.c | 6 ++++-- kernel/sysctl.c | 7 +++++++ 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux/sched') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bac914e458ca..d2bb0ae979d0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -73,6 +73,13 @@ static inline unsigned int get_sysctl_timer_migration(void) return 1; } #endif + +/* + * control realtime throttling: + * + * /proc/sys/kernel/sched_rt_period_us + * /proc/sys/kernel/sched_rt_runtime_us + */ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; @@ -90,7 +97,13 @@ extern unsigned int sysctl_sched_autogroup_enabled; */ #define RR_TIMESLICE (100 * HZ / 1000) -int sched_rt_handler(struct ctl_table *table, int write, +extern int sched_rr_timeslice; + +extern int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +extern int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1dff78a9e2ab..4a88f1d51563 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7509,6 +7509,25 @@ static int sched_rt_global_constraints(void) } #endif /* CONFIG_RT_GROUP_SCHED */ +int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + ret = proc_dointvec(table, write, buffer, lenp, ppos); + /* make sure that internally we keep jiffies */ + /* also, writing zero resets timeslice to default */ + if (!ret && write) { + sched_rr_timeslice = sched_rr_timeslice <= 0 ? + RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice); + } + mutex_unlock(&mutex); + return ret; +} + int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c25de141c576..fb0f77e402b6 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,8 @@ #include +int sched_rr_timeslice = RR_TIMESLICE; + static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; @@ -2016,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) if (--p->rt.time_slice) return; - p->rt.time_slice = RR_TIMESLICE; + p->rt.time_slice = sched_rr_timeslice; /* * Requeue to the end of queue if we (and all of our ancestors) are the @@ -2047,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) * Time slice is 0 for SCHED_FIFO tasks */ if (task->policy == SCHED_RR) - return RR_TIMESLICE; + return sched_rr_timeslice; else return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7357e23aaf68..4fc9be955c71 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -404,6 +404,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_rt_handler, }, + { + .procname = "sched_rr_timeslice_ms", + .data = &sched_rr_timeslice, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rr_handler, + }, #ifdef CONFIG_SCHED_AUTOGROUP { .procname = "sched_autogroup_enabled", -- cgit v1.2.3 From 8bd75c77b7c6a3954140dd2e20346aef3efe4a35 Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Thu, 7 Feb 2013 09:47:07 -0600 Subject: sched/rt: Move rt specific bits into new header file Move rt scheduler definitions out of include/linux/sched.h into new file include/linux/sched/rt.h Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20130207094707.7b9f825f@riff.lan Signed-off-by: Ingo Molnar --- drivers/spi/spi.c | 2 +- drivers/staging/csr/bh.c | 2 +- drivers/staging/csr/unifi_sme.c | 2 +- drivers/tty/sysrq.c | 1 + fs/select.c | 1 + include/linux/sched.h | 55 ++----------------------------------- include/linux/sched/rt.h | 58 +++++++++++++++++++++++++++++++++++++++ init/init_task.c | 1 + kernel/futex.c | 1 + kernel/hrtimer.c | 1 + kernel/irq/manage.c | 1 + kernel/mutex.c | 1 + kernel/rtmutex-debug.c | 1 + kernel/rtmutex-tester.c | 1 + kernel/rtmutex.c | 1 + kernel/sched/cpupri.c | 2 ++ kernel/sched/sched.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace_sched_wakeup.c | 2 +- kernel/watchdog.c | 1 + mm/page-writeback.c | 1 + mm/page_alloc.c | 1 + 22 files changed, 81 insertions(+), 57 deletions(-) create mode 100644 include/linux/sched/rt.h (limited to 'include/linux/sched') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 19ee901577da..3a6083b386a1 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/csr/bh.c b/drivers/staging/csr/bh.c index 1a1f5c79822a..7b133597e923 100644 --- a/drivers/staging/csr/bh.c +++ b/drivers/staging/csr/bh.c @@ -15,7 +15,7 @@ */ #include "csr_wifi_hip_unifi.h" #include "unifi_priv.h" - +#include /* * --------------------------------------------------------------------------- diff --git a/drivers/staging/csr/unifi_sme.c b/drivers/staging/csr/unifi_sme.c index 7c6c4138fc76..49395da34b7f 100644 --- a/drivers/staging/csr/unifi_sme.c +++ b/drivers/staging/csr/unifi_sme.c @@ -15,7 +15,7 @@ #include "unifi_priv.h" #include "csr_wifi_hip_unifi.h" #include "csr_wifi_hip_conversions.h" - +#include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index b3c4a250ff86..40e5b3919e27 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -15,6 +15,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 2ef72d965036..8c1c96c27062 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 8fc9b2710a80..33cc42130371 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1164,6 +1164,7 @@ struct sched_entity { /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; #endif + /* * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be * removed when useful for applications beyond shares distribution (e.g. @@ -1191,6 +1192,7 @@ struct sched_rt_entity { #endif }; + struct rcu_node; enum perf_event_task_context { @@ -1596,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled) } #endif -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) - -static inline int rt_prio(int prio) -{ - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; -} - -static inline int rt_task(struct task_struct *p) -{ - return rt_prio(p->prio); -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -2054,26 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif -#ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern void rt_mutex_adjust_pi(struct task_struct *p); -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return tsk->pi_blocked_on != NULL; -} -#else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} -# define rt_mutex_adjust_pi(p) do { } while (0) -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return false; -} -#endif - extern bool yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); @@ -2703,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern void normalize_rt_tasks(void); - #ifdef CONFIG_CGROUP_SCHED extern struct task_group root_task_group; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h new file mode 100644 index 000000000000..94e19ea28fc3 --- /dev/null +++ b/include/linux/sched/rt.h @@ -0,0 +1,58 @@ +#ifndef _SCHED_RT_H +#define _SCHED_RT_H + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH + * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority + * values are inverted: lower p->prio value means higher priority. + * + * The MAX_USER_RT_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) +#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + +static inline int rt_prio(int prio) +{ + if (unlikely(prio < MAX_RT_PRIO)) + return 1; + return 0; +} + +static inline int rt_task(struct task_struct *p) +{ + return rt_prio(p->prio); +} + +#ifdef CONFIG_RT_MUTEXES +extern int rt_mutex_getprio(struct task_struct *p); +extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_adjust_pi(struct task_struct *p); +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return tsk->pi_blocked_on != NULL; +} +#else +static inline int rt_mutex_getprio(struct task_struct *p) +{ + return p->normal_prio; +} +# define rt_mutex_adjust_pi(p) do { } while (0) +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return false; +} +#endif + +extern void normalize_rt_tasks(void); + + +#endif /* _SCHED_RT_H */ diff --git a/init/init_task.c b/init/init_task.c index a031ad14c950..ba0a7f362d9e 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/futex.c b/kernel/futex.c index 19eb089ca003..9618b6e9fb36 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -60,6 +60,7 @@ #include #include #include +#include #include diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8a9aa59d0d61..c5dde988c0ce 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e49a288fa479..02115d9592ec 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "internals.h" diff --git a/kernel/mutex.c b/kernel/mutex.c index a307cc9c9526..52f23011b6e0 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -19,6 +19,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 16502d3a71c8..13b243a323fa 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -17,6 +17,7 @@ * See rt.c in preempt-rt for proper credits and further information */ #include +#include #include #include #include diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 98ec49475460..7890b10084a7 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index a242e691c993..1e09308bf2a1 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "rtmutex_common.h" diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 23aa789c53ee..1095e878a46f 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -28,6 +28,8 @@ */ #include +#include +#include #include "cpupri.h" /* Convert between a 140 based task->prio, and our 102 based cpupri */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ed8de30a040e..cc03cfdf469f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3c13e46d7d24..4d2e4afd956e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 9fe45fcefca0..75aa97fbe1a1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,8 +15,8 @@ #include #include #include +#include #include - #include "trace.h" static struct trace_array *wakeup_trace; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 75a2ab3d0b02..27689422aa92 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0713bfbf0954..66a0024becd9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -35,6 +35,7 @@ #include /* __set_page_dirty_buffers */ #include #include +#include #include /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df2022ff0c8a..42d18e46f286 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3