Diffstat (limited to 'kernel')
-rw-r--r--  kernel/configs.c        46
-rw-r--r--  kernel/exit.c           24
-rw-r--r--  kernel/fork.c           17
-rw-r--r--  kernel/ksyms.c           2
-rw-r--r--  kernel/pid.c             4
-rw-r--r--  kernel/posix-timers.c    1
-rw-r--r--  kernel/sched.c         391
-rw-r--r--  kernel/signal.c          4
-rw-r--r--  kernel/sys.c            17
9 files changed, 357 insertions, 149 deletions
diff --git a/kernel/configs.c b/kernel/configs.c
index 6a5c0c9d9176..57f54451edbe 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -47,7 +47,7 @@
/**************************************************/
/* globals and useful constants */
-static const char IKCONFIG_VERSION[] = "0.6";
+static const char IKCONFIG_VERSION[] __initdata = "0.7";
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
@@ -72,32 +72,6 @@ static struct file_operations ikconfig_file_ops = {
.read = ikconfig_read_current,
};
-
-/***************************************************/
-/* build_info_show: let people read the info */
-/* we have on the tools used to build this kernel */
-
-static int build_info_show(struct seq_file *seq, void *v)
-{
- seq_printf(seq,
- "Kernel: %s\nCompiler: %s\nVersion_in_Makefile: %s\n",
- ikconfig_build_info, LINUX_COMPILER, UTS_RELEASE);
- return 0;
-}
-
-static int build_info_open(struct inode *inode, struct file *file)
-{
- return single_open(file, build_info_show, PDE(inode)->data);
-}
-
-static struct file_operations build_info_file_ops = {
- .owner = THIS_MODULE,
- .open = build_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/***************************************************/
/* ikconfig_init: start up everything we need to */
@@ -112,26 +86,12 @@ static int __init ikconfig_init(void)
entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
&proc_root);
if (!entry)
- goto leave;
+ return -ENOMEM;
entry->proc_fops = &ikconfig_file_ops;
entry->size = kernel_config_data_size;
- /* create the "build_info" file */
- entry = create_proc_entry("config_build_info",
- S_IFREG | S_IRUGO, &proc_root);
- if (!entry)
- goto leave_gz;
- entry->proc_fops = &build_info_file_ops;
-
return 0;
-
-leave_gz:
- /* remove the file from proc */
- remove_proc_entry("config.gz", &proc_root);
-
-leave:
- return -ENOMEM;
}
/***************************************************/
@@ -139,9 +99,7 @@ leave:
static void __exit ikconfig_cleanup(void)
{
- /* remove the files */
remove_proc_entry("config.gz", &proc_root);
- remove_proc_entry("config_build_info", &proc_root);
}
module_init(ikconfig_init);
diff --git a/kernel/exit.c b/kernel/exit.c
index b6174f82adf9..c565fd69d559 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -152,7 +152,7 @@ static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
|| p->state >= TASK_ZOMBIE
|| p->real_parent->pid == 1)
continue;
- if (p->real_parent->pgrp != pgrp
+ if (process_group(p->real_parent) != pgrp
&& p->real_parent->session == p->session) {
ret = 0;
break;
@@ -247,9 +247,9 @@ void __set_special_pids(pid_t session, pid_t pgrp)
curr->session = session;
attach_pid(curr, PIDTYPE_SID, session);
}
- if (curr->pgrp != pgrp) {
+ if (process_group(curr) != pgrp) {
detach_pid(curr, PIDTYPE_PGID);
- curr->pgrp = pgrp;
+ curr->group_leader->__pgrp = pgrp;
attach_pid(curr, PIDTYPE_PGID, pgrp);
}
}
@@ -508,9 +508,9 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced)
* than we are, and it was the only connection
* outside, so the child pgrp is now orphaned.
*/
- if ((p->pgrp != father->pgrp) &&
+ if ((process_group(p) != process_group(father)) &&
(p->session == father->session)) {
- int pgrp = p->pgrp;
+ int pgrp = process_group(p);
if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
__kill_pg_info(SIGHUP, (void *)1, pgrp);
@@ -618,12 +618,12 @@ static void exit_notify(struct task_struct *tsk)
t = tsk->real_parent;
- if ((t->pgrp != tsk->pgrp) &&
+ if ((process_group(t) != process_group(tsk)) &&
(t->session == tsk->session) &&
- will_become_orphaned_pgrp(tsk->pgrp, tsk) &&
- has_stopped_jobs(tsk->pgrp)) {
- __kill_pg_info(SIGHUP, (void *)1, tsk->pgrp);
- __kill_pg_info(SIGCONT, (void *)1, tsk->pgrp);
+ will_become_orphaned_pgrp(process_group(tsk), tsk) &&
+ has_stopped_jobs(process_group(tsk))) {
+ __kill_pg_info(SIGHUP, (void *)1, process_group(tsk));
+ __kill_pg_info(SIGCONT, (void *)1, process_group(tsk));
}
/* Let father know we died
@@ -813,10 +813,10 @@ static int eligible_child(pid_t pid, int options, task_t *p)
if (p->pid != pid)
return 0;
} else if (!pid) {
- if (p->pgrp != current->pgrp)
+ if (process_group(p) != process_group(current))
return 0;
} else if (pid != -1) {
- if (p->pgrp != -pid)
+ if (process_group(p) != -pid)
return 0;
}
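
The pgrp conversions in this file (and in fork.c, pid.c, signal.c and sys.c below) replace direct ->pgrp reads with the new process_group() helper. The helper itself is not part of this diff; judging from the writes to ->group_leader->__pgrp in the hunks above and in sys_setpgid() further down, a minimal sketch of its assumed shape is:

static inline pid_t process_group(struct task_struct *tsk)
{
	/* assumed shape: the process group now lives only on the
	 * thread group leader, in the renamed __pgrp field */
	return tsk->group_leader->__pgrp;
}

With such an accessor all readers go through the group leader, while the remaining writers (__set_special_pids(), reparenting and setpgid) update leader->__pgrp directly under the tasklist lock.
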
diff --git a/kernel/fork.c b/kernel/fork.c
index 37d79b4e16e6..f2d3115483da 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -305,7 +305,7 @@ out:
return retval;
fail_nomem:
retval = -ENOMEM;
- fail:
+fail:
vm_unacct_memory(charge);
goto out;
}
@@ -499,7 +499,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
goto fail_nomem;
if (init_new_context(tsk,mm))
- goto free_pt;
+ goto fail_nocontext;
retval = dup_mmap(mm, oldmm);
if (retval)
@@ -514,6 +514,15 @@ free_pt:
mmput(mm);
fail_nomem:
return retval;
+
+fail_nocontext:
+ /*
+ * If init_new_context() failed, we cannot use mmput() to free the mm
+ * because it calls destroy_context()
+ */
+ mm_free_pgd(mm);
+ free_mm(mm);
+ return retval;
}
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -925,7 +934,7 @@ struct task_struct *copy_process(unsigned long clone_flags,
*/
p->first_time_slice = 1;
current->time_slice >>= 1;
- p->last_run = jiffies;
+ p->timestamp = sched_clock();
if (!current->time_slice) {
/*
* This case is rare, it happens when the parent has only
@@ -1004,7 +1013,7 @@ struct task_struct *copy_process(unsigned long clone_flags,
attach_pid(p, PIDTYPE_PID, p->pid);
if (thread_group_leader(p)) {
attach_pid(p, PIDTYPE_TGID, p->tgid);
- attach_pid(p, PIDTYPE_PGID, p->pgrp);
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
attach_pid(p, PIDTYPE_SID, p->session);
if (p->pid)
__get_cpu_var(process_counts)++;
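
fork.c now stamps the child with sched_clock() instead of jiffies. sched_clock() is not part of this diff; it is expected to return nanoseconds of uptime, and real implementations are per-architecture. A lowest-common-denominator fallback, shown here only as an illustrative sketch, would be roughly:

unsigned long long sched_clock(void)
{
	/* jiffies-resolution fallback: scale ticks to nanoseconds */
	return (unsigned long long)jiffies * (1000000000 / HZ);
}

These nanosecond timestamps are what the reworked sleep_avg accounting in sched.c below is built on.
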
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 9f61a0496c2a..9da2940ac0e6 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -348,8 +348,6 @@ EXPORT_SYMBOL(lock_page);
EXPORT_SYMBOL(unlock_page);
/* device registration */
-EXPORT_SYMBOL(register_chrdev);
-EXPORT_SYMBOL(unregister_chrdev);
EXPORT_SYMBOL(register_blkdev);
EXPORT_SYMBOL(unregister_blkdev);
EXPORT_SYMBOL(tty_register_driver);
diff --git a/kernel/pid.c b/kernel/pid.c
index 00413e3967b9..713f54eaeda9 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -250,13 +250,13 @@ void switch_exec_pids(task_t *leader, task_t *thread)
attach_pid(thread, PIDTYPE_PID, thread->pid);
attach_pid(thread, PIDTYPE_TGID, thread->tgid);
- attach_pid(thread, PIDTYPE_PGID, thread->pgrp);
+ attach_pid(thread, PIDTYPE_PGID, leader->__pgrp);
attach_pid(thread, PIDTYPE_SID, thread->session);
list_add_tail(&thread->tasks, &init_task.tasks);
attach_pid(leader, PIDTYPE_PID, leader->pid);
attach_pid(leader, PIDTYPE_TGID, leader->tgid);
- attach_pid(leader, PIDTYPE_PGID, leader->pgrp);
+ attach_pid(leader, PIDTYPE_PGID, leader->__pgrp);
attach_pid(leader, PIDTYPE_SID, leader->session);
}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d9be410a9e62..64940545cb84 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -344,6 +344,7 @@ static inline struct task_struct * good_sigevent(sigevent_t * event)
return NULL;
if ((event->sigev_notify & ~SIGEV_NONE & MIPS_SIGEV) &&
+ event->sigev_signo &&
((unsigned) (event->sigev_signo > SIGRTMAX)))
return NULL;
diff --git a/kernel/sched.c b/kernel/sched.c
index 9dc251a8d8a5..b35f717d1b58 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -14,6 +14,7 @@
* an array-switch method of distributing timeslices
* and per-CPU runqueues. Cleanups and useful suggestions
* by Davide Libenzi, preemptible kernel bits by Robert Love.
+ * 2003-09-03 Interactivity tuning by Con Kolivas.
*/
#include <linux/mm.h>
@@ -59,6 +60,14 @@
#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
+#define AVG_TIMESLICE (MIN_TIMESLICE + ((MAX_TIMESLICE - MIN_TIMESLICE) *\
+ (MAX_PRIO-1-NICE_TO_PRIO(0))/(MAX_USER_PRIO - 1)))
+
+/*
+ * Some helpers for converting nanosecond timing to jiffy resolution
+ */
+#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
+#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
/*
* These are the 'tuning knobs' of the scheduler:
@@ -69,14 +78,18 @@
*/
#define MIN_TIMESLICE ( 10 * HZ / 1000)
#define MAX_TIMESLICE (200 * HZ / 1000)
-#define CHILD_PENALTY 50
+#define ON_RUNQUEUE_WEIGHT 30
+#define CHILD_PENALTY 95
#define PARENT_PENALTY 100
#define EXIT_WEIGHT 3
#define PRIO_BONUS_RATIO 25
+#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
#define INTERACTIVE_DELTA 2
-#define MAX_SLEEP_AVG (10*HZ)
-#define STARVATION_LIMIT (10*HZ)
+#define MAX_SLEEP_AVG (AVG_TIMESLICE * MAX_BONUS)
+#define STARVATION_LIMIT (MAX_SLEEP_AVG)
+#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG))
#define NODE_THRESHOLD 125
+#define CREDIT_LIMIT 100
/*
* If a task is 'interactive' then we reinsert it in the active
@@ -106,6 +119,19 @@
* too hard.
*/
+#define CURRENT_BONUS(p) \
+ (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
+ MAX_SLEEP_AVG)
+
+#ifdef CONFIG_SMP
+#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
+ (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
+ num_online_cpus())
+#else
+#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
+ (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
+#endif
+
#define SCALE(v1,v1_max,v2_max) \
(v1) * (v2_max) / (v1_max)
@@ -116,6 +142,19 @@
#define TASK_INTERACTIVE(p) \
((p)->prio <= (p)->static_prio - DELTA(p))
+#define JUST_INTERACTIVE_SLEEP(p) \
+ (JIFFIES_TO_NS(MAX_SLEEP_AVG * \
+ (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
+
+#define HIGH_CREDIT(p) \
+ ((p)->interactive_credit > CREDIT_LIMIT)
+
+#define LOW_CREDIT(p) \
+ ((p)->interactive_credit < -CREDIT_LIMIT)
+
+#define TASK_PREEMPTS_CURR(p, rq) \
+ ((p)->prio < (rq)->curr->prio)
+
/*
* BASE_TIMESLICE scales user-nice values [ -20 ... 19 ]
* to time slice values.
@@ -180,7 +219,6 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
#define this_rq() (&__get_cpu_var(runqueues))
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
/*
* Default context-switch locking:
@@ -320,8 +358,7 @@ static int effective_prio(task_t *p)
if (rt_task(p))
return p->prio;
- bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
- MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
+ bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
prio = p->static_prio - bonus;
if (prio < MAX_RT_PRIO)
@@ -340,6 +377,82 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
nr_running_inc(rq);
}
+static void recalc_task_prio(task_t *p, unsigned long long now)
+{
+ unsigned long long __sleep_time = now - p->timestamp;
+ unsigned long sleep_time;
+
+ if (__sleep_time > NS_MAX_SLEEP_AVG)
+ sleep_time = NS_MAX_SLEEP_AVG;
+ else
+ sleep_time = (unsigned long)__sleep_time;
+
+ if (likely(sleep_time > 0)) {
+ /*
+ * User tasks that sleep a long time are categorised as
+ * idle and will get just interactive status to stay active &
+ * prevent them suddenly becoming cpu hogs and starving
+ * other processes.
+ */
+ if (p->mm && p->activated != -1 &&
+ sleep_time > JUST_INTERACTIVE_SLEEP(p)){
+ p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
+ AVG_TIMESLICE);
+ if (!HIGH_CREDIT(p))
+ p->interactive_credit++;
+ } else {
+ /*
+ * The lower the sleep avg a task has the more
+ * rapidly it will rise with sleep time.
+ */
+ sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
+
+ /*
+ * Tasks with low interactive_credit are limited to
+ * one timeslice worth of sleep avg bonus.
+ */
+ if (LOW_CREDIT(p) &&
+ sleep_time > JIFFIES_TO_NS(task_timeslice(p)))
+ sleep_time =
+ JIFFIES_TO_NS(task_timeslice(p));
+
+ /*
+ * Non high_credit tasks waking from uninterruptible
+ * sleep are limited in their sleep_avg rise as they
+ * are likely to be cpu hogs waiting on I/O
+ */
+ if (p->activated == -1 && !HIGH_CREDIT(p) && p->mm){
+ if (p->sleep_avg >= JUST_INTERACTIVE_SLEEP(p))
+ sleep_time = 0;
+ else if (p->sleep_avg + sleep_time >=
+ JUST_INTERACTIVE_SLEEP(p)){
+ p->sleep_avg =
+ JUST_INTERACTIVE_SLEEP(p);
+ sleep_time = 0;
+ }
+ }
+
+ /*
+ * This code gives a bonus to interactive tasks.
+ *
+ * The boost works by updating the 'average sleep time'
+ * value here, based on ->timestamp. The more time a task
+ * spends sleeping, the higher the average gets - and the
+ * higher the priority boost gets as well.
+ */
+ p->sleep_avg += sleep_time;
+
+ if (p->sleep_avg > NS_MAX_SLEEP_AVG){
+ p->sleep_avg = NS_MAX_SLEEP_AVG;
+ if (!HIGH_CREDIT(p))
+ p->interactive_credit++;
+ }
+ }
+ }
+
+ p->prio = effective_prio(p);
+}
+
/*
* activate_task - move a task to the runqueue and do priority recalculation
*
@@ -348,34 +461,33 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
*/
static inline void activate_task(task_t *p, runqueue_t *rq)
{
- long sleep_time = jiffies - p->last_run - 1;
+ unsigned long long now = sched_clock();
- if (sleep_time > 0) {
- int sleep_avg;
+ recalc_task_prio(p, now);
+ /*
+ * This checks to make sure it's not an uninterruptible task
+ * that is now waking up.
+ */
+ if (!p->activated){
/*
- * This code gives a bonus to interactive tasks.
- *
- * The boost works by updating the 'average sleep time'
- * value here, based on ->last_run. The more time a task
- * spends sleeping, the higher the average gets - and the
- * higher the priority boost gets as well.
+ * Tasks which were woken up by interrupts (ie. hw events)
+ * are most likely of interactive nature. So we give them
+ * the credit of extending their sleep time to the period
+ * of time they spend on the runqueue, waiting for execution
+ * on a CPU, first time around:
*/
- sleep_avg = p->sleep_avg + sleep_time;
-
+ if (in_interrupt())
+ p->activated = 2;
+ else
/*
- * 'Overflow' bonus ticks go to the waker as well, so the
- * ticks are not lost. This has the effect of further
- * boosting tasks that are related to maximum-interactive
- * tasks.
+ * Normal first-time wakeups get a credit too for on-runqueue
+ * time, but it will be weighted down:
*/
- if (sleep_avg > MAX_SLEEP_AVG)
- sleep_avg = MAX_SLEEP_AVG;
- if (p->sleep_avg != sleep_avg) {
- p->sleep_avg = sleep_avg;
- p->prio = effective_prio(p);
+ p->activated = 1;
}
- }
+ p->timestamp = now;
+
__activate_task(p, rq);
}
@@ -496,13 +608,19 @@ repeat_lock_task:
task_rq_unlock(rq, &flags);
goto repeat_lock_task;
}
- if (old_state == TASK_UNINTERRUPTIBLE)
+ if (old_state == TASK_UNINTERRUPTIBLE){
rq->nr_uninterruptible--;
+ /*
+ * Tasks on involuntary sleep don't earn
+ * sleep_avg beyond just interactive state.
+ */
+ p->activated = -1;
+ }
if (sync)
__activate_task(p, rq);
else {
activate_task(p, rq);
- if (p->prio < rq->curr->prio)
+ if (TASK_PREEMPTS_CURR(p, rq))
resched_task(rq->curr);
}
success = 1;
@@ -551,8 +669,14 @@ void wake_up_forked_process(task_t * p)
* and children as well, to keep max-interactive tasks
* from forking tasks that are max-interactive.
*/
- current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
- p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
+ current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) *
+ PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+
+ p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
+ CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+
+ p->interactive_credit = 0;
+
p->prio = effective_prio(p);
set_task_cpu(p, smp_processor_id());
@@ -593,8 +717,9 @@ void sched_exit(task_t * p)
* the sleep_avg of the parent as well.
*/
if (p->sleep_avg < p->parent->sleep_avg)
- p->parent->sleep_avg = (p->parent->sleep_avg * EXIT_WEIGHT +
- p->sleep_avg) / (EXIT_WEIGHT + 1);
+ p->parent->sleep_avg = p->parent->sleep_avg /
+ (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg /
+ (EXIT_WEIGHT + 1);
}
/**
@@ -960,10 +1085,10 @@ static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu,
if (likely(!busiest))
goto out;
- *imbalance = (max_load - nr_running) / 2;
+ *imbalance = max_load - nr_running;
/* It needs an at least ~25% imbalance to trigger balancing. */
- if (!idle && (*imbalance < (max_load + 3)/4)) {
+ if (!idle && ((*imbalance)*4 < max_load)) {
busiest = NULL;
goto out;
}
@@ -973,7 +1098,7 @@ static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu,
* Make sure nothing changed since we checked the
* runqueue length.
*/
- if (busiest->nr_running <= nr_running + 1) {
+ if (busiest->nr_running <= nr_running) {
spin_unlock(&busiest->lock);
busiest = NULL;
}
@@ -996,13 +1121,31 @@ static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t
* Note that idle threads have a prio of MAX_PRIO, for this test
* to be always true for them.
*/
- if (p->prio < this_rq->curr->prio)
+ if (TASK_PREEMPTS_CURR(p, this_rq))
set_need_resched();
- else {
- if (p->prio == this_rq->curr->prio &&
- p->time_slice > this_rq->curr->time_slice)
- set_need_resched();
- }
+}
+
+/*
+ * Previously:
+ *
+ * #define CAN_MIGRATE_TASK(p,rq,this_cpu) \
+ * ((!idle || (NS_TO_JIFFIES(now - (p)->timestamp) > \
+ * cache_decay_ticks)) && !task_running(rq, p) && \
+ * cpu_isset(this_cpu, (p)->cpus_allowed))
+ */
+
+static inline int
+can_migrate_task(task_t *tsk, runqueue_t *rq, int this_cpu, int idle)
+{
+ unsigned long delta = sched_clock() - tsk->timestamp;
+
+ if (!idle && (delta <= JIFFIES_TO_NS(cache_decay_ticks)))
+ return 0;
+ if (task_running(rq, tsk))
+ return 0;
+ if (!cpu_isset(this_cpu, tsk->cpus_allowed))
+ return 0;
+ return 1;
}
/*
@@ -1026,6 +1169,12 @@ static void load_balance(runqueue_t *this_rq, int idle, cpumask_t cpumask)
goto out;
/*
+ * We only want to steal a number of tasks equal to 1/2 the imbalance,
+ * otherwise we'll just shift the imbalance to the new queue:
+ */
+ imbalance /= 2;
+
+ /*
* We first consider expired tasks. Those will likely not be
* executed in the near future, and they are most likely to
* be cache-cold, thus switching CPUs has the least effect
@@ -1064,14 +1213,9 @@ skip_queue:
* 3) are cache-hot on their current CPU.
*/
-#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
- ((idle || (jiffies - (p)->last_run > cache_decay_ticks)) && \
- !task_running(rq, p) && \
- cpu_isset(this_cpu, (p)->cpus_allowed))
-
curr = curr->prev;
- if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
+ if (!can_migrate_task(tmp, busiest, this_cpu, idle)) {
if (curr != head)
goto skip_queue;
idx++;
@@ -1233,14 +1377,11 @@ void scheduler_tick(int user_ticks, int sys_ticks)
spin_lock(&rq->lock);
/*
* The task was running during this tick - update the
- * time slice counter and the sleep average. Note: we
- * do not update a thread's priority until it either
- * goes to sleep or uses up its timeslice. This makes
- * it possible for interactive tasks to use up their
- * timeslices at their highest priority levels.
+ * time slice counter. Note: we do not update a thread's
+ * priority until it either goes to sleep or uses up its
+ * timeslice. This makes it possible for interactive tasks
+ * to use up their timeslices at their highest priority levels.
*/
- if (p->sleep_avg)
- p->sleep_avg--;
if (unlikely(rt_task(p))) {
/*
* RR tasks need a special form of timeslice management.
@@ -1264,12 +1405,39 @@ void scheduler_tick(int user_ticks, int sys_ticks)
p->time_slice = task_timeslice(p);
p->first_time_slice = 0;
+ if (!rq->expired_timestamp)
+ rq->expired_timestamp = jiffies;
if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
- if (!rq->expired_timestamp)
- rq->expired_timestamp = jiffies;
enqueue_task(p, rq->expired);
} else
enqueue_task(p, rq->active);
+ } else {
+ /*
+ * Prevent a too long timeslice allowing a task to monopolize
+ * the CPU. We do this by splitting up the timeslice into
+ * smaller pieces.
+ *
+ * Note: this does not mean the task's timeslices expire or
+ * get lost in any way, they just might be preempted by
+ * another task of equal priority. (one with higher
+ * priority would have preempted this task already.) We
+ * requeue this task to the end of the list on this priority
+ * level, which is in essence a round-robin of tasks with
+ * equal priority.
+ *
+ * This only applies to tasks in the interactive
+ * delta range with at least TIMESLICE_GRANULARITY to requeue.
+ */
+ if (TASK_INTERACTIVE(p) && !((task_timeslice(p) -
+ p->time_slice) % TIMESLICE_GRANULARITY(p)) &&
+ (p->time_slice >= TIMESLICE_GRANULARITY(p)) &&
+ (p->array == rq->active)) {
+
+ dequeue_task(p, rq->active);
+ set_tsk_need_resched(p);
+ p->prio = effective_prio(p);
+ enqueue_task(p, rq->active);
+ }
}
out_unlock:
spin_unlock(&rq->lock);
@@ -1288,6 +1456,8 @@ asmlinkage void schedule(void)
runqueue_t *rq;
prio_array_t *array;
struct list_head *queue;
+ unsigned long long now;
+ unsigned long run_time;
int idx;
/*
@@ -1308,7 +1478,20 @@ need_resched:
rq = this_rq();
release_kernel_lock(prev);
- prev->last_run = jiffies;
+ now = sched_clock();
+ if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG))
+ run_time = now - prev->timestamp;
+ else
+ run_time = NS_MAX_SLEEP_AVG;
+
+ /*
+ * Tasks with interactive credits get charged less run_time
+ * at high sleep_avg to delay them losing their interactive
+ * status
+ */
+ if (HIGH_CREDIT(prev))
+ run_time /= (CURRENT_BONUS(prev) ? : 1);
+
spin_lock_irq(&rq->lock);
/*
@@ -1358,12 +1541,33 @@ pick_next_task:
queue = array->queue + idx;
next = list_entry(queue->next, task_t, run_list);
+ if (next->activated > 0) {
+ unsigned long long delta = now - next->timestamp;
+
+ if (next->activated == 1)
+ delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
+
+ array = next->array;
+ dequeue_task(next, array);
+ recalc_task_prio(next, next->timestamp + delta);
+ enqueue_task(next, array);
+ }
+ next->activated = 0;
switch_tasks:
prefetch(next);
clear_tsk_need_resched(prev);
RCU_qsctr(task_cpu(prev))++;
+ prev->sleep_avg -= run_time;
+ if ((long)prev->sleep_avg <= 0){
+ prev->sleep_avg = 0;
+ if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
+ prev->interactive_credit--;
+ }
+ prev->timestamp = now;
+
if (likely(prev != next)) {
+ next->timestamp = now;
rq->nr_switches++;
rq->curr = next;
@@ -1603,6 +1807,7 @@ void set_user_nice(task_t *p, long nice)
unsigned long flags;
prio_array_t *array;
runqueue_t *rq;
+ int old_prio, new_prio, delta;
if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
return;
@@ -1611,6 +1816,12 @@ void set_user_nice(task_t *p, long nice)
* the task might be in the middle of scheduling on another CPU.
*/
rq = task_rq_lock(p, &flags);
+ /*
+ * The RT priorities are set via setscheduler(), but we still
+ * allow the 'normal' nice value to be set - but as expected
+ * it won't have any effect on scheduling until the task
+ * becomes SCHED_NORMAL:
+ */
if (rt_task(p)) {
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
@@ -1618,16 +1829,20 @@ void set_user_nice(task_t *p, long nice)
array = p->array;
if (array)
dequeue_task(p, array);
+
+ old_prio = p->prio;
+ new_prio = NICE_TO_PRIO(nice);
+ delta = new_prio - old_prio;
p->static_prio = NICE_TO_PRIO(nice);
- p->prio = NICE_TO_PRIO(nice);
+ p->prio += delta;
+
if (array) {
enqueue_task(p, array);
/*
- * If the task is running and lowered its priority,
- * or increased its priority then reschedule its CPU:
+ * If the task increased its priority or is running and
+ * lowered its priority, then reschedule its CPU:
*/
- if ((NICE_TO_PRIO(nice) < p->static_prio) ||
- task_running(rq, p))
+ if (delta < 0 || (delta > 0 && task_running(rq, p)))
resched_task(rq->curr);
}
out_unlock:
@@ -2384,6 +2599,12 @@ static void move_task_away(struct task_struct *p, int dest_cpu)
local_irq_restore(flags);
}
+typedef struct {
+ int cpu;
+ struct completion startup_done;
+ task_t *task;
+} migration_startup_t;
+
/*
* migration_thread - this is a highprio system thread that performs
* thread migration by bumping thread off CPU then 'pushing' onto
@@ -2393,20 +2614,21 @@ static int migration_thread(void * data)
{
/* Marking "param" __user is ok, since we do a set_fs(KERNEL_DS); */
struct sched_param __user param = { .sched_priority = MAX_RT_PRIO-1 };
- int cpu = (long) data;
+ migration_startup_t *startup = data;
+ int cpu = startup->cpu;
runqueue_t *rq;
int ret;
+ startup->task = current;
+ complete(&startup->startup_done);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+
+ BUG_ON(smp_processor_id() != cpu);
+
daemonize("migration/%d", cpu);
set_fs(KERNEL_DS);
- /*
- * Either we are running on the right CPU, or there's a a
- * migration thread on this CPU, guaranteed (we're started
- * serially).
- */
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
-
ret = setscheduler(0, SCHED_FIFO, &param);
rq = this_rq();
@@ -2445,13 +2667,30 @@ static int migration_call(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
+ long cpu = (long) hcpu;
+ migration_startup_t startup;
+
switch (action) {
case CPU_ONLINE:
- printk("Starting migration thread for cpu %li\n",
- (long)hcpu);
- kernel_thread(migration_thread, hcpu, CLONE_KERNEL);
- while (!cpu_rq((long)hcpu)->migration_thread)
+
+ printk("Starting migration thread for cpu %li\n", cpu);
+
+ startup.cpu = cpu;
+ startup.task = NULL;
+ init_completion(&startup.startup_done);
+
+ kernel_thread(migration_thread, &startup, CLONE_KERNEL);
+ wait_for_completion(&startup.startup_done);
+ wait_task_inactive(startup.task);
+
+ startup.task->thread_info->cpu = cpu;
+ startup.task->cpus_allowed = cpumask_of_cpu(cpu);
+
+ wake_up_process(startup.task);
+
+ while (!cpu_rq(cpu)->migration_thread)
yield();
+
break;
}
return NOTIFY_OK;
@@ -2574,6 +2813,8 @@ void __might_sleep(char *file, int line)
prev_jiffy = jiffies;
printk(KERN_ERR "Debug: sleeping function called from invalid"
" context at %s:%d\n", file, line);
+ printk("in_atomic():%d, irqs_disabled():%d\n",
+ in_atomic(), irqs_disabled());
dump_stack();
}
#endif
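
To get a feel for the new tuning macros at the top of sched.c, the derived constants can be evaluated in a stand-alone user-space program. The sketch below assumes HZ=1000 and the stock priority constants of this kernel generation (MAX_RT_PRIO=100, MAX_PRIO=140, nice 0 mapping to static priority 120); it only reproduces the arithmetic and is not kernel code.

#include <stdio.h>

/* assumed stock constants for this kernel generation */
#define HZ		 1000
#define MAX_RT_PRIO	 100
#define MAX_PRIO	 (MAX_RT_PRIO + 40)
#define NICE_TO_PRIO(n)	 (MAX_RT_PRIO + (n) + 20)
#define USER_PRIO(p)	 ((p) - MAX_RT_PRIO)
#define MAX_USER_PRIO	 (USER_PRIO(MAX_PRIO))

/* the tuning macros from the sched.c hunks above */
#define MIN_TIMESLICE	 ( 10 * HZ / 1000)
#define MAX_TIMESLICE	 (200 * HZ / 1000)
#define PRIO_BONUS_RATIO 25
#define MAX_BONUS	 (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
#define AVG_TIMESLICE	 (MIN_TIMESLICE + ((MAX_TIMESLICE - MIN_TIMESLICE) * \
			 (MAX_PRIO-1-NICE_TO_PRIO(0))/(MAX_USER_PRIO - 1)))
#define MAX_SLEEP_AVG	 (AVG_TIMESLICE * MAX_BONUS)

int main(void)
{
	/* with these inputs: MAX_BONUS = 10, AVG_TIMESLICE = 102 jiffies,
	 * MAX_SLEEP_AVG = 1020 jiffies, NS_MAX_SLEEP_AVG ~= 1.02e9 ns,
	 * so the effective_prio() bonus spans -5..+5 around static_prio */
	printf("MAX_BONUS        = %d\n", MAX_BONUS);
	printf("AVG_TIMESLICE    = %d jiffies\n", AVG_TIMESLICE);
	printf("MAX_SLEEP_AVG    = %d jiffies\n", MAX_SLEEP_AVG);
	printf("NS_MAX_SLEEP_AVG = %llu ns\n",
	       (unsigned long long)MAX_SLEEP_AVG * (1000000000 / HZ));
	return 0;
}

In other words, under these assumptions sleep_avg now saturates at roughly one second of nanosecond-resolution sleep, and the interactivity bonus can move a task by at most five priority levels in either direction.
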
diff --git a/kernel/signal.c b/kernel/signal.c
index 72333be1fd42..852da1a009da 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1139,7 +1139,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
static int kill_something_info(int sig, struct siginfo *info, int pid)
{
if (!pid) {
- return kill_pg_info(sig, info, current->pgrp);
+ return kill_pg_info(sig, info, process_group(current));
} else if (pid == -1) {
int retval = 0, count = 0;
struct task_struct * p;
@@ -1798,7 +1798,7 @@ relock:
/* signals can be posted during this window */
- if (is_orphaned_pgrp(current->pgrp))
+ if (is_orphaned_pgrp(process_group(current)))
goto relock;
spin_lock_irq(&current->sighand->siglock);
diff --git a/kernel/sys.c b/kernel/sys.c
index b172afa53be1..9eda26d6745c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -290,7 +290,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
break;
case PRIO_PGRP:
if (!who)
- who = current->pgrp;
+ who = process_group(current);
for_each_task_pid(who, PIDTYPE_PGID, p, l, pid)
error = set_one_prio(p, niceval, error);
break;
@@ -346,7 +346,7 @@ asmlinkage long sys_getpriority(int which, int who)
break;
case PRIO_PGRP:
if (!who)
- who = current->pgrp;
+ who = process_group(current);
for_each_task_pid(who, PIDTYPE_PGID, p, l, pid) {
niceval = 20 - task_nice(p);
if (niceval > retval)
@@ -982,11 +982,12 @@ ok_pgid:
if (err)
goto out;
- if (p->pgrp != pgid) {
+ if (process_group(p) != pgid) {
detach_pid(p, PIDTYPE_PGID);
- p->pgrp = pgid;
+ p->group_leader->__pgrp = pgid;
attach_pid(p, PIDTYPE_PGID, pgid);
}
+
err = 0;
out:
/* All paths lead to here, thus we are safe. -DaveM */
@@ -997,7 +998,7 @@ out:
asmlinkage long sys_getpgid(pid_t pid)
{
if (!pid) {
- return current->pgrp;
+ return process_group(current);
} else {
int retval;
struct task_struct *p;
@@ -1009,7 +1010,7 @@ asmlinkage long sys_getpgid(pid_t pid)
if (p) {
retval = security_task_getpgid(p);
if (!retval)
- retval = p->pgrp;
+ retval = process_group(p);
}
read_unlock(&tasklist_lock);
return retval;
@@ -1019,7 +1020,7 @@ asmlinkage long sys_getpgid(pid_t pid)
asmlinkage long sys_getpgrp(void)
{
/* SMP - assuming writes are word atomic this is fine */
- return current->pgrp;
+ return process_group(current);
}
asmlinkage long sys_getsid(pid_t pid)
@@ -1062,7 +1063,7 @@ asmlinkage long sys_setsid(void)
__set_special_pids(current->pid, current->pid);
current->tty = NULL;
current->tty_old_pgrp = 0;
- err = current->pgrp;
+ err = process_group(current);
out:
write_unlock_irq(&tasklist_lock);
return err;
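
The sys.c hunks only change where the process group is stored; the userspace-visible behaviour of setpgid()/getpgrp() is unchanged. A minimal user-space illustration of those semantics (not part of the patch):

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		/* move the child into its own process group: pgid == pid */
		if (setpgid(0, 0) != 0)
			perror("setpgid");
		printf("child : pid=%d pgrp=%d\n", getpid(), getpgrp());
		_exit(0);
	}
	printf("parent: pid=%d pgrp=%d\n", getpid(), getpgrp());
	waitpid(child, NULL, 0);
	return 0;
}

The parent reports the process group it inherited, while the child reports its own pid as its group after setpgid(0, 0), exercising exactly the sys_setpgid()/sys_getpgrp() paths touched above.
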