Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              2
-rw-r--r--  kernel/fork.c                2
-rw-r--r--  kernel/posix-cpu-timers.c  288
-rw-r--r--  kernel/posix-timers.c       10
-rw-r--r--  kernel/sched.c              33
-rw-r--r--  kernel/signal.c              1
6 files changed, 331 insertions, 5 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d680ace0fdda..0ac3efc9d071 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
- kthread.o wait.o kfifo.o sys_ni.o
+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/fork.c b/kernel/fork.c
index f6b929e69f5b..bc0633f9730a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -749,6 +749,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+ sig->sched_time = 0;
task_lock(current->group_leader);
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
@@ -877,6 +878,7 @@ static task_t *copy_process(unsigned long clone_flags,
p->utime = cputime_zero;
p->stime = cputime_zero;
+ p->sched_time = 0;
p->rchar = 0; /* I/O counter: bytes read */
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
new file mode 100644
index 000000000000..fdc54f75aa15
--- /dev/null
+++ b/kernel/posix-cpu-timers.c
@@ -0,0 +1,288 @@
+/*
+ * Implement CPU time clocks for the POSIX clock interface.
+ */
+
+#include <linux/sched.h>
+#include <linux/posix-timers.h>
+#include <asm/uaccess.h>
+#include <linux/errno.h>
+
+union cpu_time_count {
+ cputime_t cpu;
+ unsigned long long sched;
+};
+
+static int check_clock(clockid_t which_clock)
+{
+ int error = 0;
+ struct task_struct *p;
+ const pid_t pid = CPUCLOCK_PID(which_clock);
+
+ if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
+ return -EINVAL;
+
+ if (pid == 0)
+ return 0;
+
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
+ p->tgid != current->tgid : p->tgid != pid)) {
+ error = -EINVAL;
+ }
+ read_unlock(&tasklist_lock);
+
+ return error;
+}
+
+static void sample_to_timespec(clockid_t which_clock,
+ union cpu_time_count cpu,
+ struct timespec *tp)
+{
+ if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
+ tp->tv_sec = div_long_long_rem(cpu.sched,
+ NSEC_PER_SEC, &tp->tv_nsec);
+ } else {
+ cputime_to_timespec(cpu.cpu, tp);
+ }
+}
+
+static inline cputime_t prof_ticks(struct task_struct *p)
+{
+ return cputime_add(p->utime, p->stime);
+}
+static inline cputime_t virt_ticks(struct task_struct *p)
+{
+ return p->utime;
+}
+static inline unsigned long long sched_ns(struct task_struct *p)
+{
+ return (p == current) ? current_sched_time(p) : p->sched_time;
+}
+
+int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
+{
+ int error = check_clock(which_clock);
+ if (!error) {
+ tp->tv_sec = 0;
+ tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
+ if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
+ /*
+ * If sched_clock is using a cycle counter, we
+ * don't have any idea of its true resolution
+ * exported, but it is much more than 1s/HZ.
+ */
+ tp->tv_nsec = 1;
+ }
+ }
+ return error;
+}
+
+int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp)
+{
+ /*
+ * You can never reset a CPU clock, but we check for other errors
+ * in the call before failing with EPERM.
+ */
+ int error = check_clock(which_clock);
+ if (error == 0) {
+ error = -EPERM;
+ }
+ return error;
+}
+
+
+/*
+ * Sample a per-thread clock for the given task.
+ */
+static int cpu_clock_sample(clockid_t which_clock, struct task_struct *p,
+ union cpu_time_count *cpu)
+{
+ switch (CPUCLOCK_WHICH(which_clock)) {
+ default:
+ return -EINVAL;
+ case CPUCLOCK_PROF:
+ cpu->cpu = prof_ticks(p);
+ break;
+ case CPUCLOCK_VIRT:
+ cpu->cpu = virt_ticks(p);
+ break;
+ case CPUCLOCK_SCHED:
+ cpu->sched = sched_ns(p);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Sample a process (thread group) clock for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_clock_sample_group(clockid_t which_clock,
+ struct task_struct *p,
+ union cpu_time_count *cpu)
+{
+ struct task_struct *t = p;
+ unsigned long flags;
+ switch (CPUCLOCK_WHICH(which_clock)) {
+ default:
+ return -EINVAL;
+ case CPUCLOCK_PROF:
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
+ do {
+ cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
+ t = next_thread(t);
+ } while (t != p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ break;
+ case CPUCLOCK_VIRT:
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ cpu->cpu = p->signal->utime;
+ do {
+ cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
+ t = next_thread(t);
+ } while (t != p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ break;
+ case CPUCLOCK_SCHED:
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ cpu->sched = p->signal->sched_time;
+ /* Add in each other live thread. */
+ while ((t = next_thread(t)) != p) {
+ cpu->sched += t->sched_time;
+ }
+ if (p->tgid == current->tgid) {
+ /*
+ * We're sampling ourselves, so include the
+ * cycles not yet banked. We still omit
+ * other threads running on other CPUs,
+ * so the total can always be behind as
+ * much as max(nthreads-1,ncpus) * (NSEC_PER_SEC/HZ).
+ */
+ cpu->sched += current_sched_time(current);
+ } else {
+ cpu->sched += p->sched_time;
+ }
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ break;
+ }
+ return 0;
+}
+
+
+int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+ const pid_t pid = CPUCLOCK_PID(which_clock);
+ int error = -EINVAL;
+ union cpu_time_count rtn;
+
+ if (pid == 0) {
+ /*
+ * Special case constant value for our own clocks.
+ * We don't have to do any lookup to find ourselves.
+ */
+ if (CPUCLOCK_PERTHREAD(which_clock)) {
+ /*
+ * Sampling just ourselves we can do with no locking.
+ */
+ error = cpu_clock_sample(which_clock,
+ current, &rtn);
+ } else {
+ read_lock(&tasklist_lock);
+ error = cpu_clock_sample_group(which_clock,
+ current, &rtn);
+ read_unlock(&tasklist_lock);
+ }
+ } else {
+ /*
+ * Find the given PID, and validate that the caller
+ * should be able to see it.
+ */
+ struct task_struct *p;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ if (p) {
+ if (CPUCLOCK_PERTHREAD(which_clock)) {
+ if (p->tgid == current->tgid) {
+ error = cpu_clock_sample(which_clock,
+ p, &rtn);
+ }
+ } else if (p->tgid == pid && p->signal) {
+ error = cpu_clock_sample_group(which_clock,
+ p, &rtn);
+ }
+ }
+ read_unlock(&tasklist_lock);
+ }
+
+ if (error)
+ return error;
+ sample_to_timespec(which_clock, rtn, tp);
+ return 0;
+}
+
+/*
+ * These can't be called, since timer_create never works.
+ */
+int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+ struct itimerspec *old, struct itimerspec *new)
+{
+ BUG();
+ return -EINVAL;
+}
+int posix_cpu_timer_del(struct k_itimer *timer)
+{
+ BUG();
+ return -EINVAL;
+}
+void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *spec)
+{
+ BUG();
+}
+
+
+#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
+#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
+
+static int process_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
+{
+ return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
+}
+static int process_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+ return posix_cpu_clock_get(PROCESS_CLOCK, tp);
+}
+static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
+{
+ return posix_cpu_clock_getres(THREAD_CLOCK, tp);
+}
+static int thread_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+ return posix_cpu_clock_get(THREAD_CLOCK, tp);
+}
+
+
+static __init int init_posix_cpu_timers(void)
+{
+ struct k_clock process = {
+ .clock_getres = process_cpu_clock_getres,
+ .clock_get = process_cpu_clock_get,
+ .clock_set = do_posix_clock_nosettime,
+ .timer_create = do_posix_clock_notimer_create,
+ .nsleep = do_posix_clock_nonanosleep,
+ };
+ struct k_clock thread = {
+ .clock_getres = thread_cpu_clock_getres,
+ .clock_get = thread_cpu_clock_get,
+ .clock_set = do_posix_clock_nosettime,
+ .timer_create = do_posix_clock_notimer_create,
+ .nsleep = do_posix_clock_nonanosleep,
+ };
+
+ register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+ register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+
+ return 0;
+}
+__initcall(init_posix_cpu_timers);
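
The two clocks registered above are reachable from userspace through the standard POSIX clock calls. A minimal usage sketch (ordinary userspace C, not part of the patch; older glibc needs -lrt for clock_gettime):

/*
 * Userspace sketch: read the new process and thread CPU clocks.
 * Not part of the patch; link with -lrt on older glibc.
 */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res, now;
	volatile unsigned long i;

	/* Resolution as reported by posix_cpu_clock_getres(). */
	if (clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res) == 0)
		printf("process CPU clock resolution: %ld ns\n", res.tv_nsec);

	/* Burn a little CPU so the samples are nonzero. */
	for (i = 0; i < 10000000; i++)
		;

	/* Ends up in posix_cpu_clock_get() via process_cpu_clock_get(). */
	if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now) == 0)
		printf("process CPU time: %ld.%09ld s\n",
		       (long) now.tv_sec, now.tv_nsec);

	/* Per-thread variant, via thread_cpu_clock_get(). */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now) == 0)
		printf("thread CPU time:  %ld.%09ld s\n",
		       (long) now.tv_sec, now.tv_nsec);
	return 0;
}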
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d04a2f17e395..09b2d6b4634f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -200,13 +200,15 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
#ifdef CLOCK_DISPATCH_DIRECT
#define CLOCK_DISPATCH(clock, call, arglist) \
- ((*posix_clocks[clock].call) arglist)
+ ((clock) < 0 ? posix_cpu_##call arglist : \
+ (*posix_clocks[clock].call) arglist)
#define DEFHOOK(name) if (clock->name == NULL) clock->name = common_##name
#define COMMONDEFN static
#else
#define CLOCK_DISPATCH(clock, call, arglist) \
- (posix_clocks[clock].call != NULL \
- ? (*posix_clocks[clock].call) arglist : common_##call arglist)
+ ((clock) < 0 ? posix_cpu_##call arglist : \
+ (posix_clocks[clock].call != NULL \
+ ? (*posix_clocks[clock].call) arglist : common_##call arglist))
#define DEFHOOK(name) (void) 0 /* Nothing here. */
#define COMMONDEFN static inline
#endif
@@ -277,6 +279,8 @@ static inline void common_default_hooks(struct k_clock *clock)
*/
static inline int invalid_clockid(clockid_t which_clock)
{
+ if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */
+ return 0;
if ((unsigned) which_clock >= MAX_CLOCKS)
return 1;
if (posix_clocks[which_clock].clock_getres != NULL)
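
Both changes above rely on one convention: a clockid below zero names a CPU-time clock, with a PID and a clock type (CPUCLOCK_PROF, CPUCLOCK_VIRT or CPUCLOCK_SCHED) packed into the negative value and recovered by CPUCLOCK_PID(), CPUCLOCK_WHICH() and CPUCLOCK_PERTHREAD(). The packing macros live in include/linux/posix-timers.h, which is outside this kernel/-only diffstat, so the field positions in the sketch below are assumptions used only to illustrate the idea, not the header's actual layout:

/*
 * Illustrative model of the negative-clockid encoding for CPU clocks.
 * The real macros are in include/linux/posix-timers.h; the bit
 * positions here are assumed for the sake of the example.
 */
#include <stdio.h>

enum { PROF, VIRT, SCHED };		/* clock types, as in CPUCLOCK_* */

/* Pack: the complemented PID in the high bits keeps the value negative. */
static int make_process_clock(int pid, int which)
{
	return (int)(((unsigned)~pid << 3) | (unsigned)which);
}
static int make_thread_clock(int tid, int which)
{
	return make_process_clock(tid, which | 4);	/* bit 2: per-thread */
}

/* Unpack, mirroring CPUCLOCK_PID/WHICH/PERTHREAD in spirit. */
static int clock_pid(int c)       { return ~(c >> 3); }
static int clock_which(int c)     { return c & 3; }
static int clock_perthread(int c) { return (c & 4) != 0; }

int main(void)
{
	int c = make_thread_clock(1234, SCHED);

	printf("clockid=%d  pid=%d  which=%d  perthread=%d\n",
	       c, clock_pid(c), clock_which(c), clock_perthread(c));
	/* clockid is negative, so CLOCK_DISPATCH() routes it to posix_cpu_*. */
	return 0;
}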
diff --git a/kernel/sched.c b/kernel/sched.c
index 0888acbe3f66..8176366cfd8f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2242,6 +2242,32 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
EXPORT_PER_CPU_SYMBOL(kstat);
/*
+ * This is called on clock ticks and on context switches.
+ * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ */
+static inline void update_cpu_clock(task_t *p, runqueue_t *rq,
+ unsigned long long now)
+{
+ unsigned long long last = max(p->timestamp, rq->timestamp_last_tick);
+ p->sched_time += now - last;
+}
+
+/*
+ * Return current->sched_time plus any more ns on the sched_clock
+ * that have not yet been banked.
+ */
+unsigned long long current_sched_time(const task_t *tsk)
+{
+ unsigned long long ns;
+ unsigned long flags;
+ local_irq_save(flags);
+ ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick);
+ ns = tsk->sched_time + (sched_clock() - ns);
+ local_irq_restore(flags);
+ return ns;
+}
+
+/*
* We place interactive tasks back into the active array, if possible.
*
* To guarantee that this does not starve expired tasks we ignore the
@@ -2419,8 +2445,11 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
runqueue_t *rq = this_rq();
task_t *p = current;
+ unsigned long long now = sched_clock();
+
+ update_cpu_clock(p, rq, now);
- rq->timestamp_last_tick = sched_clock();
+ rq->timestamp_last_tick = now;
if (p == rq->idle) {
if (wake_priority_sleeper(rq))
@@ -2804,6 +2833,8 @@ switch_tasks:
clear_tsk_need_resched(prev);
rcu_qsctr_inc(task_cpu(prev));
+ update_cpu_clock(prev, rq, now);
+
prev->sleep_avg -= run_time;
if ((long)prev->sleep_avg <= 0)
prev->sleep_avg = 0;
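
update_cpu_clock() credits a task only with the nanoseconds since whichever happened later, its own last switch-in (p->timestamp) or the runqueue's last tick, so the same interval is never banked twice; current_sched_time() returns the banked total plus the still-unbanked tail since that same point. A small standalone model of that bookkeeping (the struct, the fake clock and the driver are made up for illustration; only the max()-based arithmetic mirrors the patch):

/*
 * Standalone model of the sched_time banking in this patch.
 * The struct and the fake clock are hypothetical; only the max()-based
 * accounting mirrors update_cpu_clock()/current_sched_time().
 */
#include <stdio.h>

static unsigned long long fake_clock;		/* stands in for sched_clock() */
static unsigned long long timestamp_last_tick;	/* per-runqueue in the kernel */

struct task_model {
	unsigned long long sched_time;	/* ns banked so far */
	unsigned long long timestamp;	/* last context switch, ns */
};

static unsigned long long max_ull(unsigned long long a, unsigned long long b)
{
	return a > b ? a : b;
}

/* Mirrors update_cpu_clock(): credit only the ns not yet banked. */
static void bank(struct task_model *p, unsigned long long now)
{
	p->sched_time += now - max_ull(p->timestamp, timestamp_last_tick);
}

/* Mirrors current_sched_time(): banked total plus the unbanked tail. */
static unsigned long long current_time(const struct task_model *p)
{
	return p->sched_time +
	       (fake_clock - max_ull(p->timestamp, timestamp_last_tick));
}

int main(void)
{
	struct task_model t = { 0, 0 };

	fake_clock = 1000000;			/* tick at 1 ms */
	bank(&t, fake_clock);			/* as in scheduler_tick() */
	timestamp_last_tick = fake_clock;

	fake_clock = 1600000;			/* 0.6 ms later, mid-tick read */
	printf("banked=%llu ns, sampled=%llu ns\n",
	       t.sched_time, current_time(&t));	/* 1000000, 1600000 */
	return 0;
}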
diff --git a/kernel/signal.c b/kernel/signal.c
index 9e87ab3f8f21..3f1df438d23c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -381,6 +381,7 @@ void __exit_signal(struct task_struct *tsk)
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
sig->nivcsw += tsk->nivcsw;
+ sig->sched_time += tsk->sched_time;
spin_unlock(&sighand->siglock);
sig = NULL; /* Marker for below. */
}