Diffstat (limited to 'kernel')
-rw-r--r--   kernel/Makefile             |   2
-rw-r--r--   kernel/fork.c               |   2
-rw-r--r--   kernel/posix-cpu-timers.c   | 288
-rw-r--r--   kernel/posix-timers.c       |  10
-rw-r--r--   kernel/sched.c              |  33
-rw-r--r--   kernel/signal.c             |   1
6 files changed, 331 insertions, 5 deletions
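
A note for reading the patch: the CPUCLOCK_PID / CPUCLOCK_PERTHREAD / CPUCLOCK_WHICH macros used throughout posix-cpu-timers.c, and the "(clock) < 0" test added to the CLOCK_DISPATCH macros in posix-timers.c, rely on a clockid_t encoding defined in include/linux/posix-timers.h, which lies outside the 'kernel' directory and so is not part of this diffstat. The following is an illustrative sketch of that encoding, reconstructed for orientation rather than quoted from the patch:

/*
 * Illustrative sketch only -- not one of the hunks below. The PID is
 * stored bit-inverted above three flag bits, so every CPU-clock id is
 * negative; that is what the "(clock) < 0" dispatch test keys off.
 *
 *   bits 0-1: which clock (CPUCLOCK_PROF, CPUCLOCK_VIRT, CPUCLOCK_SCHED)
 *   bit  2  : set for a per-thread clock, clear for a whole process
 *   bits 3..: ~pid, with pid 0 meaning "the calling task"
 */
#define CPUCLOCK_PID(clock)        ((pid_t) ~((clock) >> 3))
#define CPUCLOCK_PERTHREAD(clock)  (((clock) & (clockid_t) 4) != 0)
#define CPUCLOCK_WHICH(clock)      ((clock) & (clockid_t) 3)

#define CPUCLOCK_PROF   0
#define CPUCLOCK_VIRT   1
#define CPUCLOCK_SCHED  2
#define CPUCLOCK_MAX    3

#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
        ((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
#define MAKE_THREAD_CPUCLOCK(tid, clock) \
        MAKE_PROCESS_CPUCLOCK((tid), (clock) | 4)

Under this encoding, PROCESS_CLOCK in the new file expands to (~0 << 3) | CPUCLOCK_SCHED = -6: a negative id naming the caller's own process-wide SCHED clock, with no task lookup needed.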
diff --git a/kernel/Makefile b/kernel/Makefile
index d680ace0fdda..0ac3efc9d071 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-	    kthread.o wait.o kfifo.o sys_ni.o
+	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o
 
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/fork.c b/kernel/fork.c
index f6b929e69f5b..bc0633f9730a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -749,6 +749,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+	sig->sched_time = 0;
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
@@ -877,6 +878,7 @@ static task_t *copy_process(unsigned long clone_flags,
 
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
+	p->sched_time = 0;
 	p->rchar = 0;		/* I/O counter: bytes read */
 	p->wchar = 0;		/* I/O counter: bytes written */
 	p->syscr = 0;		/* I/O counter: read syscalls */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
new file mode 100644
index 000000000000..fdc54f75aa15
--- /dev/null
+++ b/kernel/posix-cpu-timers.c
@@ -0,0 +1,288 @@
+/*
+ * Implement CPU time clocks for the POSIX clock interface.
+ */
+
+#include <linux/sched.h>
+#include <linux/posix-timers.h>
+#include <asm/uaccess.h>
+#include <linux/errno.h>
+
+union cpu_time_count {
+	cputime_t cpu;
+	unsigned long long sched;
+};
+
+static int check_clock(clockid_t which_clock)
+{
+	int error = 0;
+	struct task_struct *p;
+	const pid_t pid = CPUCLOCK_PID(which_clock);
+
+	if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
+		return -EINVAL;
+
+	if (pid == 0)
+		return 0;
+
+	read_lock(&tasklist_lock);
+	p = find_task_by_pid(pid);
+	if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
+		   p->tgid != current->tgid : p->tgid != pid)) {
+		error = -EINVAL;
+	}
+	read_unlock(&tasklist_lock);
+
+	return error;
+}
+
+static void sample_to_timespec(clockid_t which_clock,
+			       union cpu_time_count cpu,
+			       struct timespec *tp)
+{
+	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
+		tp->tv_sec = div_long_long_rem(cpu.sched,
+					       NSEC_PER_SEC, &tp->tv_nsec);
+	} else {
+		cputime_to_timespec(cpu.cpu, tp);
+	}
+}
+
+static inline cputime_t prof_ticks(struct task_struct *p)
+{
+	return cputime_add(p->utime, p->stime);
+}
+static inline cputime_t virt_ticks(struct task_struct *p)
+{
+	return p->utime;
+}
+static inline unsigned long long sched_ns(struct task_struct *p)
+{
+	return (p == current) ? current_sched_time(p) : p->sched_time;
+}
+
+int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
+{
+	int error = check_clock(which_clock);
+	if (!error) {
+		tp->tv_sec = 0;
+		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
+		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
+			/*
+			 * If sched_clock is using a cycle counter, we
+			 * don't have any idea of its true resolution
+			 * exported, but it is much more than 1s/HZ.
+			 */
+			tp->tv_nsec = 1;
+		}
+	}
+	return error;
+}
+
+int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp)
+{
+	/*
+	 * You can never reset a CPU clock, but we check for other errors
+	 * in the call before failing with EPERM.
+	 */
+	int error = check_clock(which_clock);
+	if (error == 0) {
+		error = -EPERM;
+	}
+	return error;
+}
+
+
+/*
+ * Sample a per-thread clock for the given task.
+ */
+static int cpu_clock_sample(clockid_t which_clock, struct task_struct *p,
+			    union cpu_time_count *cpu)
+{
+	switch (CPUCLOCK_WHICH(which_clock)) {
+	default:
+		return -EINVAL;
+	case CPUCLOCK_PROF:
+		cpu->cpu = prof_ticks(p);
+		break;
+	case CPUCLOCK_VIRT:
+		cpu->cpu = virt_ticks(p);
+		break;
+	case CPUCLOCK_SCHED:
+		cpu->sched = sched_ns(p);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Sample a process (thread group) clock for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_clock_sample_group(clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
+{
+	struct task_struct *t = p;
+	unsigned long flags;
+	switch (CPUCLOCK_WHICH(which_clock)) {
+	default:
+		return -EINVAL;
+	case CPUCLOCK_PROF:
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
+		do {
+			cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
+			t = next_thread(t);
+		} while (t != p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		break;
+	case CPUCLOCK_VIRT:
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		cpu->cpu = p->signal->utime;
+		do {
+			cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
+			t = next_thread(t);
+		} while (t != p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		break;
+	case CPUCLOCK_SCHED:
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		cpu->sched = p->signal->sched_time;
+		/* Add in each other live thread. */
+		while ((t = next_thread(t)) != p) {
+			cpu->sched += t->sched_time;
+		}
+		if (p->tgid == current->tgid) {
+			/*
+			 * We're sampling ourselves, so include the
+			 * cycles not yet banked.  We still omit
+			 * other threads running on other CPUs,
+			 * so the total can always be behind as
+			 * much as max(nthreads-1,ncpus) * (NSEC_PER_SEC/HZ).
+			 */
+			cpu->sched += current_sched_time(current);
+		} else {
+			cpu->sched += p->sched_time;
+		}
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		break;
+	}
+	return 0;
+}
+
+
+int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+	const pid_t pid = CPUCLOCK_PID(which_clock);
+	int error = -EINVAL;
+	union cpu_time_count rtn;
+
+	if (pid == 0) {
+		/*
+		 * Special case constant value for our own clocks.
+		 * We don't have to do any lookup to find ourselves.
+		 */
+		if (CPUCLOCK_PERTHREAD(which_clock)) {
+			/*
+			 * Sampling just ourselves we can do with no locking.
+			 */
+			error = cpu_clock_sample(which_clock,
+						 current, &rtn);
+		} else {
+			read_lock(&tasklist_lock);
+			error = cpu_clock_sample_group(which_clock,
+						       current, &rtn);
+			read_unlock(&tasklist_lock);
+		}
+	} else {
+		/*
+		 * Find the given PID, and validate that the caller
+		 * should be able to see it.
+		 */
+		struct task_struct *p;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(pid);
+		if (p) {
+			if (CPUCLOCK_PERTHREAD(which_clock)) {
+				if (p->tgid == current->tgid) {
+					error = cpu_clock_sample(which_clock,
+								 p, &rtn);
+				}
+			} else if (p->tgid == pid && p->signal) {
+				error = cpu_clock_sample_group(which_clock,
+							       p, &rtn);
+			}
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	if (error)
+		return error;
+	sample_to_timespec(which_clock, rtn, tp);
+	return 0;
+}
+
+/*
+ * These can't be called, since timer_create never works.
+ */
+int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+			struct itimerspec *old, struct itimerspec *new)
+{
+	BUG();
+	return -EINVAL;
+}
+int posix_cpu_timer_del(struct k_itimer *timer)
+{
+	BUG();
+	return -EINVAL;
+}
+void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *spec)
+{
+	BUG();
+}
+
+
+#define PROCESS_CLOCK	MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
+#define THREAD_CLOCK	MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
+
+static int process_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
+{
+	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
+}
+static int process_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
+}
+static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp)
+{
+	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
+}
+static int thread_cpu_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+	return posix_cpu_clock_get(THREAD_CLOCK, tp);
+}
+
+
+static __init int init_posix_cpu_timers(void)
+{
+	struct k_clock process = {
+		.clock_getres = process_cpu_clock_getres,
+		.clock_get = process_cpu_clock_get,
+		.clock_set = do_posix_clock_nosettime,
+		.timer_create = do_posix_clock_notimer_create,
+		.nsleep = do_posix_clock_nonanosleep,
+	};
+	struct k_clock thread = {
+		.clock_getres = thread_cpu_clock_getres,
+		.clock_get = thread_cpu_clock_get,
+		.clock_set = do_posix_clock_nosettime,
+		.timer_create = do_posix_clock_notimer_create,
+		.nsleep = do_posix_clock_nonanosleep,
+	};
+
+	register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+	register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+
+	return 0;
+}
+__initcall(init_posix_cpu_timers);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d04a2f17e395..09b2d6b4634f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -200,13 +200,15 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
 
 #ifdef CLOCK_DISPATCH_DIRECT
 #define CLOCK_DISPATCH(clock, call, arglist) \
-	((*posix_clocks[clock].call) arglist)
+	((clock) < 0 ? posix_cpu_##call arglist : \
+	 (*posix_clocks[clock].call) arglist)
 #define DEFHOOK(name) if (clock->name == NULL) clock->name = common_##name
 #define COMMONDEFN static
 #else
 #define CLOCK_DISPATCH(clock, call, arglist) \
-	(posix_clocks[clock].call != NULL \
-	 ? (*posix_clocks[clock].call) arglist : common_##call arglist)
+	((clock) < 0 ? posix_cpu_##call arglist : \
+	 (posix_clocks[clock].call != NULL \
+	  ? (*posix_clocks[clock].call) arglist : common_##call arglist))
 #define DEFHOOK(name)	(void) 0	/* Nothing here. */
 #define COMMONDEFN static inline
 #endif
@@ -277,6 +279,8 @@ static inline void common_default_hooks(struct k_clock *clock)
  */
 static inline int invalid_clockid(clockid_t which_clock)
 {
+	if (which_clock < 0)	/* CPU clock, posix_cpu_* will check it */
+		return 0;
 	if ((unsigned) which_clock >= MAX_CLOCKS)
 		return 1;
 	if (posix_clocks[which_clock].clock_getres != NULL)
diff --git a/kernel/sched.c b/kernel/sched.c
index 0888acbe3f66..8176366cfd8f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2242,6 +2242,32 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
+ * This is called on clock ticks and on context switches.
+ * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ */
+static inline void update_cpu_clock(task_t *p, runqueue_t *rq,
+				    unsigned long long now)
+{
+	unsigned long long last = max(p->timestamp, rq->timestamp_last_tick);
+	p->sched_time += now - last;
+}
+
+/*
+ * Return current->sched_time plus any more ns on the sched_clock
+ * that have not yet been banked.
+ */
+unsigned long long current_sched_time(const task_t *tsk)
+{
+	unsigned long long ns;
+	unsigned long flags;
+	local_irq_save(flags);
+	ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick);
+	ns = tsk->sched_time + (sched_clock() - ns);
+	local_irq_restore(flags);
+	return ns;
+}
+
+/*
  * We place interactive tasks back into the active array, if possible.
  *
  * To guarantee that this does not starve expired tasks we ignore the
@@ -2419,8 +2445,11 @@ void scheduler_tick(void)
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
+	unsigned long long now = sched_clock();
+
+	update_cpu_clock(p, rq, now);
 
-	rq->timestamp_last_tick = sched_clock();
+	rq->timestamp_last_tick = now;
 
 	if (p == rq->idle) {
 		if (wake_priority_sleeper(rq))
@@ -2804,6 +2833,8 @@ switch_tasks:
 	clear_tsk_need_resched(prev);
 	rcu_qsctr_inc(task_cpu(prev));
 
+	update_cpu_clock(prev, rq, now);
+
 	prev->sleep_avg -= run_time;
 	if ((long)prev->sleep_avg <= 0)
 		prev->sleep_avg = 0;
diff --git a/kernel/signal.c b/kernel/signal.c
index 9e87ab3f8f21..3f1df438d23c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -381,6 +381,7 @@ void __exit_signal(struct task_struct *tsk)
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
 		sig->nivcsw += tsk->nivcsw;
+		sig->sched_time += tsk->sched_time;
 		spin_unlock(&sighand->siglock);
 		sig = NULL;	/* Marker for below. */
 	}
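
Once applied, the two clocks the patch registers are reachable from userspace through the standard POSIX clock calls. A minimal test sketch, assuming a glibc that routes these calls to the kernel (link with -lrt on systems of that vintage):

#include <stdio.h>
#include <time.h>

/* Burn a little CPU so the clocks have something to show. */
static volatile unsigned long sink;

int main(void)
{
	struct timespec res, proc_ts, thread_ts;
	unsigned long i;

	for (i = 0; i < 50000000UL; i++)
		sink += i;

	/* The patch reports 1 ns for these SCHED-based clocks. */
	if (clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res) == 0)
		printf("resolution: %ld ns\n", res.tv_nsec);

	/* CPU time consumed by the whole thread group ... */
	if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &proc_ts) == 0)
		printf("process: %ld.%09ld s\n",
		       (long) proc_ts.tv_sec, proc_ts.tv_nsec);

	/* ... and by the calling thread alone. */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &thread_ts) == 0)
		printf("thread:  %ld.%09ld s\n",
		       (long) thread_ts.tv_sec, thread_ts.tv_nsec);

	return 0;
}

Setting either clock is refused (their clock_set hook is do_posix_clock_nosettime), and timer_create against them is refused as well (do_posix_clock_notimer_create). That is why the posix_cpu_timer_set/del/get stubs in the new file can simply BUG(): with timer creation blocked, they are unreachable.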
