From 4aecda303c46f18edbc3bddaaa92e0c17ff94a22 Mon Sep 17 00:00:00 2001 From: Hirokazu Takata Date: Wed, 13 Oct 2004 07:26:25 -0700 Subject: [PATCH] m32r: fix syscall table This patch fixes the system call table for m32r. The latest kernel cannot be linked for m32r, because the following experimental syscalls don't exist in the prepatch kernel of bk-tree. * include/asm-m32r/unistd.h: - Remove syscalls from #285(perfctr_info) to #293(keyctl). * arch/m32r/kernel/entry.S: ditto. Signed-off-by: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-m32r/unistd.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h index 31aca939104f..a506573b7b69 100644 --- a/include/asm-m32r/unistd.h +++ b/include/asm-m32r/unistd.h @@ -294,25 +294,16 @@ #define __NR_mq_getsetattr (__NR_mq_open+5) #define __NR_sys_kexec_load 283 #define __NR_waitid 284 -#define __NR_perfctr_info 285 -#define __NR_vperfctr_open (__NR_perfctr_info+1) -#define __NR_vperfctr_control (__NR_perfctr_info+2) -#define __NR_vperfctr_unlink (__NR_perfctr_info+3) -#define __NR_vperfctr_iresume (__NR_perfctr_info+4) -#define __NR_vperfctr_read (__NR_perfctr_info+5) -#define __NR_add_key 291 -#define __NR_request_key 292 -#define __NR_keyctl 293 -#define NR_syscalls 294 +#define NR_syscalls 285 -/* user-visible error numbers are in the range -1 - -128: see +/* user-visible error numbers are in the range -1 - -124: see * */ #define __syscall_return(type, res) \ do { \ - if ((unsigned long)(res) >= (unsigned long)(-(128 + 1))) { \ + if ((unsigned long)(res) >= (unsigned long)(-(124 + 1))) { \ /* Avoid using "res" which is declared to be in register r0; \ errno might expand to a function call and clobber it. 
*/ \ int __err = -(res); \ -- cgit v1.2.3 From 531deb70f383aa32f24121326d5cdf79f2558e1e Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 13 Oct 2004 07:27:01 -0700 Subject: [PATCH] ptep_establish smp race x86 PAE >4G This avoids userspace mm corruption during COWs with threads (i.e. malloc;fork;clone) on x86 PAE with >4G of ram Signed-Off-By: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 8 ++++++++ include/asm-i386/pgtable-3level.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 29573197c3ee..cf791b073e76 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -13,11 +13,19 @@ * Note: the old pte is known to not be writable, so we don't need to * worry about dirty bits etc getting lost. */ +#ifndef __HAVE_ARCH_SET_PTE_ATOMIC #define ptep_establish(__vma, __address, __ptep, __entry) \ do { \ set_pte(__ptep, __entry); \ flush_tlb_page(__vma, __address); \ } while (0) +#else /* __HAVE_ARCH_SET_PTE_ATOMIC */ +#define ptep_establish(__vma, __address, __ptep, __entry) \ +do { \ + set_pte_atomic(__ptep, __entry); \ + flush_tlb_page(__vma, __address); \ +} while (0) +#endif /* __HAVE_ARCH_SET_PTE_ATOMIC */ #endif #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index e48ab3d07c83..80a24b01d47e 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -54,6 +54,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) smp_wmb(); ptep->pte_low = pte.pte_low; } +#define __HAVE_ARCH_SET_PTE_ATOMIC #define set_pte_atomic(pteptr,pteval) \ set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) #define set_pmd(pmdptr,pmdval) \ -- cgit v1.2.3 From b9877c907d56b803b5b0241c2465ce768809fce9 Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Wed, 13 Oct 2004 07:27:49 -0700 Subject: 
[PATCH] Fix reporting of process start times Derive process start times from the posix_clock_monotonic notion of uptime instead of "jiffies", consistent with the earlier change to /proc/uptime itself. (http://linus.bkbits.net:8080/linux-2.5/cset@3ef4851dGg0fxX58R9Zv8SIq9fzNmQ?nav=index.html|src/.|src/fs|src/fs/proc|related/fs/proc/proc_misc.c) Process start times are reported to userspace in units of 1/USER_HZ since boot, thus applications such as procps need the value of "uptime" to convert them into absolute time. Currently "uptime" is derived from an ntp-corrected time base, but process start time is derived from the free-running "jiffies" counter. This results in inaccurate, drifting process start times as seen by the user, even if the exported number stays constant, because the user's notion of "jiffies" changes in time. It's John Stultz's patch anyways, which I only messed up a bit, but since people started trading signed-off lines on lkml: Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 6 +++++- include/linux/acct.h | 23 +++++++++++++++-------- include/linux/sched.h | 2 +- include/linux/times.h | 20 ++++++++++++++++++++ kernel/acct.c | 10 +++++++++- kernel/fork.c | 2 +- mm/oom_kill.c | 19 +++++++++++++------ 7 files changed, 64 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index fc5c7846df32..272908775622 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -360,7 +360,11 @@ int proc_pid_stat(struct task_struct *task, char * buffer) read_unlock(&tasklist_lock); /* Temporary variable needed for gcc-2.96 */ - start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES); + /* convert timespec -> nsec*/ + start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC + + task->start_time.tv_nsec; + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu 
%lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ diff --git a/include/linux/acct.h b/include/linux/acct.h index b46ce1ac1c6a..a6ab17c49aa1 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -172,17 +172,24 @@ static inline u32 jiffies_to_AHZ(unsigned long x) #endif } -static inline u64 jiffies_64_to_AHZ(u64 x) +static inline u64 nsec_to_AHZ(u64 x) { -#if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 -#if HZ != AHZ - do_div(x, HZ / AHZ); -#endif -#else - x *= TICK_NSEC; +#if (NSEC_PER_SEC % AHZ) == 0 do_div(x, (NSEC_PER_SEC / AHZ)); +#elif (AHZ % 512) == 0 + x *= AHZ/512; + do_div(x, (NSEC_PER_SEC / 512)); +#else + /* + * max relative error 5.7e-8 (1.8s per year) for AHZ <= 1024, + * overflow after 64.99 years. + * exact for AHZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... + */ + x *= 9; + do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (AHZ/2)) + / AHZ)); #endif - return x; + return x; } #endif /* __KERNEL */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 90f5cb645116..8810b551082a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -508,7 +508,7 @@ struct task_struct { struct timer_list real_timer; unsigned long utime, stime; unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; + struct timespec start_time; /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; /* process credentials */ diff --git a/include/linux/times.h b/include/linux/times.h index ff00f334ffaa..0c5aa078dad4 100644 --- a/include/linux/times.h +++ b/include/linux/times.h @@ -55,6 +55,26 @@ static inline u64 jiffies_64_to_clock_t(u64 x) } #endif +static inline u64 nsec_to_clock_t(u64 x) +{ +#if (NSEC_PER_SEC % USER_HZ) == 0 + do_div(x, (NSEC_PER_SEC / USER_HZ)); +#elif (USER_HZ % 512) == 0 + x *= USER_HZ/512; + do_div(x, (NSEC_PER_SEC / 512)); +#else + /* + * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, + * overflow 
after 64.99 years. + * exact for USER_HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... + */ + x *= 9; + do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) + / USER_HZ)); +#endif + return x; +} + struct tms { clock_t tms_utime; clock_t tms_stime; diff --git a/kernel/acct.c b/kernel/acct.c index daf23c4efab4..fb6989a34f6e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -384,6 +384,8 @@ static void do_acct_process(long exitcode, struct file *file) unsigned long vsize; unsigned long flim; u64 elapsed; + u64 run_time; + struct timespec uptime; /* * First check to see if there is enough free_space to continue @@ -401,7 +403,13 @@ static void do_acct_process(long exitcode, struct file *file) ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); - elapsed = jiffies_64_to_AHZ(get_jiffies_64() - current->start_time); + /* calculate run_time in nsec*/ + do_posix_clock_monotonic_gettime(&uptime); + run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; + run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC + + current->start_time.tv_nsec; + /* convert nsec -> AHZ */ + elapsed = nsec_to_AHZ(run_time); #if ACCT_VERSION==3 ac.ac_etime = encode_float(elapsed); #else diff --git a/kernel/fork.c b/kernel/fork.c index 7e73e420441e..70f604c3937b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -992,7 +992,7 @@ static task_t *copy_process(unsigned long clone_flags, p->utime = p->stime = 0; p->lock_depth = -1; /* -1 = no lock */ - p->start_time = get_jiffies_64(); + do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 48f6dde410b3..3868e29e85be 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -26,6 +26,7 @@ /** * oom_badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate + * @uptime: current uptime in seconds * * The formula used is relatively simple and 
documented inline in the * function. The main rationale is that we want to select a good task @@ -41,7 +42,7 @@ * of least surprise ... (be careful when you change it) */ -static unsigned long badness(struct task_struct *p) +static unsigned long badness(struct task_struct *p, unsigned long uptime) { unsigned long points, cpu_time, run_time, s; @@ -56,12 +57,16 @@ static unsigned long badness(struct task_struct *p) points = p->mm->total_vm; /* - * CPU time is in seconds and run time is in minutes. There is no - * particular reason for this other than that it turned out to work - * very well in practice. + * CPU time is in tens of seconds and run time is in thousands + * of seconds. There is no particular reason for this other than + * that it turned out to work very well in practice. */ cpu_time = (p->utime + p->stime) >> (SHIFT_HZ + 3); - run_time = (get_jiffies_64() - p->start_time) >> (SHIFT_HZ + 10); + + if (uptime >= p->start_time.tv_sec) + run_time = (uptime - p->start_time.tv_sec) >> 10; + else + run_time = 0; s = int_sqrt(cpu_time); if (s) @@ -111,10 +116,12 @@ static struct task_struct * select_bad_process(void) unsigned long maxpoints = 0; struct task_struct *g, *p; struct task_struct *chosen = NULL; + struct timespec uptime; + do_posix_clock_monotonic_gettime(&uptime); do_each_thread(g, p) if (p->pid) { - unsigned long points = badness(p); + unsigned long points = badness(p, uptime.tv_sec); if (points > maxpoints) { chosen = p; maxpoints = points; -- cgit v1.2.3 From 11f6410f2a6b35360a44a375e8fef19e61e2bea7 Mon Sep 17 00:00:00 2001 From: "Paolo \\'Blaisorblade\\' Giarrusso" Date: Wed, 13 Oct 2004 07:28:22 -0700 Subject: [PATCH] uml: don't declare cpu_online - fix compilation error Avoid redeclaring again (resulting in a compilation error) cpu_online and cpu_*_map, which are now declared elsewhere. 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/smp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/asm-um/smp.h b/include/asm-um/smp.h index ee768c27ca0e..4412d5d9c26b 100644 --- a/include/asm-um/smp.h +++ b/include/asm-um/smp.h @@ -8,10 +8,6 @@ #include "asm/current.h" #include "linux/cpumask.h" -extern cpumask_t cpu_online_map; -extern cpumask_t cpu_possible_map; - - #define smp_processor_id() (current_thread->cpu) #define cpu_logical_map(n) (n) #define cpu_number_map(n) (n) @@ -19,8 +15,6 @@ extern cpumask_t cpu_possible_map; extern int hard_smp_processor_id(void); #define NO_PROC_ID -1 -#define cpu_online(cpu) cpu_isset(cpu, cpu_online_map) - extern int ncpus; -- cgit v1.2.3 From e02c6482bc619536780e3dbf95ca2b9202fbad11 Mon Sep 17 00:00:00 2001 From: "Paolo \\'Blaisorblade\\' Giarrusso" Date: Wed, 13 Oct 2004 07:28:58 -0700 Subject: [PATCH] uml: finish update for 2.6.8 API changes Add some updates for API changes in 2.6.8 which were not included in the original UML patch; these fixes were detected by some warnings, so I probably missed some more ones. 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/dma-mapping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-um/dma-mapping.h b/include/asm-um/dma-mapping.h index 2ea88281161d..13e6291f7151 100644 --- a/include/asm-um/dma-mapping.h +++ b/include/asm-um/dma-mapping.h @@ -1,6 +1,8 @@ #ifndef _ASM_DMA_MAPPING_H #define _ASM_DMA_MAPPING_H +#include + static inline int dma_supported(struct device *dev, u64 mask) { -- cgit v1.2.3 From 8c225dbc5a7b13801a8254aae0ccebab8e4bece7 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 13 Oct 2004 07:31:45 -0700 Subject: [PATCH] H8/300 some error/warning fix Signed-off-by: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/h8300/kernel/ptrace.c | 2 +- arch/h8300/lib/checksum.c | 1 + arch/h8300/platform/h8300h/generic/timer.c | 4 ++-- include/asm-h8300/bitops.h | 2 ++ 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index f1189ccd9344..b60d234d1d27 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -107,7 +107,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { - unsigned long tmp; + unsigned long tmp = 0; if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) { ret = -EIO; diff --git a/arch/h8300/lib/checksum.c b/arch/h8300/lib/checksum.c index 6691d0b935b7..5aa688d9242d 100644 --- a/arch/h8300/lib/checksum.c +++ b/arch/h8300/lib/checksum.c @@ -32,6 +32,7 @@ of the assembly has to go. 
*/ #include +#include static inline unsigned short from32to16(unsigned long x) { diff --git a/arch/h8300/platform/h8300h/generic/timer.c b/arch/h8300/platform/h8300h/generic/timer.c index ffc0349b74f9..6590f89e521a 100644 --- a/arch/h8300/platform/h8300h/generic/timer.c +++ b/arch/h8300/platform/h8300h/generic/timer.c @@ -32,7 +32,7 @@ #define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8192 /* Timer input freq. */ -int platform_timer_setup(irqreturn_t (*timer_int)(int, void *, struct pt_regs *)) +void __init platform_timer_setup(irqreturn_t (*timer_int)(int, void *, struct pt_regs *)) { /* setup 8bit timer ch2 */ ctrl_outb(H8300_TIMER_FREQ / HZ, TCORA2); /* set interval */ @@ -69,7 +69,7 @@ void platform_timer_eoi(void) #define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8 /* Timer input freq. */ -int platform_timer_setup(irqreturn_t (*timer_int)(int, void *, struct pt_regs *)) +void __init platform_timer_setup(irqreturn_t (*timer_int)(int, void *, struct pt_regs *)) { *(unsigned short *)GRA= H8300_TIMER_FREQ / HZ; /* set interval */ *(unsigned short *)TCNT=0; /* clear counter */ diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h index 893e6f1c7efd..94b7a46a6043 100644 --- a/include/asm-h8300/bitops.h +++ b/include/asm-h8300/bitops.h @@ -273,6 +273,8 @@ found_middle: return result + __ffs(tmp); } +#define find_first_bit(addr, size) find_next_bit(addr, size, 0) + /* * Every architecture must define this function. It's the fastest * way of searching a 140-bit bitmap where the first 100 bits are -- cgit v1.2.3 From 538ce05c0ef4055cf29a92a4abcdf139d180a0f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 13 Oct 2004 21:00:06 -0700 Subject: Fix threaded user page write memory ordering Make sure we order the writes to a newly created page with the page table update that potentially exposes the page to another CPU. 
This is a no-op on any architecture where getting the page table spinlock will already do the ordering (notably x86), but other architectures can care. --- include/linux/highmem.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 232d8fdb557c..7153aef34d5c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -40,6 +40,8 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr) void *addr = kmap_atomic(page, KM_USER0); clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); + /* Make sure this page is cleared on other CPU's too before using it */ + smp_wmb(); } static inline void clear_highpage(struct page *page) @@ -73,6 +75,8 @@ static inline void copy_user_highpage(struct page *to, struct page *from, unsign copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); + /* Make sure this page is copied on other CPU's too before using it */ + smp_wmb(); } static inline void copy_highpage(struct page *to, struct page *from) -- cgit v1.2.3