From 2eb724ed574e90c52ebd7cf321458f35963cdf06 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 4 Nov 2002 18:18:24 -0800 Subject: [PATCH] fix mod_timer() race If two CPUs run mod_timer against the same not-pending timer then they have no locking relationship. They can both see the timer as not-pending and they both add the timer to their cpu-local list. The CPU which gets there second corrupts the first CPU's lists. This was causing Dave Hansen's 8-way to oops after a couple of minutes of specweb testing. I believe that to fix this we need locking which is associated with the timer itself. The easy fix is hashed spinlocking based on the timer's address. The hard fix is a lock inside the timer itself. It is hard because init_timer() becomes compulsory, to initialise that spinlock. An unknown number of code paths in the kernel just wipe the timer to all-zeroes and start using it. I chose the hard way - it is cleaner and more idiomatic. The patch also adds a "magic number" to the timer so we can detect when a timer was not correctly initialised. A warning and stack backtrace is generated and the timer is fixed up. After 16 such warnings the warning mechanism shuts itself up until a reboot. It took six patches to my kernel to stop the warnings from coming out. The uninitialised timers are extremely easy to find and fix. But it will take some time to weed them all out. Maybe we should go for the hashed locking... Note that the new timer->lock means that we can clean up some awkward "oh we raced, let's try again" code in timer.c. But to do that we'd also need to take timer->lock in the commonly-called del_timer(), so I left it as-is. The lock is not needed in add_timer() because concurrent add_timer()/add_timer() and concurrent add_timer()/mod_timer() are illegal. --- kernel/timer.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 3c19cec3570d..ffc50df1a072 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -69,6 +69,30 @@ static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED }; /* Fake initialization needed to avoid compiler breakage */ static DEFINE_PER_CPU(struct tasklet_struct, timer_tasklet) = { NULL }; +static void check_timer_failed(timer_t *timer) +{ + static int whine_count; + if (whine_count < 16) { + whine_count++; + printk("Uninitialised timer!\n"); + printk("This is just a warning. Your computer is OK\n"); + printk("function=0x%p, data=0x%lx\n", + timer->function, timer->data); + dump_stack(); + } + /* + * Now fix it up + */ + spin_lock_init(&timer->lock); + timer->magic = TIMER_MAGIC; +} + +static inline void check_timer(timer_t *timer) +{ + if (timer->magic != TIMER_MAGIC) + check_timer_failed(timer); +} + static inline void internal_add_timer(tvec_base_t *base, timer_t *timer) { unsigned long expires = timer->expires; @@ -129,6 +153,8 @@ void add_timer(timer_t *timer) BUG_ON(timer_pending(timer) || !timer->function); + check_timer(timer); + spin_lock_irqsave(&base->lock, flags); internal_add_timer(base, timer); timer->base = base; @@ -150,6 +176,8 @@ void add_timer_on(struct timer_list *timer, int cpu) BUG_ON(timer_pending(timer) || !timer->function); + check_timer(timer); + spin_lock_irqsave(&base->lock, flags); internal_add_timer(base, timer); timer->base = base; @@ -182,6 +210,9 @@ int mod_timer(timer_t *timer, unsigned long expires) int ret = 0; BUG_ON(!timer->function); + + check_timer(timer); + /* * This is a common optimization triggered by the * networking code - if the timer is re-modified @@ -190,7 +221,7 @@ int mod_timer(timer_t *timer, unsigned long expires) if (timer->expires == expires && timer_pending(timer)) return 1; - local_irq_save(flags); + spin_lock_irqsave(&timer->lock, flags); new_base = &per_cpu(tvec_bases, smp_processor_id()); repeat: old_base = timer->base; @@ -207,7 +238,7 @@ repeat: spin_lock(&new_base->lock); } /* - * The timer base might have changed while we were + * The timer base might have been cancelled while we were * trying to take the lock(s): */ if (timer->base != old_base) { @@ -232,7 +263,8 @@ repeat: if (old_base && (new_base != old_base)) spin_unlock(&old_base->lock); - spin_unlock_irqrestore(&new_base->lock, flags); + spin_unlock(&new_base->lock); + spin_unlock_irqrestore(&timer->lock, flags); return ret; } @@ -253,6 +285,8 @@ int del_timer(timer_t *timer) unsigned long flags; tvec_base_t *base; + check_timer(timer); + repeat: base = timer->base; if (!base) @@ -290,6 +324,8 @@ int del_timer_sync(timer_t *timer) tvec_base_t *base; int i, ret = 0; + check_timer(timer); + del_again: ret += del_timer(timer); -- cgit v1.2.3