From 83cd4fe27ad8446619b2e030b171b858501de87d Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 21 May 2010 17:09:41 -0700 Subject: sched: Change nohz idle load balancing logic to push model In the new push model, all idle CPUs indeed go into nohz mode. There is still the concept of idle load balancer (performing the load balancing on behalf of all the idle cpu's in the system). Busy CPU kicks the nohz balancer when any of the nohz CPUs need idle load balancing. The kickee CPU does the idle load balancing on behalf of all idle CPUs instead of the normal idle balance. This addresses the below two problems with the current nohz ilb logic: * the idle load balancer continued to have periodic ticks during idle and wokeup frequently, even though it did not have any rebalancing to do on behalf of any of the idle CPUs. * On x86 and CPUs that have APIC timer stoppage on idle CPUs, this periodic wakeup can result in a periodic additional interrupt on a CPU doing the timer broadcast. Also currently we are migrating the unpinned timers from an idle to the cpu doing idle load balancing (when all the cpus in the system are idle, there is no idle load balancing cpu and timers get added to the same idle cpu where the request was made. So the existing optimization works only on semi idle system). And In semi idle system, we no longer have periodic ticks on the idle load balancer CPU. Using that cpu will add more delays to the timers than intended (as that cpu's timer base may not be uptodate wrt jiffies etc). This was causing mysterious slowdowns during boot etc. For now, in the semi idle case, use the nearest busy cpu for migrating timers from an idle cpu. This is good for power-savings anyway. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1274486981.2840.46.camel@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1d7b9bc1c034..5f171f04ab00 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -408,13 +408,7 @@ void tick_nohz_stop_sched_tick(int inidle) * the scheduler tick in nohz_restart_sched_tick. */ if (!ts->tick_stopped) { - if (select_nohz_load_balancer(1)) { - /* - * sched tick not stopped! - */ - cpumask_clear_cpu(cpu, nohz_cpu_mask); - goto out; - } + select_nohz_load_balancer(1); ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; -- cgit v1.2.3 From 698f93159a735bd29a8767c9f60d9b2d75870f8e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 2 Jul 2010 20:41:51 +0200 Subject: fix comment/printk typos concerning "already" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/isdn/hardware/mISDN/avmfritz.c | 2 +- drivers/isdn/hardware/mISDN/hfcmulti.c | 4 ++-- drivers/isdn/hardware/mISDN/hfcpci.c | 2 +- drivers/isdn/hardware/mISDN/mISDNinfineon.c | 2 +- drivers/isdn/hardware/mISDN/speedfax.c | 2 +- drivers/isdn/hardware/mISDN/w6692.c | 2 +- drivers/isdn/hisax/callc.c | 2 +- drivers/isdn/hisax/tei.c | 2 +- drivers/isdn/mISDN/tei.c | 2 +- drivers/media/video/zoran/zoran_device.c | 2 +- drivers/scsi/bfa/bfa_ioim.c | 2 +- drivers/usb/host/fhci-sched.c | 2 +- kernel/time/tick-broadcast.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel/time') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dff48701b84d..ec1711f7c5c5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1087,7 +1087,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, } else { epoch->flags = 0; atomic_set(&epoch->epoch_size, 0); - /* atomic_set(&epoch->active, 0); is alrady zero */ + /* atomic_set(&epoch->active, 0); is already zero */ if (rv == FE_STILL_LIVE) rv = FE_RECYCLED; } diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c index d4215369bb59..472a2af79446 100644 --- a/drivers/isdn/hardware/mISDN/avmfritz.c +++ b/drivers/isdn/hardware/mISDN/avmfritz.c @@ -1116,7 +1116,7 @@ fritz_remove_pci(struct pci_dev *pdev) release_card(card); else if (debug) - pr_info("%s: drvdata allready removed\n", __func__); + pr_info("%s: drvdata already removed\n", __func__); } static struct pci_device_id fcpci_ids[] __devinitdata = { diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 095ed76ebe80..d3171954fa45 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4268,7 +4268,7 @@ init_card(struct hfc_multi *hc) goto error; /* * Finally enable IRQ output - * this is only allowed, if an IRQ routine is allready + * this is only allowed, if an IRQ routine is already * established for this HFC, so don't do that earlier */ spin_lock_irqsave(&hc->lock, flags); @@ -5212,7 +5212,7 @@ static void __devexit hfc_remove_pci(struct pci_dev *pdev) spin_unlock_irqrestore(&HFClock, flags); } else { if (debug) - printk(KERN_DEBUG "%s: drvdata allready removed\n", + printk(KERN_DEBUG "%s: drvdata already removed\n", __func__); } } diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 5940a2c12074..65ded0576013 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1773,7 +1773,7 @@ init_card(struct hfc_pci *hc) inithfcpci(hc); /* * Finally enable IRQ output - * this is only allowed, if an IRQ routine is allready + * this is only allowed, if an IRQ routine is already * established for this HFC, so don't do that earlier */ enable_hwirq(hc); diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c index f5b3d2b26a08..4975976e93ac 100644 --- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c +++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c @@ -1150,7 +1150,7 @@ inf_remove(struct pci_dev *pdev) if (card) release_card(card); else - pr_debug("%s: drvdata allready removed\n", __func__); + pr_debug("%s: drvdata already removed\n", __func__); } static struct pci_driver infineon_driver = { diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c index d097a4e40e2b..9e07246bb9e7 100644 --- a/drivers/isdn/hardware/mISDN/speedfax.c +++ b/drivers/isdn/hardware/mISDN/speedfax.c @@ -484,7 +484,7 @@ sfax_remove_pci(struct pci_dev *pdev) if (card) release_card(card); else - pr_debug("%s: drvdata allready removed\n", __func__); + pr_debug("%s: drvdata already removed\n", __func__); } static struct pci_device_id sfaxpci_ids[] __devinitdata = { diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index 31f9d71fb22f..9e84870b971c 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -1402,7 +1402,7 @@ w6692_remove_pci(struct pci_dev *pdev) release_card(card); else if (debug) - pr_notice("%s: drvdata allready removed\n", __func__); + pr_notice("%s: drvdata already removed\n", __func__); } static struct pci_device_id w6692_ids[] = { diff --git a/drivers/isdn/hisax/callc.c b/drivers/isdn/hisax/callc.c index f58ded8f403f..f150330b5a23 100644 --- a/drivers/isdn/hisax/callc.c +++ b/drivers/isdn/hisax/callc.c @@ -1172,7 +1172,7 @@ CallcFreeChan(struct IsdnCardState *csta) kfree(csta->channel[i].b_st); csta->channel[i].b_st = NULL; } else - printk(KERN_WARNING "CallcFreeChan b_st ch%d allready freed\n", i); + printk(KERN_WARNING "CallcFreeChan b_st ch%d already freed\n", i); if (i || test_bit(FLG_TWO_DCHAN, &csta->HW_Flags)) { release_d_st(csta->channel + i); } else diff --git a/drivers/isdn/hisax/tei.c b/drivers/isdn/hisax/tei.c index f4cb178b0666..842f9c9e875d 100644 --- a/drivers/isdn/hisax/tei.c +++ b/drivers/isdn/hisax/tei.c @@ -130,7 +130,7 @@ tei_id_request(struct FsmInst *fi, int event, void *arg) if (st->l2.tei != -1) { st->ma.tei_m.printdebug(&st->ma.tei_m, - "assign request for allready asigned tei %d", + "assign request for already asigned tei %d", st->l2.tei); return; } diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index 34e898fe2f4f..1b85d9d27496 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -457,7 +457,7 @@ tei_id_request(struct FsmInst *fi, int event, void *arg) if (tm->l2->tei != GROUP_TEI) { tm->tei_m.printdebug(&tm->tei_m, - "assign request for allready assigned tei %d", + "assign request for already assigned tei %d", tm->l2->tei); return; } diff --git a/drivers/media/video/zoran/zoran_device.c b/drivers/media/video/zoran/zoran_device.c index e6ad4b205611..6f846abee3e4 100644 --- a/drivers/media/video/zoran/zoran_device.c +++ b/drivers/media/video/zoran/zoran_device.c @@ -484,7 +484,7 @@ zr36057_overlay (struct zoran *zr, zr->overlay_settings.format); /* Start and length of each line MUST be 4-byte aligned. - * This should be allready checked before the call to this routine. + * This should be already checked before the call to this routine. * All error messages are internal driver checking only! */ /* video display top and bottom registers */ diff --git a/drivers/scsi/bfa/bfa_ioim.c b/drivers/scsi/bfa/bfa_ioim.c index 687f3d6e252b..3d2eac104308 100644 --- a/drivers/scsi/bfa/bfa_ioim.c +++ b/drivers/scsi/bfa/bfa_ioim.c @@ -488,7 +488,7 @@ bfa_ioim_sm_cleanup_qfull(struct bfa_ioim_s *ioim, enum bfa_ioim_event event) case BFA_IOIM_SM_ABORT: /** - * IO is alraedy being cleaned up implicitly + * IO is already being cleaned up implicitly */ ioim->io_cbfn = __bfa_cb_ioim_abort; break; diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c index 4f2cbdcc0273..a42ef380e917 100644 --- a/drivers/usb/host/fhci-sched.c +++ b/drivers/usb/host/fhci-sched.c @@ -125,7 +125,7 @@ void fhci_transaction_confirm(struct fhci_usb *usb, struct packet *pkt) /* * Flush all transmitted packets from BDs * This routine is called when disabling the USB port to flush all - * transmissions that are allready scheduled in the BDs + * transmissions that are already scheduled in the BDs */ void fhci_flush_all_transmissions(struct fhci_usb *usb) { diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b3bafd5fc66d..48b2761b5668 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -188,7 +188,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) /* * Setup the next period for devices, which do not have * periodic mode. We read dev->next_event first and add to it - * when the event alrady expired. clockevents_program_event() + * when the event already expired. clockevents_program_event() * sets dev->next_event only when the event is really * programmed to the device. */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6fb890187de0..4dc8805154ee 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -201,7 +201,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, int offset, end; if (fq->q.last_in & INET_FRAG_COMPLETE) { - pr_debug("Allready completed\n"); + pr_debug("Already completed\n"); goto err; } -- cgit v1.2.3 From 396e894d289d69bacf5acd983c97cd6e21a14c08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jul 2010 15:12:27 +0200 Subject: sched: Revert nohz_ratelimit() for now Norbert reported that nohz_ratelimit() causes his laptop to burn about 4W (40%) extra. For now back out the change and see if we can adjust the power management code to make better decisions. Reported-by: Norbert Preining Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Cc: Arjan van de Ven LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ------ kernel/sched.c | 10 ---------- kernel/time/tick-sched.c | 2 +- 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/sched.h b/include/linux/sched.h index 747fcaedddb7..6e0bb86de990 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -273,17 +273,11 @@ extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); extern int get_nohz_load_balancer(void); -extern int nohz_ratelimit(int cpu); #else static inline int select_nohz_load_balancer(int cpu) { return 0; } - -static inline int nohz_ratelimit(int cpu) -{ - return 0; -} #endif /* diff --git a/kernel/sched.c b/kernel/sched.c index f52a8801b7a2..63b4a14682fa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1232,16 +1232,6 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } -int nohz_ratelimit(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - u64 diff = rq->clock - rq->nohz_stamp; - - rq->nohz_stamp = rq->clock; - - return diff < (NSEC_PER_SEC / HZ) >> 1; -} - #endif /* CONFIG_NO_HZ */ static u64 sched_avg_period(void) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 813993b5fb61..f898af608171 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -325,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle) } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || - arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) { + arch_needs_cpu(cpu)) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { -- cgit v1.2.3 From ce3bf7ab22527183634a76512d9854a38615e4d5 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:19 -0700 Subject: time: Implement timespec_add After accidentally misusing timespec_add_safe, I wanted to make sure we don't accidently trip over that issue again, so I created a simple timespec_add() function which we can use to replace the instances of timespec_add_safe() that don't want the overflow detection. Signed-off-by: John Stultz LKML-Reference: <1279068988-21864-3-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 16 ++++++++++++++++ kernel/time/timekeeping.c | 6 +++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/time.h b/include/linux/time.h index ea3559f0b3f2..9072df83de1f 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -76,9 +76,25 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon, const unsigned int min, const unsigned int sec); extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec); + +/* + * timespec_add_safe assumes both values are positive and checks + * for overflow. It will return TIME_T_MAX if the reutrn would be + * smaller then either of the arguments. + */ extern struct timespec timespec_add_safe(const struct timespec lhs, const struct timespec rhs); + +static inline struct timespec timespec_add(struct timespec lhs, + struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec(&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + /* * sub = lhs - rhs, in normalized form */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index caf8d4d4f5c8..623fe3d504dc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -579,9 +579,9 @@ static int timekeeping_resume(struct sys_device *dev) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - xtime = timespec_add_safe(xtime, ts); + xtime = timespec_add(xtime, ts); wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); - total_sleep_time = timespec_add_safe(total_sleep_time, ts); + total_sleep_time = timespec_add(total_sleep_time, ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); @@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(getboottime); */ void monotonic_to_bootbased(struct timespec *ts) { - *ts = timespec_add_safe(*ts, total_sleep_time); + *ts = timespec_add(*ts, total_sleep_time); } EXPORT_SYMBOL_GPL(monotonic_to_bootbased); -- cgit v1.2.3 From 592913ecb87a9e06f98ddb55b298f1a66bf94c6b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:20 -0700 Subject: time: Kill off CONFIG_GENERIC_TIME Now that all arches have been converted over to use generic time via clocksources or arch_gettimeoffset(), we can remove the GENERIC_TIME config option and simplify the generic code. Signed-off-by: John Stultz LKML-Reference: <1279068988-21864-4-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 3 +- arch/alpha/Kconfig | 4 --- arch/arm/Kconfig | 4 --- arch/avr32/Kconfig | 3 -- arch/blackfin/Kconfig | 3 -- arch/cris/Kconfig | 3 -- arch/frv/Kconfig | 4 --- arch/h8300/Kconfig | 4 --- arch/ia64/Kconfig | 4 --- arch/m32r/Kconfig | 3 -- arch/m68k/Kconfig | 3 -- arch/m68knommu/Kconfig | 4 --- arch/microblaze/Kconfig | 3 -- arch/mips/Kconfig | 4 --- arch/mn10300/Kconfig | 3 -- arch/parisc/Kconfig | 4 --- arch/powerpc/Kconfig | 3 -- arch/s390/Kconfig | 3 -- arch/score/Kconfig | 3 -- arch/sh/Kconfig | 3 -- arch/sparc/Kconfig | 3 -- arch/um/Kconfig.common | 4 --- arch/x86/Kconfig | 5 +--- arch/xtensa/Kconfig | 3 -- drivers/Makefile | 4 ++- drivers/acpi/acpi_pad.c | 2 +- drivers/acpi/processor_idle.c | 2 +- drivers/misc/Kconfig | 4 +-- kernel/time.c | 16 ----------- kernel/time/Kconfig | 4 +-- kernel/time/clocksource.c | 4 +-- kernel/time/timekeeping.c | 55 +++---------------------------------- kernel/trace/Kconfig | 4 +-- 33 files changed, 19 insertions(+), 159 deletions(-) (limited to 'kernel/time') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2b2407d9a6d0..8abdfd7cb571 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -73,7 +73,6 @@ parameter is applicable: MTD MTD (Memory Technology Device) support is enabled. NET Appropriate network support is enabled. NUMA NUMA support is enabled. - GENERIC_TIME The generic timeofday code is enabled. NFS Appropriate NFS support is enabled. OSS OSS sound support is enabled. PV_OPS A paravirtualized kernel is enabled. @@ -468,7 +467,7 @@ and is between 256 and 4096 characters. It is defined in the file clocksource is not available, it defaults to PIT. Format: { pit | tsc | cyclone | pmtmr } - clocksource= [GENERIC_TIME] Override the default clocksource + clocksource= Override the default clocksource Format: Override the default clocksource and use the clocksource with the name specified. diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 3e2e540a0f2a..b9647bb66d13 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -47,10 +47,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config GENERIC_TIME - bool - default y - config GENERIC_CMOS_UPDATE def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 98922f7d2d12..655b4ae76314 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -41,10 +41,6 @@ config SYS_SUPPORTS_APM_EMULATION config GENERIC_GPIO bool -config GENERIC_TIME - bool - default y - config ARCH_USES_GETTIMEOFFSET bool default n diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index f2b319333184..f51572772e21 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -45,9 +45,6 @@ config GENERIC_IRQ_PROBE config RWSEM_GENERIC_SPINLOCK def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_CLOCKEVENTS def_bool y diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index f66294b4f9d2..c88fd3584122 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -614,9 +614,6 @@ comment "Kernel Timer/Scheduler" source kernel/Kconfig.hz -config GENERIC_TIME - def_bool y - config GENERIC_CLOCKEVENTS bool "Generic clock events" default y diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index e25bf4440b51..887ef855be2a 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -20,9 +20,6 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM bool -config GENERIC_TIME - def_bool y - config GENERIC_CMOS_UPDATE def_bool y diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 4b5830bcbe2e..16399bd24993 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -40,10 +40,6 @@ config GENERIC_HARDIRQS_NO__DO_IRQ bool default y -config GENERIC_TIME - bool - default y - config TIME_LOW_RES bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 53cc669e6d59..988b6ff34cc4 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -62,10 +62,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config GENERIC_TIME - bool - default y - config GENERIC_BUG bool depends on BUG diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 95610820041e..8711d13cd79f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -82,10 +82,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config GENERIC_TIME - bool - default y - config GENERIC_TIME_VSYSCALL bool default y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 3a9319f93e89..836abbbc9c04 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -44,9 +44,6 @@ config HZ int default 100 -config GENERIC_TIME - def_bool y - config ARCH_USES_GETTIMEOFFSET def_bool y diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 2e3737b92ffc..8030e2481d97 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -59,9 +59,6 @@ config HZ int default 100 -config GENERIC_TIME - def_bool y - config ARCH_USES_GETTIMEOFFSET def_bool y diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index efeb6033fc17..2609c394e1df 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -63,10 +63,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config GENERIC_TIME - bool - default y - config GENERIC_CMOS_UPDATE bool default y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 505a08592423..14f03cea94a1 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -48,9 +48,6 @@ config GENERIC_IRQ_PROBE config GENERIC_CALIBRATE_DELAY def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_TIME_VSYSCALL def_bool n diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cdaae942623d..01c44cbdf163 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -733,10 +733,6 @@ config GENERIC_CLOCKEVENTS bool default y -config GENERIC_TIME - bool - default y - config GENERIC_CMOS_UPDATE bool default y diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 1c4565a9102b..444b9f918fdf 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -46,9 +46,6 @@ config GENERIC_FIND_NEXT_BIT config GENERIC_HWEIGHT def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_BUG def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 05a366a5c4d5..907417d187e1 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -66,10 +66,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config GENERIC_TIME - bool - default y - config TIME_LOW_RES bool depends on SMP diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2031a2846865..25e6bf457457 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -29,9 +29,6 @@ config MMU config GENERIC_CMOS_UPDATE def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_TIME_VSYSCALL def_bool y diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bee1c0f794cf..f0777a47e3a5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -40,9 +40,6 @@ config ARCH_HAS_ILOG2_U64 config GENERIC_HWEIGHT def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_TIME_VSYSCALL def_bool y diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 55d413e6dcf2..be4a15584751 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -55,9 +55,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_CLOCKEVENTS def_bool y -config GENERIC_TIME - def_bool y - config SCHED_NO_NO_OMIT_FRAME_POINTER def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 82868fee21fd..33990fa95af0 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -98,9 +98,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_IOMAP bool -config GENERIC_TIME - def_bool y - config GENERIC_CLOCKEVENTS def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c0015db247ba..1cd0d9d3c761 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -66,9 +66,6 @@ config BITS default 32 if SPARC32 default 64 if SPARC64 -config GENERIC_TIME - def_bool y - config ARCH_USES_GETTIMEOFFSET bool default y if SPARC32 diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index 0d207e73a758..7c8e277f6d34 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -55,10 +55,6 @@ config GENERIC_BUG default y depends on BUG -config GENERIC_TIME - bool - default y - config GENERIC_CLOCKEVENTS bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcb0593b4a66..546b610ad71c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -72,9 +72,6 @@ config ARCH_DEFCONFIG default "arch/x86/configs/i386_defconfig" if X86_32 default "arch/x86/configs/x86_64_defconfig" if X86_64 -config GENERIC_TIME - def_bool y - config GENERIC_CMOS_UPDATE def_bool y @@ -2046,7 +2043,7 @@ config SCx200 config SCx200HR_TIMER tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" - depends on SCx200 && GENERIC_TIME + depends on SCx200 default y ---help--- This driver provides a clocksource built upon the on-chip diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index ebe228d02b08..0859bfd8ae93 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -48,9 +48,6 @@ config HZ int default 100 -config GENERIC_TIME - def_bool y - source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/drivers/Makefile b/drivers/Makefile index 91874e048552..ae473445ad6d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -101,7 +101,9 @@ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ obj-$(CONFIG_ARCH_SHMOBILE) += sh/ -obj-$(CONFIG_GENERIC_TIME) += clocksource/ +ifndef CONFIG_ARCH_USES_GETTIMEOFFSET +obj-y += clocksource/ +endif obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_DCA) += dca/ obj-$(CONFIG_HID) += hid/ diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 446aced33aff..b76848c80be3 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -77,7 +77,7 @@ static void power_saving_mwait_init(void) power_saving_mwait_eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | (highest_subcstate - 1); -#if defined(CONFIG_GENERIC_TIME) && defined(CONFIG_X86) +#if defined(CONFIG_X86) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: case X86_VENDOR_INTEL: diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e9a8026d39f0..294e10b5480a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -264,7 +264,7 @@ int acpi_processor_resume(struct acpi_device * device) return 0; } -#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) +#if defined(CONFIG_X86) static void tsc_check_state(int state) { switch (boot_cpu_data.x86_vendor) { diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 26386a92f5aa..5b9ba4834ce1 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -72,7 +72,7 @@ config ATMEL_TCLIB config ATMEL_TCB_CLKSRC bool "TC Block Clocksource" - depends on ATMEL_TCLIB && GENERIC_TIME + depends on ATMEL_TCLIB default y help Select this to get a high precision clocksource based on a @@ -240,7 +240,7 @@ config CS5535_MFGPT_DEFAULT_IRQ config CS5535_CLOCK_EVENT_SRC tristate "CS5535/CS5536 high-res timer (MFGPT) events" - depends on GENERIC_TIME && GENERIC_CLOCKEVENTS && CS5535_MFGPT + depends on GENERIC_CLOCKEVENTS && CS5535_MFGPT help This driver provides a clock event source based on the MFGPT timer(s) in the CS5535 and CS5536 companion chips. diff --git a/kernel/time.c b/kernel/time.c index 848b1c2ab09a..ba9b338d1835 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -300,22 +300,6 @@ struct timespec timespec_trunc(struct timespec t, unsigned gran) } EXPORT_SYMBOL(timespec_trunc); -#ifndef CONFIG_GENERIC_TIME -/* - * Simulate gettimeofday using do_gettimeofday which only allows a timeval - * and therefore only yields usec accuracy - */ -void getnstimeofday(struct timespec *tv) -{ - struct timeval x; - - do_gettimeofday(&x); - tv->tv_sec = x.tv_sec; - tv->tv_nsec = x.tv_usec * NSEC_PER_USEC; -} -EXPORT_SYMBOL_GPL(getnstimeofday); -#endif - /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 95ed42951e0a..f06a8a365648 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -6,7 +6,7 @@ config TICK_ONESHOT config NO_HZ bool "Tickless System (Dynamic Ticks)" - depends on GENERIC_TIME && GENERIC_CLOCKEVENTS + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT help This option enables a tickless system: timer interrupts will @@ -15,7 +15,7 @@ config NO_HZ config HIGH_RES_TIMERS bool "High Resolution Timer Support" - depends on GENERIC_TIME && GENERIC_CLOCKEVENTS + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT help This option enables high resolution timer support. If your diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f08e99c1d561..c543d21b4e54 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -531,7 +531,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) return max_nsecs - (max_nsecs >> 5); } -#ifdef CONFIG_GENERIC_TIME +#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET /** * clocksource_select - Select the best clocksource available @@ -577,7 +577,7 @@ static void clocksource_select(void) } } -#else /* CONFIG_GENERIC_TIME */ +#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ static inline void clocksource_select(void) { } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 623fe3d504dc..73edd4074b50 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -173,8 +173,6 @@ void timekeeping_leap_insert(int leapsecond) update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } -#ifdef CONFIG_GENERIC_TIME - /** * timekeeping_forward_now - update clock to the current time * @@ -376,52 +374,6 @@ void timekeeping_notify(struct clocksource *clock) tick_clock_notify(); } -#else /* GENERIC_TIME */ - -static inline void timekeeping_forward_now(void) { } - -/** - * ktime_get - get the monotonic time in ktime_t format - * - * returns the time in ktime_t format - */ -ktime_t ktime_get(void) -{ - struct timespec now; - - ktime_get_ts(&now); - - return timespec_to_ktime(now); -} -EXPORT_SYMBOL_GPL(ktime_get); - -/** - * ktime_get_ts - get the monotonic clock in timespec format - * @ts: pointer to timespec variable - * - * The function calculates the monotonic clock from the realtime - * clock and the wall_to_monotonic offset and stores the result - * in normalized timespec format in the variable pointed to by @ts. - */ -void ktime_get_ts(struct timespec *ts) -{ - struct timespec tomono; - unsigned long seq; - - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(ts); - tomono = wall_to_monotonic; - - } while (read_seqretry(&xtime_lock, seq)); - - set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec); -} -EXPORT_SYMBOL_GPL(ktime_get_ts); - -#endif /* !GENERIC_TIME */ - /** * ktime_get_real - get the real (wall-) time in ktime_t format * @@ -784,10 +736,11 @@ void update_wall_time(void) return; clock = timekeeper.clock; -#ifdef CONFIG_GENERIC_TIME - offset = (clock->read(clock) - clock->cycle_last) & clock->mask; -#else + +#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = timekeeper.cycle_interval; +#else + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8b1797c4545b..7531ddaf3afe 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -153,7 +153,7 @@ config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n depends on TRACE_IRQFLAGS_SUPPORT - depends on GENERIC_TIME + depends on !ARCH_USES_GETTIMEOFFSET select TRACE_IRQFLAGS select GENERIC_TRACER select TRACER_MAX_TRACE @@ -175,7 +175,7 @@ config IRQSOFF_TRACER config PREEMPT_TRACER bool "Preemption-off Latency Tracer" default n - depends on GENERIC_TIME + depends on !ARCH_USES_GETTIMEOFFSET depends on PREEMPT select GENERIC_TRACER select TRACER_MAX_TRACE -- cgit v1.2.3 From 7615856ebfee52b080c22d263ca4debbd0df0ac1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:23 -0700 Subject: timkeeping: Fix update_vsyscall to provide wall_to_monotonic offset update_vsyscall() did not provide the wall_to_monotoinc offset, so arch specific implementations tend to reference wall_to_monotonic directly. This limits future cleanups in the timekeeping core, so this patch fixes the update_vsyscall interface to provide wall_to_monotonic, allowing wall_to_monotonic to be made static as planned in Documentation/feature-removal-schedule.txt Signed-off-by: John Stultz Cc: Martin Schwidefsky Cc: Anton Blanchard Cc: Paul Mackerras Cc: Tony Luck LKML-Reference: <1279068988-21864-7-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/time.c | 7 ++++--- arch/powerpc/kernel/time.c | 8 ++++---- arch/s390/kernel/time.c | 8 ++++---- arch/x86/kernel/vsyscall_64.c | 6 +++--- include/linux/clocksource.h | 6 ++++-- kernel/time/timekeeping.c | 9 ++++++--- 6 files changed, 25 insertions(+), 19 deletions(-) (limited to 'kernel/time') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 653b3c46ea82..ed6f22eb5b12 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -471,7 +471,8 @@ void update_vsyscall_tz(void) { } -void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult) +void update_vsyscall(struct timespec *wall, struct timespec *wtm, + struct clocksource *c, u32 mult) { unsigned long flags; @@ -487,9 +488,9 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult) /* copy kernel time structures */ fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec; fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec; - fsyscall_gtod_data.monotonic_time.tv_sec = wall_to_monotonic.tv_sec + fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec + wall->tv_sec; - fsyscall_gtod_data.monotonic_time.tv_nsec = wall_to_monotonic.tv_nsec + fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec + wall->tv_nsec; /* normalize */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0711d60f40b0..e215f76bba1c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -849,8 +849,8 @@ static cycle_t timebase_read(struct clocksource *cs) return (cycle_t)get_tb(); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, - u32 mult) +void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, + struct clocksource *clock, u32 mult) { u64 new_tb_to_xs, new_stamp_xsec; @@ -882,8 +882,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, vdso_data->tb_orig_stamp = clock->cycle_last; vdso_data->stamp_xsec = new_stamp_xsec; vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; + vdso_data->wtom_clock_sec = wtm->tv_sec; + vdso_data->wtom_clock_nsec = wtm->tv_nsec; vdso_data->stamp_xtime = *wall_time; smp_wmb(); ++(vdso_data->tb_update_count); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a2163c95eb98..aeb30c6f279c 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -207,8 +207,8 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, - u32 mult) +void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, + struct clocksource *clock, u32 mult) { if (clock != &clocksource_tod) return; @@ -219,8 +219,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, vdso_data->xtime_tod_stamp = clock->cycle_last; vdso_data->xtime_clock_sec = wall_time->tv_sec; vdso_data->xtime_clock_nsec = wall_time->tv_nsec; - vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; + vdso_data->wtom_clock_sec = wtm->tv_sec; + vdso_data->wtom_clock_nsec = wtm->tv_nsec; vdso_data->ntp_mult = mult; smp_wmb(); ++vdso_data->tb_update_count; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dce0c3c5a787..dcbb28c4b694 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -73,8 +73,8 @@ void update_vsyscall_tz(void) write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, - u32 mult) +void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, + struct clocksource *clock, u32 mult) { unsigned long flags; @@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_to_monotonic = *wtm; vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 5ea3c60c160c..21677d99a161 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -313,11 +313,13 @@ clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void -update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult); +update_vsyscall(struct timespec *ts, struct timespec *wtm, + struct clocksource *c, u32 mult); extern void update_vsyscall_tz(void); #else static inline void -update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult) +update_vsyscall(struct timespec *ts, struct timespec *wtm, + struct clocksource *c, u32 mult) { } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 73edd4074b50..b15c3acafd5a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -170,7 +170,8 @@ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); } /** @@ -326,7 +327,8 @@ int do_settimeofday(struct timespec *tv) timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -809,7 +811,8 @@ void update_wall_time(void) } /* check to see if there is a new clocksource to use */ - update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); } /** -- cgit v1.2.3 From 8ab4351a4c888016620f43bde605b3d0964af339 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:25 -0700 Subject: hrtimer: Cleanup direct access to wall_to_monotonic Provides an accessor function to replace hrtimer.c's direct access of wall_to_monotonic. This will allow wall_to_monotonic to be made static as planned in Documentation/feature-removal-schedule.txt Signed-off-by: John Stultz LKML-Reference: <1279068988-21864-9-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 3 ++- kernel/hrtimer.c | 9 ++++----- kernel/time/timekeeping.c | 5 +++++ 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/time.h b/include/linux/time.h index 9072df83de1f..a57e0f67b3d0 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -126,7 +126,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); -struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec __current_kernel_time(void); /* does not take xtime_lock */ +struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5c69e996bd0f..809f48c70553 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -90,7 +90,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) do { seq = read_seqbegin(&xtime_lock); xts = __current_kernel_time(); - tom = wall_to_monotonic; + tom = __get_wall_to_monotonic(); } while (read_seqretry(&xtime_lock, seq)); xtim = timespec_to_ktime(xts); @@ -612,7 +612,7 @@ static int hrtimer_reprogram(struct hrtimer *timer, static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base; - struct timespec realtime_offset; + struct timespec realtime_offset, wtm; unsigned long seq; if (!hrtimer_hres_active()) @@ -620,10 +620,9 @@ static void retrigger_next_event(void *arg) do { seq = read_seqbegin(&xtime_lock); - set_normalized_timespec(&realtime_offset, - -wall_to_monotonic.tv_sec, - -wall_to_monotonic.tv_nsec); + wtm = __get_wall_to_monotonic(); } while (read_seqretry(&xtime_lock, seq)); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); base = &__get_cpu_var(hrtimer_bases); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b15c3acafd5a..fb61c2ed3660 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -858,6 +858,11 @@ struct timespec __current_kernel_time(void) return xtime; } +struct timespec __get_wall_to_monotonic(void) +{ + return wall_to_monotonic; +} + struct timespec current_kernel_time(void) { struct timespec now; -- cgit v1.2.3 From 0fb86b06298b6cd3205cac2e68a499f269282dac Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:26 -0700 Subject: timekeeping: Make xtime and wall_to_monotonic static This patch makes xtime and wall_to_monotonic static, as planned in Documentation/feature-removal-schedule.txt. This will allow for further cleanups to the timekeeping core. Signed-off-by: John Stultz LKML-Reference: <1279068988-21864-10-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- Documentation/feature-removal-schedule.txt | 10 ---------- include/linux/time.h | 2 -- kernel/time/timekeeping.c | 4 ++-- 3 files changed, 2 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 1571c0c83dba..cd648dbb5146 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -549,16 +549,6 @@ Who: Avi Kivity ---------------------------- -What: xtime, wall_to_monotonic -When: 2.6.36+ -Files: kernel/time/timekeeping.c include/linux/time.h -Why: Cleaning up timekeeping internal values. Please use - existing timekeeping accessor functions to access - the equivalent functionality. -Who: John Stultz - ----------------------------- - What: KVM kernel-allocated memory slots When: July 2010 Why: Since 2.6.25, kvm supports user-allocated memory slots, which are diff --git a/include/linux/time.h b/include/linux/time.h index a57e0f67b3d0..cb34e35fabac 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs, #define timespec_valid(ts) \ (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) -extern struct timespec xtime; -extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; extern void read_persistent_clock(struct timespec *ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb61c2ed3660..e14c839e9faa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -153,8 +153,8 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. */ -struct timespec xtime __attribute__ ((aligned (16))); -struct timespec wall_to_monotonic __attribute__ ((aligned (16))); +static struct timespec xtime __attribute__ ((aligned (16))); +static struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static struct timespec total_sleep_time; /* -- cgit v1.2.3 From 852db46d55e85b475a72e665ca08d3317769ceef Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:28 -0700 Subject: clocksource: Add __clocksource_updatefreq_hz/khz methods To properly handle clocksources that change frequencies at the clocksource->enable() point, this patch adds a method that will update the clocksource's mult/shift and max_idle_ns values. Signed-off-by: John Stultz LKML-Reference: <1279068988-21864-12-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 11 +++++++++++ kernel/time/clocksource.c | 29 ++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 21677d99a161..c37b21ad5a3b 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -292,6 +292,8 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); */ extern int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); +extern void +__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) { @@ -303,6 +305,15 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz) return __clocksource_register_scale(cs, 1000, khz); } +static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) +{ + __clocksource_updatefreq_scale(cs, 1, hz); +} + +static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) +{ + __clocksource_updatefreq_scale(cs, 1000, khz); +} static inline void clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c543d21b4e54..c18d7efa1b4b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -639,19 +639,18 @@ static void clocksource_enqueue(struct clocksource *cs) #define MAX_UPDATE_LENGTH 5 /* Seconds */ /** - * __clocksource_register_scale - Used to install new clocksources + * __clocksource_updatefreq_scale - Used update clocksource with new freq * @t: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale * - * Returns -EBUSY if registration fails, zero otherwise. + * This should only be called from the clocksource->enable() method. * * This *SHOULD NOT* be called directly! Please use the - * clocksource_register_hz() or clocksource_register_khz helper functions. + * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. */ -int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) +void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { - /* * Ideally we want to use some of the limits used in * clocksource_max_deferment, to provide a more informed @@ -662,7 +661,27 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) NSEC_PER_SEC/scale, MAX_UPDATE_LENGTH*scale); cs->max_idle_ns = clocksource_max_deferment(cs); +} +EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); + +/** + * __clocksource_register_scale - Used to install new clocksources + * @t: clocksource to be registered + * @scale: Scale factor multiplied against freq to get clocksource hz + * @freq: clocksource frequency (cycles per second) divided by scale + * + * Returns -EBUSY if registration fails, zero otherwise. + * + * This *SHOULD NOT* be called directly! Please use the + * clocksource_register_hz() or clocksource_register_khz helper functions. + */ +int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) +{ + + /* Intialize mult/shift and max_idle_ns */ + __clocksource_updatefreq_scale(cs, scale, freq); + /* Add clocksource to the clcoksource list */ mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); -- cgit v1.2.3 From af5ab277ded04bd9bc6b048c5a2f0e7d70ef0867 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 27 Jul 2010 21:02:10 -0700 Subject: clockevents: Remove the per cpu tick skew Historically, Linux has tried to make the regular timer tick on the various CPUs not happen at the same time, to avoid contention on xtime_lock. Nowadays, with the tickless kernel, this contention no longer happens since time keeping and updating are done differently. In addition, this skew is actually hurting power consumption in a measurable way on many-core systems. Signed-off-by: Arjan van de Ven LKML-Reference: <20100727210210.58d3118c@infradead.org> Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 813993b5fb61..74644ccd786c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -780,7 +780,6 @@ void tick_setup_sched_timer(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ktime_t now = ktime_get(); - u64 offset; /* * Emulate tick processing via per-CPU hrtimers: @@ -790,10 +789,6 @@ void tick_setup_sched_timer(void) /* Get the next period (per cpu) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); - offset = ktime_to_ns(tick_period) >> 1; - do_div(offset, num_possible_cpus()); - offset *= smp_processor_id(); - hrtimer_add_expires_ns(&ts->sched_timer, offset); for (;;) { hrtimer_forward(&ts->sched_timer, now, tick_period); -- cgit v1.2.3 From deda2e81961e96be4f2c09328baca4710a2fd1a0 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 9 Aug 2010 14:20:09 -0700 Subject: timekeeping: Fix overflow in rawtime tv_nsec on 32 bit archs The tv_nsec is a long and when added to the shifted interval it can wrap and become negative which later causes looping problems in the getrawmonotonic(). The edge case occurs when the system has slept for a short period of time of ~2 seconds. A trace printk of the values in this patch illustrate the problem: ftrace time stamp: log 43.716079: logarithmic_accumulation: raw: 3d0913 tv_nsec d687faa 43.718513: logarithmic_accumulation: raw: 3d0913 tv_nsec da588bd 43.722161: logarithmic_accumulation: raw: 3d0913 tv_nsec de291d0 46.349925: logarithmic_accumulation: raw: 7a122600 tv_nsec e1f9ae3 46.349930: logarithmic_accumulation: raw: 1e848980 tv_nsec 8831c0e3 The kernel starts looping at 46.349925 in the getrawmonotonic() due to the negative value from adding the raw value to tv_nsec. A simple solution is to accumulate into a u64, and then normalize it to a timespec_t. Signed-off-by: Jason Wessel [ Reworked variable names and simplified some of the code. - John ] Signed-off-by: John Stultz Cc: Thomas Gleixner Cc: H. Peter Anvin Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e14c839e9faa..e960d824263f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -690,6 +690,7 @@ static void timekeeping_adjust(s64 offset) static cycle_t logarithmic_accumulation(cycle_t offset, int shift) { u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; + u64 raw_nsecs; /* If the offset is smaller then a shifted interval, do nothing */ if (offset < timekeeper.cycle_interval<= NSEC_PER_SEC) { - raw_time.tv_nsec -= NSEC_PER_SEC; + /* Accumulate raw time */ + raw_nsecs = timekeeper.raw_interval << shift; + raw_nsecs += raw_time.tv_nsec; + while (raw_nsecs >= NSEC_PER_SEC) { + raw_nsecs -= NSEC_PER_SEC; raw_time.tv_sec++; } + raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ timekeeper.ntp_error += tick_length << shift; -- cgit v1.2.3 From c7dcf87a6881bf796faee83003163eb3de41a309 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Aug 2010 11:30:58 -0700 Subject: time: Workaround gcc loop optimization that causes 64bit div errors Early 4.3 versions of gcc apparently aggressively optimize the raw time accumulation loop, replacing it with a divide. On 32bit systems, this causes the following link errors: undefined reference to `__umoddi3' undefined reference to `__udivdi3' The gcc issue has been fixed in 4.4 and greater. This patch replaces the accumulation loop with a do_div, as suggested by Linus. Signed-off-by: John Stultz CC: Jason Wessel CC: Larry Finger CC: Ingo Molnar CC: Linus Torvalds Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e960d824263f..49010d822f72 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -710,9 +710,10 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) /* Accumulate raw time */ raw_nsecs = timekeeper.raw_interval << shift; raw_nsecs += raw_time.tv_nsec; - while (raw_nsecs >= NSEC_PER_SEC) { - raw_nsecs -= NSEC_PER_SEC; - raw_time.tv_sec++; + if (raw_nsecs >= NSEC_PER_SEC) { + u64 raw_secs = raw_nsecs; + raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); + raw_time.tv_sec += raw_secs; } raw_time.tv_nsec = raw_nsecs; -- cgit v1.2.3