[PATCH] Time interpolator: Scalability enhancements and high resolution time for IA64

This has been in the ia64 (and hence -mm) trees for a couple of months. Changelog: * Affects only architectures which define CONFIG_TIME_INTERPOLATION (currently only IA64) * Genericize time interpolation, make time interpolators easily usable and provide instructions on how to use the interpolator for other architectures. * Provide nanosecond resolution for clock_gettime and an accuracy up to the time interpolator time base. * clock_getres() reports resolution of underlying time basis which is typically <50ns and may be 1ns on some systems. * Make time interpolator self-tuning to limit time jumps and to make the interpolators work correctly on systems with broken time base specifications. * SMP scalability: Make clock_gettime and gettimeofday scale O(1) by removing the cmpxchg for most clocks (tested for up to 512 CPUs) * IA64: provide asm fastcall that doubles the performance of gettimeofday and clock_gettime on SGI and other IA64 systems (asm fastcalls scale O(1) together with the scalability fixes). * IA64: provide nojitter kernel option so that IA64 systems with correctly synchronized ITC counters may also enjoy the scalability enhancements. Performance measurements for single calls (ITC cycles): A. 4 way Intel IA64 SMP system (kmart) ITC offsets: kmart:/usr/src/noship-tests # dmesg|grep synchr CPU 1: synchronized ITC with CPU 0 (last diff 1 cycles, maxerr 417 cycles) CPU 2: synchronized ITC with CPU 0 (last diff 2 cycles, maxerr 417 cycles) CPU 3: synchronized ITC with CPU 0 (last diff 1 cycles, maxerr 417 cycles) A.1. Current kernel code kmart:/usr/src/noship-tests # ./dmt gettimeofday cycles: 3737 220 215 215 215 215 215 215 215 215 clock_gettime(REAL) cycles: 4058 575 564 576 565 566 558 558 558 558 clock_gettime(MONO) cycles: 1583 621 609 609 609 609 609 609 609 609 clock_gettime(PROCESS) cycles: 71428 298 259 259 259 259 259 259 259 259 clock_gettime(THREAD) cycles: 3982 336 290 298 298 298 298 286 286 286 A.2 New code using cmpxchg kmart:/usr/src/noship-tests # ./dmt gettimeofday cycles: 3145 213 216 213 213 213 213 213 213 213 clock_gettime(REAL) cycles: 3185 230 210 210 210 210 210 210 210 210 clock_gettime(MONO) cycles: 284 217 217 216 216 216 216 216 216 216 clock_gettime(PROCESS) cycles: 68857 289 270 259 259 259 259 259 259 259 clock_gettime(THREAD) cycles: 3862 339 298 298 298 298 290 286 286 286 A.3 New code with cmpxchg switched off (nojitter kernel option) kmart:/usr/src/noship-tests # ./dmt gettimeofday cycles: 3195 219 219 212 212 212 212 212 212 212 clock_gettime(REAL) cycles: 3003 228 205 205 205 205 205 205 205 205 clock_gettime(MONO) cycles: 279 209 209 209 208 208 208 208 208 208 clock_gettime(PROCESS) cycles: 65849 292 259 259 268 270 270 259 259 259 B. SGI SN2 system running 512 IA64 CPUs. B.1. Current kernel code [root@ascender noship-tests]# ./dmt gettimeofday cycles: 17221 1028 1007 1004 1004 1004 1010 25928 1002 1003 clock_gettime(REAL) cycles: 10388 1099 1055 1044 1064 1063 1051 1056 1061 1056 clock_gettime(MONO) cycles: 2363 96 96 96 96 96 96 96 96 96 clock_gettime(PROCESS) cycles: 46537 804 660 666 666 666 666 666 666 666 clock_gettime(THREAD) cycles: 10945 727 710 684 685 686 685 686 685 686 B.2 New code ascender:~/noship-tests # ./dmt gettimeofday cycles: 3874 610 588 588 588 588 588 588 588 588 clock_gettime(REAL) cycles: 3893 612 588 582 588 588 588 588 588 588 clock_gettime(MONO) cycles: 686 595 595 588 588 588 588 588 588 588 clock_gettime(PROCESS) cycles: 290759 322 269 269 259 265 265 265 259 259 clock_gettime(THREAD) cycles: 5153 358 306 298 296 304 290 298 298 298 Scalability of time functions (in time it takes to do a million calls): ======================================================================= A. 4 way Intel IA SMP system (kmart) A.1 Current code kmart:/usr/src/noship-tests # ./todscale -n1000000 CPUS WALL WALL/CPUS 1 0.192 0.192 2 1.125 0.563 4 9.229 2.307 A.2 New code using cmpxchg kmart:/usr/src/noship-tests # ./todscale CPUS WALL WALL/CPUS 1 0.188 0.188 2 0.457 0.229 4 0.413 0.103 (the measurement with 4 cpus may fluctuate up to 15.x somehow) A.3 New code without cmpxchg (nojitter kernel option) kmart:/usr/src/noship-tests # ./todscale -n10000000 CPUS WALL WALL/CPUS 1 0.180 0.180 2 0.180 0.090 4 0.252 0.063 B. SGI SN2 system running 512 IA64 CPUs. The system has a global monotonic clock and therefore has no need for compensation. Current code uses a cmpxchg. New code has no cmpxchg. B.1 current code ascender:~/noship-tests # ./todscale CPUS WALL WALL/CPUS 1 0.850 0.850 2 1.767 0.884 4 6.124 1.531 8 20.777 2.597 16 57.693 3.606 32 164.688 5.146 64 456.647 7.135 128 1093.371 8.542 256 2778.257 10.853 (System crash at 512 CPUs) B.2 New code ascender:~/noship-tests # ./todscale -n1000000 CPUS WALL WALL/CPUS 1 0.426 0.426 2 0.429 0.215 4 0.436 0.109 8 0.452 0.057 16 0.454 0.028 32 0.457 0.014 64 0.459 0.007 128 0.466 0.004 256 0.474 0.002 512 0.518 0.001 Clock Accuracy ============== A. 4 CPU SMP system A.1 Old code kmart:/usr/src/noship-tests # ./cdisp Gettimeofday() = 1092124757.270305000 CLOCK_REALTIME= 1092124757.270382000 resolution= 0.000976563 CLOCK_MONOTONIC= 89.696726590 resolution= 0.000976563 CLOCK_PROCESS_CPUTIME_ID= 0.001242507 resolution= 0.000000001 CLOCK_THREAD_CPUTIME_ID= 0.001255310 resolution= 0.000000001 A.2 New code kmart:/usr/src/noship-tests # ./cdisp Gettimeofday() = 1092124478.194530000 CLOCK_REALTIME= 1092124478.194603399 resolution= 0.000000001 CLOCK_MONOTONIC= 88.198315204 resolution= 0.000000001 CLOCK_PROCESS_CPUTIME_ID= 0.001241235 resolution= 0.000000001 CLOCK_THREAD_CPUTIME_ID= 0.001254747 resolution= 0.000000001 Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Christoph Lameter <clameter@sgi.com> 2004-09-07 17:46:02 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2004-09-07 17:46:02 -0700
commit: bd46a4f183290995b9c2fc4de40670e995482769 (patch)
tree: 8903c89ca648af88cb8ebc48af4e602f43bb6b9f /include/linux
parent: c9daeae66c9eea359c65210229b3962e66642a43 (diff)
1 files changed, 43 insertions, 80 deletions
diff --git a/include/linux/timex.h b/include/linux/timex.h
index c9e92e016524..356dc5e9295c 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -47,14 +47,18 @@
  *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
  * 1997-08-30    Ulrich Windl
  *      Added new constant NTP_PHASE_LIMIT
+ * 2004-08-12    Christoph Lameter
+ *      Reworked time interpolation logic
  */
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
 #include <linux/config.h>
 #include <linux/compiler.h>
+#include <linux/jiffies.h>
 
 #include <asm/param.h>
+#include <asm/io.h>
 
 /*
  * The following defines establish the engineering parameters of the PLL
@@ -320,101 +324,60 @@ extern long pps_stbcnt;		/* stability limit exceeded */
 
 #ifdef CONFIG_TIME_INTERPOLATION
 
-struct time_interpolator {
-	/* cache-hot stuff first: */
-	unsigned long (*get_offset) (void);
-	void (*update) (long);
-	void (*reset) (void);
+#define TIME_SOURCE_CPU 0
+#define TIME_SOURCE_MMIO64 1
+#define TIME_SOURCE_MMIO32 2
+#define TIME_SOURCE_FUNCTION 3
+
+/* For proper operations time_interpolator clocks must run slightly slower
+ * than the standard clock since the interpolator may only correct by having
+ * time jump forward during a tick. A slower clock is usually a side effect
+ * of the integer divide of the nanoseconds in a second by the frequency.
+ * The accuracy of the division can be increased by specifying a shift.
+ * However, this may cause the clock not to be slow enough.
+ * The interpolator will self-tune the clock by slowing down if no
+ * resets occur or speeding up if the time jumps per analysis cycle
+ * become too high.
+ *
+ * Setting jitter compensates for a fluctuating timesource by comparing
+ * to the last value read from the timesource to insure that an earlier value
+ * is not returned by a later call. The price to pay
+ * for the compensation is that the timer routines are not as scalable anymore.
+ */
 
-	/* cache-cold stuff follows here: */
-	struct time_interpolator *next;
+#define INTERPOLATOR_ADJUST 65536
+#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST
+
+struct time_interpolator {
+	unsigned short source;		/* time source flags */
+	unsigned char shift;		/* increases accuracy of multiply by shifting. */
+			/* Note that bits may be lost if shift is set too high */
+	unsigned char jitter;		/* if set compensate for fluctuations */
+	unsigned nsec_per_cyc;		/* set by register_time_interpolator() */
+	void *addr;			/* address of counter or function */
+	unsigned long offset;		/* nsec offset at last update of interpolator */
+	unsigned long last_counter;	/* counter value in units of the counter at last update */
+	unsigned long last_cycle;	/* Last timer value if TIME_SOURCE_JITTER is set */
 	unsigned long frequency;	/* frequency in counts/second */
 	long drift;			/* drift in parts-per-million (or -1) */
+	unsigned long skips;		/* skips forward */
+	unsigned long ns_skipped;	/* nanoseconds skipped */
+	struct time_interpolator *next;
 };
 
-extern volatile unsigned long last_nsec_offset;
-#ifndef __HAVE_ARCH_CMPXCHG
-extern spin_lock_t last_nsec_offset_lock;
-#endif
-extern struct time_interpolator *time_interpolator;
-
 extern void register_time_interpolator(struct time_interpolator *);
 extern void unregister_time_interpolator(struct time_interpolator *);
-
-/* Called with xtime WRITE-lock acquired.  */
-static inline void
-time_interpolator_update(long delta_nsec)
-{
-	struct time_interpolator *ti = time_interpolator;
-
-	if (last_nsec_offset > 0) {
-#ifdef __HAVE_ARCH_CMPXCHG
-		unsigned long new, old;
-
-		do {
-			old = last_nsec_offset;
-			if (old > delta_nsec)
-				new = old - delta_nsec;
-			else
-				new = 0;
-		} while (cmpxchg(&last_nsec_offset, old, new) != old);
-#else
-		/*
-		 * This really hurts, because it serializes gettimeofday(), but without an
-		 * atomic single-word compare-and-exchange, there isn't all that much else
-		 * we can do.
-		 */
-		spin_lock(&last_nsec_offset_lock);
-		{
-			last_nsec_offset -= min(last_nsec_offset, delta_nsec);
-		}
-		spin_unlock(&last_nsec_offset_lock);
-#endif
-	}
-
-	if (ti)
-		(*ti->update)(delta_nsec);
-}
-
-/* Called with xtime WRITE-lock acquired.  */
-static inline void
-time_interpolator_reset(void)
-{
-	struct time_interpolator *ti = time_interpolator;
-
-	last_nsec_offset = 0;
-	if (ti)
-		(*ti->reset)();
-}
-
-/* Called with xtime READ-lock acquired.  */
-static inline unsigned long
-time_interpolator_get_offset(void)
-{
-	struct time_interpolator *ti = time_interpolator;
-	if (ti)
-		return (*ti->get_offset)();
-	return last_nsec_offset;
-}
+extern void time_interpolator_reset(void);
+extern unsigned long time_interpolator_resolution(void);
+extern unsigned long time_interpolator_get_offset(void);
 
 #else /* !CONFIG_TIME_INTERPOLATION */
 
 static inline void
-time_interpolator_update(long delta_nsec)
-{
-}
-
-static inline void
 time_interpolator_reset(void)
 {
 }
 
-static inline unsigned long
-time_interpolator_get_offset(void)
-{
-	return 0;
-}
-
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #endif /* KERNEL */
author	Christoph Lameter <clameter@sgi.com>	2004-09-07 17:46:02 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-09-07 17:46:02 -0700
commit	bd46a4f183290995b9c2fc4de40670e995482769 (patch)
tree	8903c89ca648af88cb8ebc48af4e602f43bb6b9f /include/linux
parent	c9daeae66c9eea359c65210229b3962e66642a43 (diff)