summary refs log tree commit diff
diff options
context:
space:
mode:
author Richard Henderson <rth@kanga.twiddle.home> 2004-04-02 19:43:31 -0800
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2004-04-02 19:43:31 -0800
commit 77afcd086ba4003282f3d40820c24826ff24b2aa (patch)
tree b3c1a4c32137369bc1edcec02db1572a20abc2ca
parent aa8c926585fbff7f109c3d6d98b6261213f6d8f3 (diff)
[ALPHA] Detect and export cache shapes to userland.
-rw-r--r-- arch/alpha/kernel/setup.c 222
-rw-r--r-- include/asm-alpha/elf.h 38
2 files changed, 254 insertions, 6 deletions
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index fd3b3871778d..d1f1e0cdfa61 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -38,8 +38,9 @@
#include <linux/sysrq.h>
#include <linux/reboot.h>
#endif
-
#include <linux/notifier.h>
+#include <asm/io.h>
+
extern struct notifier_block *panic_notifier_list;
static int alpha_panic_event(struct notifier_block *, unsigned long, void *);
static struct notifier_block alpha_panic_block = {
@@ -64,6 +65,11 @@ static struct notifier_block alpha_panic_block = {
struct hwrpb_struct *hwrpb;
unsigned long srm_hae;
+/* Cache shape words, one per cache level, assigned at the end of
+ determine_cpu_caches(). A value of -1 means the cache does not
+ exist and 0 means nothing is known about it; any other value is a
+ packed size/line-size/associativity word as unpacked by
+ show_cache_size(). */
+int alpha_l1i_cacheshape;
+int alpha_l1d_cacheshape;
+int alpha_l2_cacheshape;
+int alpha_l3_cacheshape;
+
#ifdef CONFIG_VERBOSE_MCHECK
/* 0=minimum, 1=verbose, 2=all */
/* These can be overridden via the command line, ie "verbose_mcheck=2") */
@@ -113,6 +119,7 @@ static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long,
static struct alpha_machine_vector *get_sysvec_byname(const char *);
static void get_sysnames(unsigned long, unsigned long, unsigned long,
char **, char **);
+static void determine_cpu_caches (unsigned int);
static char command_line[COMMAND_LINE_SIZE];
char saved_command_line[COMMAND_LINE_SIZE];
@@ -672,6 +679,9 @@ setup_arch(char **cmdline_p)
/* Find our memory. */
setup_memory(kernel_end);
+ /* First guess at cpu cache sizes. Do this before init_arch. */
+ determine_cpu_caches(cpu->type);
+
/* Initialize the machine. Usually has to do with setting up
DMA windows and the like. */
if (alpha_mv.init_arch)
@@ -1156,6 +1166,18 @@ get_nr_processors(struct percpu_struct *cpubase, unsigned long num)
return count;
}
+/* Print one cache description line to seq_file F, labelled WHICH.
+ SHAPE == -1 prints "n/a" and SHAPE == 0 prints "unknown". Any other
+ value is unpacked as: bits 0-3 the associativity, bits 4-7 the log2
+ of the line size in bytes, and SHAPE >> 10 the total size in
+ kilobytes. */
+static void
+show_cache_size (struct seq_file *f, const char *which, int shape)
+{
+ if (shape == -1)
+ seq_printf (f, "%s\t\t: n/a\n", which);
+ else if (shape == 0)
+ seq_printf (f, "%s\t\t: unknown\n", which);
+ else
+ seq_printf (f, "%s\t\t: %dK, %d-way, %db line\n",
+ which, shape >> 10, shape & 15,
+ 1 << ((shape >> 4) & 15));
+}
static int
show_cpuinfo(struct seq_file *f, void *slot)
@@ -1229,9 +1251,202 @@ show_cpuinfo(struct seq_file *f, void *slot)
num_online_cpus(), cpu_present_mask);
#endif
+ show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape);
+ show_cache_size (f, "L1 Dcache", alpha_l1d_cacheshape);
+ show_cache_size (f, "L2 cache", alpha_l2_cacheshape);
+ show_cache_size (f, "L3 cache", alpha_l3_cacheshape);
+
return 0;
}
+/* Time a chain of dependent loads. Starting at ADDR, perform
+ SIZE / STRIDE loads (ldl), advancing the pointer by STRIDE bytes
+ after each one, with an rpcc executed before and after the loop.
+ Returns the difference of the two rpcc values divided by the number
+ of loads, which callers treat as the average load latency in
+ cycles. */
+static int __init
+read_mem_block(int *addr, int stride, int size)
+{
+ long nloads = size / stride, cnt, tmp;
+
+ __asm__ __volatile__(
+ " rpcc %0\n"
+ "1: ldl %3,0(%2)\n"
+ " subq %1,1,%1\n"
+ /* Next two XORs introduce an explicit data dependency between
+ consecutive loads in the loop, which will give us true load
+ latency. */
+ " xor %3,%2,%2\n"
+ " xor %3,%2,%2\n"
+ " addq %2,%4,%2\n"
+ " bne %1,1b\n"
+ " rpcc %3\n"
+ /* %0 = second rpcc value - first rpcc value. */
+ " subl %3,%0,%0\n"
+ : "=&r" (cnt), "=&r" (nloads), "=&r" (addr), "=&r" (tmp)
+ : "r" (stride), "1" (nloads), "2" (addr));
+
+ /* nloads was counted down to zero by the loop, so the load count
+ is recomputed here. */
+ return cnt / (size / stride);
+}
+
+/* Pack a cache description into one word: bits 0-3 hold the
+ associativity, bits 4-7 the log2 of the line size, and the remaining
+ bits the total size in bytes with its low 8 bits cleared. Every
+ argument is parenthesized so that expressions containing
+ low-precedence operators (|, ?:, ...) can be passed safely. */
+#define CSHAPE(totalsize, linesize, assoc) \
+ (((totalsize) & ~0xff) | ((linesize) << 4) | (assoc))
+
+/* Upper bound on the Bcache size; external_cache_probe() uses
+ MAX_BCACHE_SIZE * 2 as the limit of its search.
+ ??? EV5 supports up to 64M, but did systems with more than
+ 16M of BCACHE ever exist? The value is parenthesized so the
+ constant stays intact inside larger expressions. */
+#define MAX_BCACHE_SIZE (16*1024*1024)
+
+/* Note that the offchip caches are direct mapped on all Alphas. */
+/* Estimate the size of the external cache by timing loads over
+ progressively larger blocks. MINSIZE is the first block size tried,
+ in bytes; WIDTH is the log2 of the load stride and is also recorded
+ as the line-size field of the result. The block size doubles each
+ round; when the average load latency more than doubles relative to
+ the previous round, the previous size (size >> 1) is reported as a
+ direct-mapped cache via CSHAPE. Returns -1 if no such jump is seen
+ before the upper bound is reached.
+
+ NOTE(review): with power-of-two sizes the last block timed is
+ maxsize / 2, so the largest cache that can be reported appears to
+ be maxsize / 4, i.e. MAX_BCACHE_SIZE / 2 -- confirm this is the
+ intended limit. */
+static int __init
+external_cache_probe(int minsize, int width)
+{
+ int cycles, prev_cycles = 1000000;
+ int stride = 1 << width;
+ long size = minsize, maxsize = MAX_BCACHE_SIZE * 2;
+
+ /* Do not probe beyond low memory; presumably floor_log2 rounds the
+ limit down to a power of two -- confirm against its definition. */
+ if (maxsize > (max_low_pfn + 1) << PAGE_SHIFT)
+ maxsize = 1 << (floor_log2(max_low_pfn + 1) + PAGE_SHIFT);
+
+ /* Get the first block cached. */
+ read_mem_block(__va(0), stride, size);
+
+ while (size < maxsize) {
+ /* Get an average load latency in cycles. */
+ cycles = read_mem_block(__va(0), stride, size);
+ if (cycles > prev_cycles * 2) {
+ /* Fine, we exceed the cache. */
+ /* size >> 11 is the detected size (size >> 1) in kilobytes. */
+ printk("%ldK Bcache detected; load hit latency %d "
+ "cycles, load miss latency %d cycles\n",
+ size >> 11, prev_cycles, cycles);
+ return CSHAPE(size >> 1, width, 1);
+ }
+ /* Try to get the next block cached. */
+ read_mem_block(__va(size), stride, size);
+ prev_cycles = cycles;
+ size <<= 1;
+ }
+ return -1; /* No BCACHE found. */
+}
+
+/* Fill in the four alpha_*_cacheshape globals for CPU_TYPE. For each
+ known CPU family the on-chip cache shapes are hard-coded with
+ CSHAPE, while the external cache is either read from a chipset/CPU
+ register or measured with external_cache_probe(). A level that the
+ family does not have is recorded as -1; for an unrecognized
+ CPU_TYPE all four levels are recorded as 0 (unknown). */
+static void __init
+determine_cpu_caches (unsigned int cpu_type)
+{
+ int L1I, L1D, L2, L3;
+
+ switch (cpu_type) {
+ case EV4_CPU:
+ case EV45_CPU:
+ {
+ if (cpu_type == EV4_CPU)
+ L1I = CSHAPE(8*1024, 5, 1);
+ else
+ L1I = CSHAPE(16*1024, 5, 1);
+ L1D = L1I;
+ L3 = -1;
+
+ /* BIU_CTL is a write-only Abox register. PALcode has a
+ shadow copy, and may be available from some versions
+ of the CSERVE PALcall. If we can get it, then
+
+ unsigned long biu_ctl, size;
+ size = 128*1024 * (1 << ((biu_ctl >> 28) & 7));
+ L2 = CSHAPE (size, 5, 1);
+
+ Unfortunately, we can't rely on that.
+ */
+ L2 = external_cache_probe(128*1024, 5);
+ break;
+ }
+
+ case LCA4_CPU:
+ {
+ unsigned long car, size;
+
+ L1I = L1D = CSHAPE(8*1024, 5, 1);
+ L3 = -1;
+
+ /* Bit 0 of the register read here gates whether an L2 is
+ reported; bits 5-7 scale its size from a 64K base. */
+ car = *(vuip) phys_to_virt (0x120000078);
+ size = 64*1024 * (1 << ((car >> 5) & 7));
+ /* No typo -- 8 byte cacheline size. Whodathunk. */
+ L2 = (car & 1 ? CSHAPE (size, 3, 1) : -1);
+ break;
+ }
+
+ case EV5_CPU:
+ case EV56_CPU:
+ {
+ unsigned long sc_ctl, width;
+
+ L1I = L1D = CSHAPE(8*1024, 5, 1);
+
+ /* Check the line size of the Scache. */
+ sc_ctl = *(vulp) phys_to_virt (0xfffff000a8);
+ width = sc_ctl & 0x1000 ? 6 : 5;
+ L2 = CSHAPE (96*1024, width, 3);
+
+ /* BC_CONTROL and BC_CONFIG are write-only IPRs. PALcode
+ has a shadow copy, and may be available from some versions
+ of the CSERVE PALcall. If we can get it, then
+
+ unsigned long bc_control, bc_config, size;
+ size = 1024*1024 * (1 << ((bc_config & 7) - 1));
+ L3 = (bc_control & 1 ? CSHAPE (size, width, 1) : -1);
+
+ Unfortunately, we can't rely on that.
+ */
+ L3 = external_cache_probe(1024*1024, width);
+ break;
+ }
+
+ case PCA56_CPU:
+ case PCA57_CPU:
+ {
+ unsigned long cbox_config, size;
+
+ if (cpu_type == PCA56_CPU) {
+ L1I = CSHAPE(16*1024, 6, 1);
+ L1D = CSHAPE(8*1024, 5, 1);
+ } else {
+ L1I = CSHAPE(32*1024, 6, 2);
+ L1D = CSHAPE(16*1024, 5, 1);
+ }
+ L3 = -1;
+
+ /* NOTE: cbox_config and size are consumed only by the disabled
+ (#if 0) branch below; the live path measures the cache with
+ external_cache_probe() instead. */
+ cbox_config = *(vulp) phys_to_virt (0xfffff00008);
+ size = 512*1024 * (1 << ((cbox_config >> 12) & 3));
+
+#if 0
+ L2 = ((cbox_config >> 31) & 1 ? CSHAPE (size, 6, 1) : -1);
+#else
+ L2 = external_cache_probe(512*1024, 6);
+#endif
+ break;
+ }
+
+ case EV6_CPU:
+ case EV67_CPU:
+ case EV68CB_CPU:
+ case EV68AL_CPU:
+ case EV68CX_CPU:
+ case EV69_CPU:
+ L1I = L1D = CSHAPE(64*1024, 6, 2);
+ L2 = external_cache_probe(1024*1024, 6);
+ L3 = -1;
+ break;
+
+ case EV7_CPU:
+ case EV79_CPU:
+ L1I = L1D = CSHAPE(64*1024, 6, 2);
+ L2 = CSHAPE(7*1024*1024/4, 6, 7);
+ L3 = -1;
+ break;
+
+ default:
+ /* Nothing known about this cpu type. */
+ L1I = L1D = L2 = L3 = 0;
+ break;
+ }
+
+ /* Publish the results in the globals. */
+ alpha_l1i_cacheshape = L1I;
+ alpha_l1d_cacheshape = L1D;
+ alpha_l2_cacheshape = L2;
+ alpha_l3_cacheshape = L3;
+}
+
/*
* We show only CPU #0 info.
*/
@@ -1260,9 +1475,8 @@ struct seq_operations cpuinfo_op = {
};
-static int alpha_panic_event(struct notifier_block *this,
- unsigned long event,
- void *ptr)
+static int
+alpha_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
#if 1
/* FIXME FIXME FIXME */
diff --git a/include/asm-alpha/elf.h b/include/asm-alpha/elf.h
index df5aad88a5f2..b8f13146ee0c 100644
--- a/include/asm-alpha/elf.h
+++ b/include/asm-alpha/elf.h
@@ -137,10 +137,44 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task);
: amask (AMASK_CIX) ? "ev6" : "ev67"); \
})
+/* Reserve these numbers for any future use of a VDSO. */
+#if 0
+#define AT_SYSINFO 32
+#define AT_SYSINFO_EHDR 33
+#endif
+
+/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the
+ value is -1, then the cache doesn't exist. Otherwise:
+
+ bit 0-3: Cache set-associativity; 0 means fully associative.
+ bit 4-7: Log2 of cacheline size.
+ bit 8-31: Size of the entire cache >> 8.
+ bit 32-63: Reserved.
+*/
+
+#define AT_L1I_CACHESHAPE 34
+#define AT_L1D_CACHESHAPE 35
+#define AT_L2_CACHESHAPE 36
+#define AT_L3_CACHESHAPE 37
+
#ifdef __KERNEL__
+
#define SET_PERSONALITY(EX, IBCS2) \
set_personality(((EX).e_flags & EF_ALPHA_32BIT) \
? PER_LINUX_32BIT : (IBCS2) ? PER_SVR4 : PER_LINUX)
-#endif
-#endif
+extern int alpha_l1i_cacheshape;
+extern int alpha_l1d_cacheshape;
+extern int alpha_l2_cacheshape;
+extern int alpha_l3_cacheshape;
+
+/* Emit the four AT_*_CACHESHAPE entries through NEW_AUX_ENT, one per
+ cache level, each carrying the matching alpha_*_cacheshape value.
+ NOTE(review): presumably expanded by the ELF loader while it builds
+ the auxiliary vector -- confirm against the NEW_AUX_ENT definition. */
+#define ARCH_DLINFO \
+ do { \
+ NEW_AUX_ENT(AT_L1I_CACHESHAPE, alpha_l1i_cacheshape); \
+ NEW_AUX_ENT(AT_L1D_CACHESHAPE, alpha_l1d_cacheshape); \
+ NEW_AUX_ENT(AT_L2_CACHESHAPE, alpha_l2_cacheshape); \
+ NEW_AUX_ENT(AT_L3_CACHESHAPE, alpha_l3_cacheshape); \
+ } while (0)
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_ALPHA_ELF_H */