| field | value | date |
|---|---|---|
| author | Richard Henderson <rth@kanga.twiddle.home> | 2004-04-02 19:43:31 -0800 |
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-04-02 19:43:31 -0800 |
| commit | 77afcd086ba4003282f3d40820c24826ff24b2aa | |
| tree | b3c1a4c32137369bc1edcec02db1572a20abc2ca | |
| parent | aa8c926585fbff7f109c3d6d98b6261213f6d8f3 | |
[ALPHA] Detect and export cache shapes to userland.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | arch/alpha/kernel/setup.c | 222 |
| -rw-r--r-- | include/asm-alpha/elf.h | 38 |

2 files changed, 254 insertions, 6 deletions
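For reference, the patch packs each cache description into a single integer via the CSHAPE() macro it adds to setup.c: associativity in bits 0-3, log2 of the line size in bits 4-7, and the total size above bit 8. The standalone sketch below (not part of the commit) reuses that macro and the same decoding as the patch's show_cache_size(), with the EV6 L1 parameters taken from determine_cpu_caches(), to show what one encoded value looks like.

```c
#include <stdio.h>

/* Same encoding as the CSHAPE() macro added to setup.c below. */
#define CSHAPE(totalsize, linesize, assoc) \
	((totalsize & ~0xff) | (linesize << 4) | assoc)

int main(void)
{
	/* EV6 L1 parameters from determine_cpu_caches():
	   64K total, 64-byte (2^6) lines, 2-way set associative. */
	int shape = CSHAPE(64*1024, 6, 2);

	/* Decode exactly as show_cache_size() does. */
	printf("shape = 0x%x -> %dK, %d-way, %db line\n",
	       shape, shape >> 10, shape & 15, 1 << ((shape >> 4) & 15));
	return 0;
}
```

Run, this prints `shape = 0x10062 -> 64K, 2-way, 64b line`, i.e. the same text the patch emits into /proc/cpuinfo.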
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index fd3b3871778d..d1f1e0cdfa61 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -38,8 +38,9 @@
 #include <linux/sysrq.h>
 #include <linux/reboot.h>
 #endif
-
 #include <linux/notifier.h>
+#include <asm/io.h>
+
 extern struct notifier_block *panic_notifier_list;
 static int alpha_panic_event(struct notifier_block *, unsigned long, void *);
 static struct notifier_block alpha_panic_block = {
@@ -64,6 +65,11 @@ static struct notifier_block alpha_panic_block = {
 struct hwrpb_struct *hwrpb;
 unsigned long srm_hae;
 
+int alpha_l1i_cacheshape;
+int alpha_l1d_cacheshape;
+int alpha_l2_cacheshape;
+int alpha_l3_cacheshape;
+
 #ifdef CONFIG_VERBOSE_MCHECK
 /* 0=minimum, 1=verbose, 2=all */
 /* These can be overridden via the command line, ie "verbose_mcheck=2") */
@@ -113,6 +119,7 @@ static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long,
 static struct alpha_machine_vector *get_sysvec_byname(const char *);
 static void get_sysnames(unsigned long, unsigned long, unsigned long,
                          char **, char **);
+static void determine_cpu_caches (unsigned int);
 
 static char command_line[COMMAND_LINE_SIZE];
 char saved_command_line[COMMAND_LINE_SIZE];
@@ -672,6 +679,9 @@ setup_arch(char **cmdline_p)
 	/* Find our memory.  */
 	setup_memory(kernel_end);
 
+	/* First guess at cpu cache sizes.  Do this before init_arch.  */
+	determine_cpu_caches(cpu->type);
+
 	/* Initialize the machine.  Usually has to do with setting up
 	   DMA windows and the like.  */
 	if (alpha_mv.init_arch)
@@ -1156,6 +1166,18 @@ get_nr_processors(struct percpu_struct *cpubase, unsigned long num)
 	return count;
 }
 
+static void
+show_cache_size (struct seq_file *f, const char *which, int shape)
+{
+	if (shape == -1)
+		seq_printf (f, "%s\t\t: n/a\n", which);
+	else if (shape == 0)
+		seq_printf (f, "%s\t\t: unknown\n", which);
+	else
+		seq_printf (f, "%s\t\t: %dK, %d-way, %db line\n",
+			    which, shape >> 10, shape & 15,
+			    1 << ((shape >> 4) & 15));
+}
 
 static int
 show_cpuinfo(struct seq_file *f, void *slot)
@@ -1229,9 +1251,202 @@ show_cpuinfo(struct seq_file *f, void *slot)
 		       num_online_cpus(), cpu_present_mask);
 #endif
 
+	show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape);
+	show_cache_size (f, "L1 Dcache", alpha_l1d_cacheshape);
+	show_cache_size (f, "L2 cache", alpha_l2_cacheshape);
+	show_cache_size (f, "L3 cache", alpha_l3_cacheshape);
+
 	return 0;
 }
 
+static int __init
+read_mem_block(int *addr, int stride, int size)
+{
+	long nloads = size / stride, cnt, tmp;
+
+	__asm__ __volatile__(
+	"	rpcc	%0\n"
+	"1:	ldl	%3,0(%2)\n"
+	"	subq	%1,1,%1\n"
+	/* Next two XORs introduce an explicit data dependency between
+	   consecutive loads in the loop, which will give us true load
+	   latency. */
+	"	xor	%3,%2,%2\n"
+	"	xor	%3,%2,%2\n"
+	"	addq	%2,%4,%2\n"
+	"	bne	%1,1b\n"
+	"	rpcc	%3\n"
+	"	subl	%3,%0,%0\n"
+	: "=&r" (cnt), "=&r" (nloads), "=&r" (addr), "=&r" (tmp)
+	: "r" (stride), "1" (nloads), "2" (addr));
+
+	return cnt / (size / stride);
+}
+
+#define CSHAPE(totalsize, linesize, assoc) \
+	((totalsize & ~0xff) | (linesize << 4) | assoc)
+
+/* ??? EV5 supports up to 64M, but did the systems with more than
+   16M of BCACHE ever exist? */
+#define MAX_BCACHE_SIZE	16*1024*1024
+
+/* Note that the offchip caches are direct mapped on all Alphas. */
+static int __init
+external_cache_probe(int minsize, int width)
+{
+	int cycles, prev_cycles = 1000000;
+	int stride = 1 << width;
+	long size = minsize, maxsize = MAX_BCACHE_SIZE * 2;
+
+	if (maxsize > (max_low_pfn + 1) << PAGE_SHIFT)
+		maxsize = 1 << (floor_log2(max_low_pfn + 1) + PAGE_SHIFT);
+
+	/* Get the first block cached. */
+	read_mem_block(__va(0), stride, size);
+
+	while (size < maxsize) {
+		/* Get an average load latency in cycles. */
+		cycles = read_mem_block(__va(0), stride, size);
+		if (cycles > prev_cycles * 2) {
+			/* Fine, we exceed the cache. */
+			printk("%ldK Bcache detected; load hit latency %d "
+			       "cycles, load miss latency %d cycles\n",
+			       size >> 11, prev_cycles, cycles);
+			return CSHAPE(size >> 1, width, 1);
+		}
+		/* Try to get the next block cached. */
+		read_mem_block(__va(size), stride, size);
+		prev_cycles = cycles;
+		size <<= 1;
+	}
+	return -1;		/* No BCACHE found. */
+}
+
+static void __init
+determine_cpu_caches (unsigned int cpu_type)
+{
+	int L1I, L1D, L2, L3;
+
+	switch (cpu_type) {
+	case EV4_CPU:
+	case EV45_CPU:
+	    {
+		if (cpu_type == EV4_CPU)
+			L1I = CSHAPE(8*1024, 5, 1);
+		else
+			L1I = CSHAPE(16*1024, 5, 1);
+		L1D = L1I;
+		L3 = -1;
+
+		/* BIU_CTL is a write-only Abox register.  PALcode has a
+		   shadow copy, and may be available from some versions
+		   of the CSERVE PALcall.  If we can get it, then
+
+			unsigned long biu_ctl, size;
+			size = 128*1024 * (1 << ((biu_ctl >> 28) & 7));
+			L2 = CSHAPE (size, 5, 1);
+
+		   Unfortunately, we can't rely on that.
+		*/
+		L2 = external_cache_probe(128*1024, 5);
+		break;
+	    }
+
+	case LCA4_CPU:
+	    {
+		unsigned long car, size;
+
+		L1I = L1D = CSHAPE(8*1024, 5, 1);
+		L3 = -1;
+
+		car = *(vuip) phys_to_virt (0x120000078);
+		size = 64*1024 * (1 << ((car >> 5) & 7));
+		/* No typo -- 8 byte cacheline size.  Whodathunk.  */
+		L2 = (car & 1 ? CSHAPE (size, 3, 1) : -1);
+		break;
+	    }
+
+	case EV5_CPU:
+	case EV56_CPU:
+	    {
+		unsigned long sc_ctl, width;
+
+		L1I = L1D = CSHAPE(8*1024, 5, 1);
+
+		/* Check the line size of the Scache.  */
+		sc_ctl = *(vulp) phys_to_virt (0xfffff000a8);
+		width = sc_ctl & 0x1000 ? 6 : 5;
+		L2 = CSHAPE (96*1024, width, 3);
+
+		/* BC_CONTROL and BC_CONFIG are write-only IPRs.  PALcode
+		   has a shadow copy, and may be available from some versions
+		   of the CSERVE PALcall.  If we can get it, then
+
+			unsigned long bc_control, bc_config, size;
+			size = 1024*1024 * (1 << ((bc_config & 7) - 1));
+			L3 = (bc_control & 1 ? CSHAPE (size, width, 1) : -1);
+
+		   Unfortunately, we can't rely on that.
+		*/
+		L3 = external_cache_probe(1024*1024, width);
+		break;
+	    }
+
+	case PCA56_CPU:
+	case PCA57_CPU:
+	    {
+		unsigned long cbox_config, size;
+
+		if (cpu_type == PCA56_CPU) {
+			L1I = CSHAPE(16*1024, 6, 1);
+			L1D = CSHAPE(8*1024, 5, 1);
+		} else {
+			L1I = CSHAPE(32*1024, 6, 2);
+			L1D = CSHAPE(16*1024, 5, 1);
+		}
+		L3 = -1;
+
+		cbox_config = *(vulp) phys_to_virt (0xfffff00008);
+		size = 512*1024 * (1 << ((cbox_config >> 12) & 3));
+
+#if 0
+		L2 = ((cbox_config >> 31) & 1 ? CSHAPE (size, 6, 1) : -1);
+#else
+		L2 = external_cache_probe(512*1024, 6);
+#endif
+		break;
+	    }
+
+	case EV6_CPU:
+	case EV67_CPU:
+	case EV68CB_CPU:
+	case EV68AL_CPU:
+	case EV68CX_CPU:
+	case EV69_CPU:
+		L1I = L1D = CSHAPE(64*1024, 6, 2);
+		L2 = external_cache_probe(1024*1024, 6);
+		L3 = -1;
+		break;
+
+	case EV7_CPU:
+	case EV79_CPU:
+		L1I = L1D = CSHAPE(64*1024, 6, 2);
+		L2 = CSHAPE(7*1024*1024/4, 6, 7);
+		L3 = -1;
+		break;
+
+	default:
+		/* Nothing known about this cpu type.  */
+		L1I = L1D = L2 = L3 = 0;
+		break;
+	}
+
+	alpha_l1i_cacheshape = L1I;
+	alpha_l1d_cacheshape = L1D;
+	alpha_l2_cacheshape = L2;
+	alpha_l3_cacheshape = L3;
+}
+
 /*
  * We show only CPU #0 info.
  */
@@ -1260,9 +1475,8 @@ struct seq_operations cpuinfo_op = {
 };
 
 
-static int alpha_panic_event(struct notifier_block *this,
-                             unsigned long event,
-                             void *ptr)
+static int
+alpha_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 #if 1
 	/* FIXME FIXME FIXME */
diff --git a/include/asm-alpha/elf.h b/include/asm-alpha/elf.h
index df5aad88a5f2..b8f13146ee0c 100644
--- a/include/asm-alpha/elf.h
+++ b/include/asm-alpha/elf.h
@@ -137,10 +137,44 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task);
 	 : amask (AMASK_CIX) ? "ev6" : "ev67");	\
 })
 
+/* Reserve these numbers for any future use of a VDSO. */
+#if 0
+#define AT_SYSINFO	32
+#define AT_SYSINFO_EHDR	33
+#endif
+
+/* More complete cache descriptions than AT_[DIU]CACHEBSIZE.  If the
+   value is -1, then the cache doesn't exist.  Otherwise:
+
+     bit 0-3:	  Cache set-associativity; 0 means fully associative.
+     bit 4-7:	  Log2 of cacheline size.
+     bit 8-31:	  Size of the entire cache >> 8.
+     bit 32-63:	  Reserved.
+*/
+
+#define AT_L1I_CACHESHAPE	34
+#define AT_L1D_CACHESHAPE	35
+#define AT_L2_CACHESHAPE	36
+#define AT_L3_CACHESHAPE	37
+
 #ifdef __KERNEL__
+
 #define SET_PERSONALITY(EX, IBCS2)				\
 	set_personality(((EX).e_flags & EF_ALPHA_32BIT)		\
 		? PER_LINUX_32BIT : (IBCS2) ? PER_SVR4 : PER_LINUX)
 
-#endif
-#endif
+extern int alpha_l1i_cacheshape;
+extern int alpha_l1d_cacheshape;
+extern int alpha_l2_cacheshape;
+extern int alpha_l3_cacheshape;
+
+#define ARCH_DLINFO \
+  do { \
+    NEW_AUX_ENT(AT_L1I_CACHESHAPE, alpha_l1i_cacheshape); \
+    NEW_AUX_ENT(AT_L1D_CACHESHAPE, alpha_l1d_cacheshape); \
+    NEW_AUX_ENT(AT_L2_CACHESHAPE, alpha_l2_cacheshape); \
+    NEW_AUX_ENT(AT_L3_CACHESHAPE, alpha_l3_cacheshape); \
+  } while (0)
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_ALPHA_ELF_H */
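On the userland side, the four new AT_L*_CACHESHAPE entries arrive through the ELF auxiliary vector via the ARCH_DLINFO hook above. The sketch below shows one way a program might read them; it assumes a libc that provides getauxval() (a much later glibc 2.16+ interface, not available when this patch was written) and simply reuses the AT_* numbers and bit layout from the elf.h hunk. It is illustrative only, not part of the commit.

```c
#include <stdio.h>
#include <sys/auxv.h>

/* Aux vector tags from the elf.h hunk above; guard them in case the
   libc headers already define them. */
#ifndef AT_L1I_CACHESHAPE
#define AT_L1I_CACHESHAPE	34
#define AT_L1D_CACHESHAPE	35
#define AT_L2_CACHESHAPE	36
#define AT_L3_CACHESHAPE	37
#endif

static void print_shape(const char *name, unsigned long shape)
{
	if (shape == (unsigned long) -1) {
		/* -1 (sign-extended in the aux vector) means no such cache. */
		printf("%-9s: n/a\n", name);
	} else if (shape == 0) {
		/* 0 also covers "entry not present" from getauxval(). */
		printf("%-9s: unknown\n", name);
	} else {
		/* Decode per the elf.h comment: bits 0-3 associativity,
		   bits 4-7 log2(line size), bits 8-31 total size >> 8. */
		printf("%-9s: %luK, %lu-way, %lub line\n", name,
		       shape >> 10, shape & 15, 1UL << ((shape >> 4) & 15));
	}
}

int main(void)
{
	print_shape("L1 Icache", getauxval(AT_L1I_CACHESHAPE));
	print_shape("L1 Dcache", getauxval(AT_L1D_CACHESHAPE));
	print_shape("L2 cache", getauxval(AT_L2_CACHESHAPE));
	print_shape("L3 cache", getauxval(AT_L3_CACHESHAPE));
	return 0;
}
```

The output mirrors the /proc/cpuinfo lines produced by show_cache_size() in the setup.c hunk, since both sides decode the same CSHAPE() encoding.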
