diff options
| author | Linus Torvalds <torvalds@athlon.transmeta.com> | 2002-02-04 20:03:43 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@athlon.transmeta.com> | 2002-02-04 20:03:43 -0800 |
| commit | 70d68bd32041d22febb277038641d55c6ac7b57a (patch) | |
| tree | 6288fe675e36fc874ed284519f2d1d7a8e4c789e | |
| parent | 48ad999d6fe95727a27b9ec82e522398d05cd928 (diff) | |
v2.4.7.3 -> v2.4.7.4
- David Mosberger: IA64 update
- Geert Uytterhoeven: cleanup, new atyfb
- Marcelo Tosatti: zone aging fixes
- me, others: limit IO requests sanely
115 files changed, 6054 insertions, 4826 deletions
diff --git a/Documentation/Configure.help b/Documentation/Configure.help index 659e3960e155..dea48b8d64a3 100644 --- a/Documentation/Configure.help +++ b/Documentation/Configure.help @@ -3245,6 +3245,18 @@ CONFIG_FB_ATY module will be called atyfb.o. If you want to compile it as a module, say M here and read Documentation/modules.txt. +ATI Mach64 GX display support (EXPERIMENTAL) +CONFIG_FB_ATY_GX + This option adds support for the first generation ATI Mach64 + graphics chips, i.e. the Mach64 GX and CX. Note that this support is + limited. + +ATI Mach64 CT/VT/GT/LT display support (EXPERIMENTAL) +CONFIG_FB_ATY_CT + This option adds support for ATI Mach64 graphics chips starting + with the Mach64 CT family. This includes the Mach64 VT (limited + support), GT (3D RAGE family), and LT. + ATI Rage128 display support (EXPERIMENTAL) CONFIG_FB_ATY128 This driver supports graphics boards with the ATI Rage128 chips. diff --git a/MAINTAINERS b/MAINTAINERS index edcc37efc9c6..e95b5b73972b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -608,6 +608,13 @@ M: nils@kernelconcepts.de W: http://www.kernelconcepts.de/ S: Maintained +IA64 (Itanium) PLATFORM +P: David Mosberger-Tang +M: davidm@hpl.hp.com +L: linux-ia64@linuxia64.org +W: http://www.linuxia64.org/ +S: Maintained + IBM MCA SCSI SUBSYSTEM DRIVER P: Michael Lang M: langa2@kph.uni-mainz.de @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 8 -EXTRAVERSION =-pre3 +EXTRAVERSION =-pre4 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 771be87cbc8c..c08d2864fdbd 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -14,18 +14,18 @@ AWK := awk export AWK LINKFLAGS = -static -T arch/$(ARCH)/vmlinux.lds -AFLAGS += -Wa,-x AFLAGS_KERNEL := -mconstant-gp EXTRA = -CFLAGS := $(CFLAGS) -pipe $(EXTRA) -Wa,-x -ffixed-r13 -mfixed-range=f10-f15,f32-f127 \ - -funwind-tables -falign-functions=32 -# -frename-registers (this crashes the Nov 17 compiler...) 
+CFLAGS := $(CFLAGS) -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 CFLAGS_KERNEL := -mconstant-gp -ifeq ($(CONFIG_ITANIUM_ASTEP_SPECIFIC),y) - CFLAGS += -ma-step +GCC_VERSION=$(shell $(CROSS_COMPILE)$(HOSTCC) -v 2>&1 | fgrep 'gcc version' | cut -f3 -d' ' | cut -f1 -d'.') + +ifneq ($(GCC_VERSION),2) + CFLAGS += -frename-registers endif + ifeq ($(CONFIG_ITANIUM_BSTEP_SPECIFIC),y) CFLAGS += -mb-step endif diff --git a/arch/ia64/boot/bootloader.c b/arch/ia64/boot/bootloader.c index f50cd164eb73..edf3b789d556 100644 --- a/arch/ia64/boot/bootloader.c +++ b/arch/ia64/boot/bootloader.c @@ -87,9 +87,6 @@ _start (void) asm volatile ("movl gp=__gp;;" ::: "memory"); asm volatile ("mov sp=%0" :: "r"(stack) : "memory"); asm volatile ("bsw.1;;"); -#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC - asm volative ("nop 0;; nop 0;; nop 0;;"); -#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ ssc(0, 0, 0, 0, SSC_CONSOLE_INIT); diff --git a/arch/ia64/config.in b/arch/ia64/config.in index eede12452a75..a93083828d04 100644 --- a/arch/ia64/config.in +++ b/arch/ia64/config.in @@ -26,6 +26,12 @@ define_bool CONFIG_SBUS n define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n +if [ "$CONFIG_IA64_HP_SIM" = "n" ]; then + define_bool CONFIG_ACPI y + define_bool CONFIG_ACPI_INTERPRETER y + define_bool CONFIG_ACPI_KERNEL_CONFIG y +fi + choice 'IA-64 processor type' \ "Itanium CONFIG_ITANIUM \ McKinley CONFIG_MCKINLEY" Itanium @@ -44,7 +50,6 @@ choice 'Kernel page size' \ if [ "$CONFIG_ITANIUM" = "y" ]; then define_bool CONFIG_IA64_BRL_EMU y - bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC bool ' Enable Itanium B-step specific code' CONFIG_ITANIUM_BSTEP_SPECIFIC if [ "$CONFIG_ITANIUM_BSTEP_SPECIFIC" = "y" ]; then bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC @@ -59,7 +64,7 @@ if [ "$CONFIG_ITANIUM" = "y" ]; then if [ "$CONFIG_ITANIUM_CSTEP_SPECIFIC" = "y" ]; then bool ' Enable Itanium C0-step 
specific code' CONFIG_ITANIUM_C0_SPECIFIC fi - if [ "$CONFIG_ITANIUM_ASTEP_SPECIFIC" = "y" -o "$CONFIG_ITANIUM_B0_SPECIFIC" = "y" \ + if [ "$CONFIG_ITANIUM_B0_SPECIFIC" = "y" \ -o "$CONFIG_ITANIUM_B1_SPECIFIC" = "y" -o "$CONFIG_ITANIUM_B2_SPECIFIC" = "y" ]; then define_bool CONFIG_ITANIUM_PTCG n else @@ -84,13 +89,7 @@ fi if [ "$CONFIG_IA64_DIG" = "y" ]; then bool ' Force interrupt redirection' CONFIG_IA64_HAVE_IRQREDIR bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA - bool ' Enable ACPI 2.0 with errata 1.3' CONFIG_ACPI20 - bool ' ACPI kernel configuration manager (EXPERIMENTAL)' CONFIG_ACPI_KERNEL_CONFIG - if [ "$CONFIG_ACPI_KERNEL_CONFIG" = "y" ]; then - define_bool CONFIG_PM y - define_bool CONFIG_ACPI y - define_bool CONFIG_ACPI_INTERPRETER y - fi + define_bool CONFIG_PM y fi if [ "$CONFIG_IA64_SGI_SN1" = "y" ]; then @@ -112,7 +111,7 @@ define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. bool 'SMP support' CONFIG_SMP bool 'Performance monitor support' CONFIG_PERFMON tristate '/proc/pal support' CONFIG_IA64_PALINFO -tristate '/proc/efi support' CONFIG_IA64_EFIVARS +tristate '/proc/efi/vars support' CONFIG_EFI_VARS bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC @@ -123,6 +122,8 @@ tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC if [ "$CONFIG_IA64_HP_SIM" = "n" ]; then +source drivers/acpi/Config.in + bool 'PCI support' CONFIG_PCI source drivers/pci/Config.in @@ -247,6 +248,10 @@ endmenu source drivers/usb/Config.in +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + source net/bluetooth/Config.in +fi + fi # !HP_SIM if [ "$CONFIG_IA64_HP_SIM" != "n" -o "$CONFIG_IA64_GENERIC" != "n" ]; then diff --git a/arch/ia64/hp/hpsim_setup.c b/arch/ia64/hp/hpsim_setup.c index dfa83e13588e..16964d5eb33b 100644 --- a/arch/ia64/hp/hpsim_setup.c +++ b/arch/ia64/hp/hpsim_setup.c @@ -27,28 +27,15 @@ extern struct console hpsim_cons; /* * Simulator system call. 
*/ -inline long -ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr) -{ -#ifdef __GCC_DOESNT_KNOW_IN_REGS__ - register long in0 asm ("r32") = arg0; - register long in1 asm ("r33") = arg1; - register long in2 asm ("r34") = arg2; - register long in3 asm ("r35") = arg3; -#else - register long in0 asm ("in0") = arg0; - register long in1 asm ("in1") = arg1; - register long in2 asm ("in2") = arg2; - register long in3 asm ("in3") = arg3; -#endif - register long r8 asm ("r8"); - register long r15 asm ("r15") = nr; - - asm volatile ("break 0x80001" - : "=r"(r8) - : "r"(r15), "r"(in0), "r"(in1), "r"(in2), "r"(in3)); - return r8; -} +asm (".text\n" + ".align 32\n" + ".global ia64_ssc\n" + ".proc ia64_ssc\n" + "ia64_ssc:\n" + "mov r15=r36\n" + "break 0x80001\n" + "br.ret.sptk.many rp\n" + ".endp\n"); void ia64_ssc_connect_irq (long intr, long irq) diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile index 834e24fff7e8..d43d3b6f812a 100644 --- a/arch/ia64/ia32/Makefile +++ b/arch/ia64/ia32/Makefile @@ -11,7 +11,8 @@ all: ia32.o O_TARGET := ia32.o -obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o ia32_support.o ia32_traps.o binfmt_elf32.o +obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o ia32_support.o ia32_traps.o \ + binfmt_elf32.o ia32_ldt.o clean:: diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 636eaeac4803..2068534a8f9e 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -2,8 +2,11 @@ * IA-32 ELF support. * * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 2001 Hewlett-Packard Co + * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com> * * 06/16/00 A. Mallick initialize csd/ssd/tssd/cflg for ia32_load_state + * 04/13/01 D. 
Mosberger dropped saving tssd in ar.k1---it's not needed */ #include <linux/config.h> @@ -35,8 +38,8 @@ #undef CLOCKS_PER_SEC #define CLOCKS_PER_SEC IA32_CLOCKS_PER_SEC -extern void ia64_elf32_init(struct pt_regs *regs); -extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address); +extern void ia64_elf32_init (struct pt_regs *regs); +extern void put_dirty_page (struct task_struct * tsk, struct page *page, unsigned long address); #define ELF_PLAT_INIT(_r) ia64_elf32_init(_r) #define setup_arg_pages(bprm) ia32_setup_arg_pages(bprm) @@ -49,7 +52,7 @@ extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned unsigned long *ia32_gdt_table, *ia32_tss; struct page * -put_shared_page(struct task_struct * tsk, struct page *page, unsigned long address) +put_shared_page (struct task_struct * tsk, struct page *page, unsigned long address) { pgd_t * pgd; pmd_t * pmd; @@ -83,85 +86,99 @@ put_shared_page(struct task_struct * tsk, struct page *page, unsigned long addre return 0; } -void ia64_elf32_init(struct pt_regs *regs) +void +ia64_elf32_init (struct pt_regs *regs) { + struct vm_area_struct *vma; int nr; - put_shared_page(current, virt_to_page(ia32_gdt_table), IA32_PAGE_OFFSET); + /* + * Map GDT and TSS below 4GB, where the processor can find them. We need to map + * it with privilege level 3 because the IVE uses non-privileged accesses to these + * tables. IA-32 segmentation is used to protect against IA-32 accesses to them. + */ + put_shared_page(current, virt_to_page(ia32_gdt_table), IA32_GDT_OFFSET); if (PAGE_SHIFT <= IA32_PAGE_SHIFT) - put_shared_page(current, virt_to_page(ia32_tss), IA32_PAGE_OFFSET + PAGE_SIZE); + put_shared_page(current, virt_to_page(ia32_tss), IA32_TSS_OFFSET); - nr = smp_processor_id(); + /* + * Install LDT as anonymous memory. This gives us all-zero segment descriptors + * until a task modifies them via modify_ldt(). 
+ */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + vma->vm_mm = current->mm; + vma->vm_start = IA32_LDT_OFFSET; + vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE); + vma->vm_page_prot = PAGE_SHARED; + vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE; + vma->vm_ops = NULL; + vma->vm_pgoff = 0; + vma->vm_file = NULL; + vma->vm_private_data = NULL; + insert_vm_struct(current->mm, vma); + } - /* Do all the IA-32 setup here */ + nr = smp_processor_id(); - current->thread.map_base = 0x40000000; - current->thread.task_size = 0xc0000000; /* use what Linux/x86 uses... */ + current->thread.map_base = IA32_PAGE_OFFSET/3; + current->thread.task_size = IA32_PAGE_OFFSET; /* use what Linux/x86 uses... */ set_fs(USER_DS); /* set addr limit for new TASK_SIZE */ - /* setup ia32 state for ia32_load_state */ + /* Setup the segment selectors */ + regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */ + regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */ - current->thread.eflag = IA32_EFLAG; - current->thread.csd = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0xBL, 1L, 3L, 1L, 1L, 1L); - current->thread.ssd = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); - current->thread.tssd = IA64_SEG_DESCRIPTOR(IA32_PAGE_OFFSET + PAGE_SIZE, 0x1FFFL, 0xBL, - 1L, 3L, 1L, 1L, 1L); - - /* CS descriptor */ - __asm__("mov ar.csd = %0" : /* no outputs */ - : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0xBL, 1L, - 3L, 1L, 1L, 1L)); - /* SS descriptor */ - __asm__("mov ar.ssd = %0" : /* no outputs */ - : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, - 3L, 1L, 1L, 1L)); - /* EFLAGS */ - __asm__("mov ar.eflag = %0" : /* no outputs */ : "r" (IA32_EFLAG)); - - /* Control registers */ - __asm__("mov ar.fsr = %0" - : /* no outputs */ - : "r" ((ulong)IA32_FSR_DEFAULT)); - __asm__("mov ar.fcr = %0" - : /* no outputs */ - : "r" ((ulong)IA32_FCR_DEFAULT)); - __asm__("mov ar.fir = r0"); - 
__asm__("mov ar.fdr = r0"); - current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE); - ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE); - - /* Get the segment selectors right */ - regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */ - regs->r17 = (_TSS(nr) << 48) | (_LDT(nr) << 32) - | (__USER_DS << 16) | __USER_CS; - - /* Setup other segment descriptors - ESD, DSD, FSD, GSD */ - regs->r24 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); - regs->r27 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); - regs->r28 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); - regs->r29 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); - - /* Setup the LDT and GDT */ - regs->r30 = ia32_gdt_table[_LDT(nr)]; - regs->r31 = IA64_SEG_DESCRIPTOR(0xc0000000L, 0x400L, 0x3L, 1L, 3L, - 1L, 1L, 1L); - - /* Clear psr.ac */ - regs->cr_ipsr &= ~IA64_PSR_AC; + /* Setup the segment descriptors */ + regs->r24 = IA32_SEG_UNSCRAMBLE(ia32_gdt_table[__USER_DS >> 3]); /* ESD */ + regs->r27 = IA32_SEG_UNSCRAMBLE(ia32_gdt_table[__USER_DS >> 3]); /* DSD */ + regs->r28 = 0; /* FSD (null) */ + regs->r29 = 0; /* GSD (null) */ + regs->r30 = IA32_SEG_UNSCRAMBLE(ia32_gdt_table[_LDT(nr)]); /* LDTD */ + /* + * Setup GDTD. Note: GDTD is the descrambled version of the pseudo-descriptor + * format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32 + * architecture manual. + */ + regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1, 0, + 0, 0, 0, 0, 0, 0)); + + ia64_psr(regs)->ac = 0; /* turn off alignment checking */ regs->loadrs = 0; -} + /* + * According to the ABI %edx points to an `atexit' handler. Since we don't have + * one we'll set it to 0 and initialize all the other registers just to make + * things more deterministic, ala the i386 implementation. 
+ */ + regs->r8 = 0; /* %eax */ + regs->r11 = 0; /* %ebx */ + regs->r9 = 0; /* %ecx */ + regs->r10 = 0; /* %edx */ + regs->r13 = 0; /* %ebp */ + regs->r14 = 0; /* %esi */ + regs->r15 = 0; /* %edi */ -#undef STACK_TOP -#define STACK_TOP ((IA32_PAGE_OFFSET/3) * 2) + current->thread.eflag = IA32_EFLAG; + current->thread.fsr = IA32_FSR_DEFAULT; + current->thread.fcr = IA32_FCR_DEFAULT; + current->thread.fir = 0; + current->thread.fdr = 0; + current->thread.csd = IA32_SEG_UNSCRAMBLE(ia32_gdt_table[__USER_CS >> 3]); + current->thread.ssd = IA32_SEG_UNSCRAMBLE(ia32_gdt_table[__USER_DS >> 3]); + current->thread.tssd = IA32_SEG_UNSCRAMBLE(ia32_gdt_table[_TSS(nr)]); + + ia32_load_state(current); +} -int ia32_setup_arg_pages(struct linux_binprm *bprm) +int +ia32_setup_arg_pages (struct linux_binprm *bprm) { unsigned long stack_base; struct vm_area_struct *mpnt; int i; - stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; + stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; bprm->p += stack_base; if (bprm->loader) @@ -175,7 +192,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm) { mpnt->vm_mm = current->mm; mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; - mpnt->vm_end = STACK_TOP; + mpnt->vm_end = IA32_STACK_TOP; mpnt->vm_page_prot = PAGE_COPY; mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_ops = NULL; @@ -197,15 +214,15 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm) } static unsigned long -ia32_mm_addr(unsigned long addr) +ia32_mm_addr (unsigned long addr) { struct vm_area_struct *vma; if ((vma = find_vma(current->mm, addr)) == NULL) - return(ELF_PAGESTART(addr)); + return ELF_PAGESTART(addr); if (vma->vm_start > addr) - return(ELF_PAGESTART(addr)); - return(ELF_PAGEALIGN(addr)); + return ELF_PAGESTART(addr); + return ELF_PAGEALIGN(addr); } /* @@ -232,22 +249,9 @@ elf_map32 (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int p */ if (addr == 0) addr += PAGE_SIZE; -#if 1 set_brk(ia32_mm_addr(addr), addr + eppnt->p_memsz); memset((char *) addr 
+ eppnt->p_filesz, 0, eppnt->p_memsz - eppnt->p_filesz); kernel_read(filep, eppnt->p_offset, (char *) addr, eppnt->p_filesz); retval = (unsigned long) addr; -#else - /* doesn't work yet... */ -# define IA32_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_EXEC_PAGESIZE-1)) -# define IA32_PAGEOFFSET(_v) ((_v) & (ELF_EXEC_PAGESIZE-1)) -# define IA32_PAGEALIGN(_v) (((_v) + ELF_EXEC_PAGESIZE - 1) & ~(ELF_EXEC_PAGESIZE - 1)) - - down_write(&current->mm->mmap_sem); - retval = ia32_do_mmap(filep, IA32_PAGESTART(addr), - eppnt->p_filesz + IA32_PAGEOFFSET(eppnt->p_vaddr), prot, type, - eppnt->p_offset - IA32_PAGEOFFSET(eppnt->p_vaddr)); - up_write(&current->mm->mmap_sem); -#endif return retval; } diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index c8e6c5a202fd..5b90ca5c2953 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -140,7 +140,7 @@ ia32_syscall_table: data8 sys_lchown data8 sys32_ni_syscall /* old break syscall holder */ data8 sys32_ni_syscall - data8 sys_lseek + data8 sys32_lseek data8 sys_getpid /* 20 */ data8 sys_mount data8 sys_oldumount @@ -233,7 +233,7 @@ ia32_syscall_table: data8 sys32_ni_syscall data8 sys_iopl /* 110 */ data8 sys_vhangup - data8 sys32_ni_syscall // used to be sys_idle + data8 sys32_ni_syscall /* used to be sys_idle */ data8 sys32_ni_syscall data8 sys32_wait4 data8 sys_swapoff /* 115 */ @@ -244,7 +244,7 @@ ia32_syscall_table: data8 sys_clone /* 120 */ data8 sys_setdomainname data8 sys32_newuname - data8 sys_modify_ldt + data8 sys32_modify_ldt data8 sys_adjtimex data8 sys32_mprotect /* 125 */ data8 sys_sigprocmask @@ -286,13 +286,13 @@ ia32_syscall_table: data8 sys32_nanosleep data8 sys_mremap data8 sys_setresuid - data8 sys_getresuid /* 165 */ + data8 sys32_getresuid /* 165 */ data8 sys_vm86 data8 sys_query_module data8 sys_poll data8 sys_nfsservctl data8 sys_setresgid /* 170 */ - data8 sys_getresgid + data8 sys32_getresgid data8 sys_prctl data8 sys32_rt_sigreturn data8 sys32_rt_sigaction diff --git 
a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c new file mode 100644 index 000000000000..48bf44ceefce --- /dev/null +++ b/arch/ia64/ia32/ia32_ldt.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2001 Hewlett-Packard Co + * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Adapted from arch/i386/kernel/ldt.c + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/vmalloc.h> + +#include <asm/uaccess.h> +#include <asm/ia32.h> + +/* + * read_ldt() is not really atomic - this is not a problem since synchronization of reads + * and writes done to the LDT has to be assured by user-space anyway. Writes are atomic, + * to protect the security checks done on new descriptors. + */ +static int +read_ldt (void *ptr, unsigned long bytecount) +{ + char *src, *dst, buf[256]; /* temporary buffer (don't overflow kernel stack!) */ + unsigned long bytes_left, n; + + if (bytecount > IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE) + bytecount = IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE; + + bytes_left = bytecount; + + src = (void *) IA32_LDT_OFFSET; + dst = ptr; + + while (bytes_left) { + n = sizeof(buf); + if (n > bytes_left) + n = bytes_left; + + /* + * We know we're reading valid memory, but we still must guard against + * running out of memory. 
+ */ + if (__copy_from_user(buf, src, n)) + return -EFAULT; + + if (copy_to_user(dst, buf, n)) + return -EFAULT; + + src += n; + dst += n; + bytes_left -= n; + } + return bytecount; +} + +static int +write_ldt (void * ptr, unsigned long bytecount, int oldmode) +{ + struct ia32_modify_ldt_ldt_s ldt_info; + __u64 entry; + + if (bytecount != sizeof(ldt_info)) + return -EINVAL; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + return -EFAULT; + + if (ldt_info.entry_number >= IA32_LDT_ENTRIES) + return -EINVAL; + if (ldt_info.contents == 3) { + if (oldmode) + return -EINVAL; + if (ldt_info.seg_not_present == 0) + return -EINVAL; + } + + if (ldt_info.base_addr == 0 && ldt_info.limit == 0 + && (oldmode || (ldt_info.contents == 0 && ldt_info.read_exec_only == 1 + && ldt_info.seg_32bit == 0 && ldt_info.limit_in_pages == 0 + && ldt_info.seg_not_present == 1 && ldt_info.useable == 0))) + /* allow LDTs to be cleared by the user */ + entry = 0; + else + /* we must set the "Accessed" bit as IVE doesn't emulate it */ + entry = IA32_SEG_DESCRIPTOR(ldt_info.base_addr, ldt_info.limit, + (((ldt_info.read_exec_only ^ 1) << 1) + | (ldt_info.contents << 2)) | 1, + 1, 3, ldt_info.seg_not_present ^ 1, + (oldmode ? 0 : ldt_info.useable), + ldt_info.seg_32bit, + ldt_info.limit_in_pages); + /* + * Install the new entry. We know we're accessing valid (mapped) user-level + * memory, but we still need to guard against out-of-memory, hence we must use + * put_user(). 
+ */ + return __put_user(entry, (__u64 *) IA32_LDT_OFFSET + ldt_info.entry_number); +} + +asmlinkage int +sys32_modify_ldt (int func, void *ptr, unsigned int bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index da8da13fab9f..3939c8fd01f6 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -1,6 +1,11 @@ /* * IA32 helper functions * + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com> + * Copyright (C) 2001 Hewlett-Packard Co + * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com> + * * 06/16/00 A. Mallick added csd/ssd/tssd for ia32 thread context * 02/19/01 D. Mosberger dropped tssd; it's not needed */ @@ -21,7 +26,7 @@ extern unsigned long *ia32_gdt_table, *ia32_tss; extern void die_if_kernel (char *str, struct pt_regs *regs, long err); void -ia32_save_state (struct thread_struct *thread) +ia32_save_state (struct task_struct *t) { unsigned long eflag, fsr, fcr, fir, fdr, csd, ssd; @@ -33,28 +38,30 @@ ia32_save_state (struct thread_struct *thread) "mov %5=ar.csd;" "mov %6=ar.ssd;" : "=r"(eflag), "=r"(fsr), "=r"(fcr), "=r"(fir), "=r"(fdr), "=r"(csd), "=r"(ssd)); - thread->eflag = eflag; - thread->fsr = fsr; - thread->fcr = fcr; - thread->fir = fir; - thread->fdr = fdr; - thread->csd = csd; - thread->ssd = ssd; - asm ("mov ar.k0=%0 ;;" :: "r"(thread->old_iob)); + t->thread.eflag = eflag; + t->thread.fsr = fsr; + t->thread.fcr = fcr; + t->thread.fir = fir; + t->thread.fdr = fdr; + t->thread.csd = csd; + t->thread.ssd = ssd; + ia64_set_kr(IA64_KR_IO_BASE, t->thread.old_iob); } void -ia32_load_state (struct thread_struct *thread) +ia32_load_state (struct task_struct *t) { unsigned long eflag, 
fsr, fcr, fir, fdr, csd, ssd; + struct pt_regs *regs = ia64_task_regs(t); + int nr; - eflag = thread->eflag; - fsr = thread->fsr; - fcr = thread->fcr; - fir = thread->fir; - fdr = thread->fdr; - csd = thread->csd; - ssd = thread->ssd; + eflag = t->thread.eflag; + fsr = t->thread.fsr; + fcr = t->thread.fcr; + fir = t->thread.fir; + fdr = t->thread.fdr; + csd = t->thread.csd; + ssd = t->thread.ssd; asm volatile ("mov ar.eflag=%0;" "mov ar.fsr=%1;" @@ -64,17 +71,22 @@ ia32_load_state (struct thread_struct *thread) "mov ar.csd=%5;" "mov ar.ssd=%6;" :: "r"(eflag), "r"(fsr), "r"(fcr), "r"(fir), "r"(fdr), "r"(csd), "r"(ssd)); - asm ("mov %0=ar.k0 ;;" : "=r"(thread->old_iob)); - asm ("mov ar.k0=%0 ;;" :: "r"(IA32_IOBASE)); + current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE); + ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE); + + /* load TSS and LDT while preserving SS and CS: */ + nr = smp_processor_id(); + regs->r17 = (_TSS(nr) << 48) | (_LDT(nr) << 32) | (__u32) regs->r17; } /* - * Setup IA32 GDT and TSS + * Setup IA32 GDT and TSS */ void -ia32_gdt_init(void) +ia32_gdt_init (void) { - unsigned long gdt_and_tss_page; + unsigned long gdt_and_tss_page, ldt_size; + int nr; /* allocate two IA-32 pages of memory: */ gdt_and_tss_page = __get_free_pages(GFP_KERNEL, @@ -86,17 +98,28 @@ ia32_gdt_init(void) /* Zero the gdt and tss */ memset((void *) gdt_and_tss_page, 0, 2*IA32_PAGE_SIZE); - /* CS descriptor in IA-32 format */ - ia32_gdt_table[4] = IA32_SEG_DESCRIPTOR(0L, 0xBFFFFFFFL, 0xBL, 1L, - 3L, 1L, 1L, 1L, 1L); - - /* DS descriptor in IA-32 format */ - ia32_gdt_table[5] = IA32_SEG_DESCRIPTOR(0L, 0xBFFFFFFFL, 0x3L, 1L, - 3L, 1L, 1L, 1L, 1L); + /* CS descriptor in IA-32 (scrambled) format */ + ia32_gdt_table[__USER_CS >> 3] = + IA32_SEG_DESCRIPTOR(0, (IA32_PAGE_OFFSET - 1) >> IA32_PAGE_SHIFT, + 0xb, 1, 3, 1, 1, 1, 1); + + /* DS descriptor in IA-32 (scrambled) format */ + ia32_gdt_table[__USER_DS >> 3] = + IA32_SEG_DESCRIPTOR(0, (IA32_PAGE_OFFSET - 1) >> IA32_PAGE_SHIFT, + 0x3, 1, 
3, 1, 1, 1, 1); + + /* We never change the TSS and LDT descriptors, so we can share them across all CPUs. */ + ldt_size = PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE); + for (nr = 0; nr < NR_CPUS; ++nr) { + ia32_gdt_table[_TSS(nr)] = IA32_SEG_DESCRIPTOR(IA32_TSS_OFFSET, 235, + 0xb, 0, 3, 1, 1, 1, 0); + ia32_gdt_table[_LDT(nr)] = IA32_SEG_DESCRIPTOR(IA32_LDT_OFFSET, ldt_size - 1, + 0x2, 0, 3, 1, 1, 1, 0); + } } /* - * Handle bad IA32 interrupt via syscall + * Handle bad IA32 interrupt via syscall */ void ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs) @@ -106,8 +129,7 @@ ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs) die_if_kernel("Bad IA-32 interrupt", regs, int_num); siginfo.si_signo = SIGTRAP; - siginfo.si_errno = int_num; /* XXX is it legal to abuse si_errno like this? */ + siginfo.si_errno = int_num; /* XXX is it OK to abuse si_errno like this? */ siginfo.si_code = TRAP_BRKPT; force_sig_info(SIGTRAP, &siginfo, current); } - diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index bb6e33770c4c..1e89a017ad98 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -232,10 +232,17 @@ do_mmap_fake(struct file *file, unsigned long addr, unsigned long len, back = NULL; if ((baddr = (addr & PAGE_MASK)) != addr && get_user(c, (char *)baddr) == 0) { front = kmalloc(addr - baddr, GFP_KERNEL); + if (!front) + return -ENOMEM; __copy_user(front, (void *)baddr, addr - baddr); } if (addr && ((addr + len) & ~PAGE_MASK) && get_user(c, (char *)(addr + len)) == 0) { back = kmalloc(PAGE_SIZE - ((addr + len) & ~PAGE_MASK), GFP_KERNEL); + if (!back) { + if (front) + kfree(front); + return -ENOMEM; + } __copy_user(back, (char *)addr + len, PAGE_SIZE - ((addr + len) & ~PAGE_MASK)); } down_write(&current->mm->mmap_sem); @@ -334,9 +341,9 @@ sys32_mmap(struct mmap_arg_struct *arg) down_write(&current->mm->mmap_sem); retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset >> PAGE_SHIFT); up_write(&current->mm->mmap_sem); 
-#else // CONFIG_IA64_PAGE_SIZE_4KB +#else retval = ia32_do_mmap(file, a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -#endif // CONFIG_IA64_PAGE_SIZE_4KB +#endif if (file) fput(file); return retval; @@ -660,9 +667,11 @@ ia32_utime(char * filename, struct utimbuf_32 *times32) long ret; if (times32) { - get_user(tv[0].tv_sec, &times32->atime); + if (get_user(tv[0].tv_sec, &times32->atime)) + return -EFAULT; tv[0].tv_usec = 0; - get_user(tv[1].tv_sec, &times32->mtime); + if (get_user(tv[1].tv_sec, &times32->mtime)) + return -EFAULT; tv[1].tv_usec = 0; set_fs (KERNEL_DS); tvp = tv; @@ -747,15 +756,18 @@ filldir32 (void *__buf, const char *name, int namlen, off_t offset, ino_t ino, buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; + buf->error = -EFAULT; /* only used if we fail.. */ dirent = buf->previous; if (dirent) - put_user(offset, &dirent->d_off); + if (put_user(offset, &dirent->d_off)) + return -EFAULT; dirent = buf->current_dir; buf->previous = dirent; - put_user(ino, &dirent->d_ino); - put_user(reclen, &dirent->d_reclen); - copy_to_user(dirent->d_name, name, namlen); - put_user(0, dirent->d_name + namlen); + if (put_user(ino, &dirent->d_ino) + || put_user(reclen, &dirent->d_reclen) + || copy_to_user(dirent->d_name, name, namlen) + || put_user(0, dirent->d_name + namlen)) + return -EFAULT; ((char *) dirent) += reclen; buf->current_dir = dirent; buf->count -= reclen; @@ -786,7 +798,9 @@ sys32_getdents (unsigned int fd, void * dirent, unsigned int count) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - put_user(file->f_pos, &lastdirent->d_off); + error = -EINVAL; + if (put_user(file->f_pos, &lastdirent->d_off)) + goto out_putf; error = count - buf.count; } @@ -807,11 +821,12 @@ fillonedir32 (void * __buf, const char * name, int namlen, off_t offset, ino_t i return -EINVAL; buf->count++; dirent = buf->dirent; - put_user(ino, &dirent->d_ino); - put_user(offset, &dirent->d_offset); - put_user(namlen, &dirent->d_namlen); - 
copy_to_user(dirent->d_name, name, namlen); - put_user(0, dirent->d_name + namlen); + if (put_user(ino, &dirent->d_ino) + || put_user(offset, &dirent->d_offset) + || put_user(namlen, &dirent->d_namlen) + || copy_to_user(dirent->d_name, name, namlen) + || put_user(0, dirent->d_name + namlen)) + return -EFAULT; return 0; } @@ -862,8 +877,10 @@ sys32_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval32 *tv if (tvp32) { time_t sec, usec; - get_user(sec, &tvp32->tv_sec); - get_user(usec, &tvp32->tv_usec); + ret = -EFAULT; + if (get_user(sec, &tvp32->tv_sec) + || get_user(usec, &tvp32->tv_usec)) + goto out_nofds; ret = -EINVAL; if (sec < 0 || usec < 0) @@ -916,8 +933,12 @@ sys32_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval32 *tv usec = timeout % HZ; usec *= (1000000/HZ); } - put_user(sec, (int *)&tvp32->tv_sec); - put_user(usec, (int *)&tvp32->tv_usec); + if (put_user(sec, (int *)&tvp32->tv_sec) + || put_user(usec, (int *)&tvp32->tv_usec)) + { + ret = -EFAULT; + goto out; + } } if (ret < 0) @@ -1558,16 +1579,15 @@ do_sys32_semctl(int first, int second, int third, void *uptr) { union semun fourth; u32 pad; - int err, err2; + int err = 0, err2; struct semid64_ds s; struct semid_ds32 *usp; mm_segment_t old_fs; if (!uptr) return -EINVAL; - err = -EFAULT; - if (get_user (pad, (u32 *)uptr)) - return err; + if (get_user(pad, (u32 *)uptr)) + return -EFAULT; if(third == SETVAL) fourth.val = (int)pad; else @@ -1749,15 +1769,14 @@ do_sys32_shmat (int first, int second, int third, int version, void *uptr) { unsigned long raddr; u32 *uaddr = (u32 *)A((u32)third); - int err = -EINVAL; + int err; if (version == 1) - return err; + return -EINVAL; err = sys_shmat (first, uptr, second, &raddr); if (err) return err; - err = put_user (raddr, uaddr); - return err; + return put_user(raddr, uaddr); } static int @@ -1886,8 +1905,7 @@ sys32_ipc (u32 call, int first, int second, int third, u32 ptr, u32 fifth) break; case SHMAT: - err = do_sys32_shmat 
(first, second, third, - version, (void *)AA(ptr)); + err = do_sys32_shmat (first, second, third, version, (void *)AA(ptr)); break; case SHMDT: err = sys_shmdt ((char *)AA(ptr)); @@ -2125,7 +2143,7 @@ getreg(struct task_struct *child, int regno) case PT_CS: return((unsigned int)__USER_CS); default: - printk("getregs:unknown register %d\n", regno); + printk(KERN_ERR "getregs:unknown register %d\n", regno); break; } @@ -2177,14 +2195,16 @@ putreg(struct task_struct *child, int regno, unsigned int value) case PT_GS: case PT_SS: if (value != __USER_DS) - printk("setregs:try to set invalid segment register %d = %x\n", regno, value); + printk(KERN_ERR "setregs:try to set invalid segment register %d = %x\n", + regno, value); break; case PT_CS: if (value != __USER_CS) - printk("setregs:try to set invalid segment register %d = %x\n", regno, value); + printk(KERN_ERR "setregs:try to set invalid segment register %d = %x\n", + regno, value); break; default: - printk("getregs:unknown register %d\n", regno); + printk(KERN_ERR "getregs:unknown register %d\n", regno); break; } @@ -2240,7 +2260,6 @@ put_fpreg(int regno, struct _fpreg_ia32 *reg, struct pt_regs *ptp, struct switch } __copy_to_user(reg, f, sizeof(*reg)); - return; } void @@ -2334,8 +2353,8 @@ asmlinkage long sys_ptrace(long, pid_t, unsigned long, unsigned long, long, long * the address of `stack' will not be the address of the `pt_regs'. 
*/ asmlinkage long -sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, - long arg4, long arg5, long arg6, long arg7, long stack) +sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data, + long arg4, long arg5, long arg6, long arg7, long stack) { struct pt_regs *regs = (struct pt_regs *) &stack; struct task_struct *child; @@ -2379,7 +2398,7 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, case PTRACE_PEEKDATA: /* read word at location addr */ ret = ia32_peek(regs, child, addr, &value); if (ret == 0) - ret = put_user(value, (unsigned int *)data); + ret = put_user(value, (unsigned int *)A(data)); else ret = -EIO; goto out; @@ -2398,12 +2417,12 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, break; case IA32_PTRACE_GETREGS: - if (!access_ok(VERIFY_WRITE, (int *)data, 17*sizeof(int))) { + if (!access_ok(VERIFY_WRITE, (int *) A(data), 17*sizeof(int))) { ret = -EIO; break; } for ( i = 0; i < 17*sizeof(int); i += sizeof(int) ) { - __put_user(getreg(child, i),(unsigned int *) data); + __put_user(getreg(child, i), (unsigned int *) A(data)); data += sizeof(int); } ret = 0; @@ -2412,12 +2431,12 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, case IA32_PTRACE_SETREGS: { unsigned int tmp; - if (!access_ok(VERIFY_READ, (int *)data, 17*sizeof(int))) { + if (!access_ok(VERIFY_READ, (int *) A(data), 17*sizeof(int))) { ret = -EIO; break; } for ( i = 0; i < 17*sizeof(int); i += sizeof(int) ) { - __get_user(tmp, (unsigned int *) data); + __get_user(tmp, (unsigned int *) A(data)); putreg(child, i, tmp); data += sizeof(int); } @@ -2426,11 +2445,11 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, } case IA32_PTRACE_GETFPREGS: - ret = save_ia32_fpstate(child, (struct _fpstate_ia32 *)data); + ret = save_ia32_fpstate(child, (struct _fpstate_ia32 *) A(data)); break; case IA32_PTRACE_SETFPREGS: - ret = 
restore_ia32_fpstate(child, (struct _fpstate_ia32 *)data); + ret = restore_ia32_fpstate(child, (struct _fpstate_ia32 *) A(data)); break; case PTRACE_SYSCALL: /* continue, stop after next syscall */ @@ -2547,8 +2566,8 @@ sys32_ni_syscall(int dummy0, int dummy1, int dummy2, int dummy3, { struct pt_regs *regs = (struct pt_regs *)&stack; - printk("IA32 syscall #%d issued, maybe we should implement it\n", - (int)regs->r1); + printk(KERN_WARNING "IA32 syscall #%d issued, maybe we should implement it\n", + (int)regs->r1); return(sys_ni_syscall()); } @@ -2558,7 +2577,7 @@ sys32_ni_syscall(int dummy0, int dummy1, int dummy2, int dummy3, #define IOLEN ((65536 / 4) * 4096) asmlinkage long -sys_iopl (int level, long arg1, long arg2, long arg3) +sys_iopl (int level) { extern unsigned long ia64_iobase; int fd; @@ -2570,7 +2589,7 @@ sys_iopl (int level, long arg1, long arg2, long arg3) if (level != 3) return(-EINVAL); /* Trying to gain more privileges? */ - __asm__ __volatile__("mov %0=ar.eflag ;;" : "=r"(old)); + asm volatile ("mov %0=ar.eflag ;;" : "=r"(old)); if (level > ((old >> 12) & 3)) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; @@ -2587,17 +2606,13 @@ sys_iopl (int level, long arg1, long arg2, long arg3) } down_write(&current->mm->mmap_sem); - lock_kernel(); - addr = do_mmap_pgoff(file, IA32_IOBASE, - IOLEN, PROT_READ|PROT_WRITE, MAP_SHARED, - (ia64_iobase & ~PAGE_OFFSET) >> PAGE_SHIFT); - - unlock_kernel(); + IOLEN, PROT_READ|PROT_WRITE, MAP_SHARED, + (ia64_iobase & ~PAGE_OFFSET) >> PAGE_SHIFT); up_write(&current->mm->mmap_sem); if (addr >= 0) { - __asm__ __volatile__("mov ar.k0=%0 ;;" :: "r"(addr)); + ia64_set_kr(IA64_KR_IO_BASE, addr); old = (old & ~0x3000) | (level << 12); __asm__ __volatile__("mov ar.eflag=%0 ;;" :: "r"(old)); } @@ -2608,7 +2623,7 @@ sys_iopl (int level, long arg1, long arg2, long arg3) } asmlinkage long -sys_ioperm (unsigned long from, unsigned long num, int on) +sys_ioperm (unsigned int from, unsigned int num, int on) { /* @@ -2621,7 +2636,7 @@ sys_ioperm
(unsigned long from, unsigned long num, int on) * XXX proper ioperm() support should be emulated by * manipulating the page protections... */ - return(sys_iopl(3, 0, 0, 0)); + return sys_iopl(3); } typedef struct { @@ -2750,6 +2765,54 @@ sys32_newuname(struct new_utsname * name) return ret; } +extern asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); + +asmlinkage long +sys32_getresuid (u16 *ruid, u16 *euid, u16 *suid) +{ + uid_t a, b, c; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_getresuid(&a, &b, &c); + set_fs(old_fs); + + if (put_user(a, ruid) || put_user(b, euid) || put_user(c, suid)) + return -EFAULT; + return ret; +} + +extern asmlinkage long sys_getresgid (gid_t *rgid, gid_t *egid, gid_t *sgid); + +asmlinkage long +sys32_getresgid(u16 *rgid, u16 *egid, u16 *sgid) +{ + gid_t a, b, c; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_getresgid(&a, &b, &c); + set_fs(old_fs); + + if (!ret) { + ret = put_user(a, rgid); + ret |= put_user(b, egid); + ret |= put_user(c, sgid); + } + return ret; +} + +int +sys32_lseek (unsigned int fd, int offset, unsigned int whence) +{ + extern off_t sys_lseek (unsigned int fd, off_t offset, unsigned int origin); + + /* Sign-extension of "offset" is important here... 
*/ + return sys_lseek(fd, offset, whence); +} + #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ /* In order to reduce some races, while at the same time doing additional @@ -3235,7 +3298,7 @@ sys32_rt_sigpending(sigset_t32 *set, __kernel_size_t32 sigsetsize) siginfo_t32 * siginfo64to32(siginfo_t32 *d, siginfo_t *s) { - memset (&d, 0, sizeof(siginfo_t32)); + memset(d, 0, sizeof(siginfo_t32)); d->si_signo = s->si_signo; d->si_errno = s->si_errno; d->si_code = s->si_code; @@ -3442,27 +3505,6 @@ sys32_setresgid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid, return sys_setresgid(srgid, segid, ssgid); } -extern asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); - -asmlinkage long -sys32_getresgid(__kernel_gid_t32 *rgid, __kernel_gid_t32 *egid, - __kernel_gid_t32 *sgid) -{ - gid_t a, b, c; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); - ret = sys_getresgid(&a, &b, &c); - set_fs (old_fs); - if (!ret) { - ret = put_user (a, rgid); - ret |= put_user (b, egid); - ret |= put_user (c, sgid); - } - return ret; -} - extern asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist); asmlinkage long diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 1cebaefc84da..5f61c2063425 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -13,13 +13,13 @@ O_TARGET := kernel.o export-objs := ia64_ksyms.o -obj-y := acpi.o entry.o gate.o efi.o efi_stub.o ia64_ksyms.o irq.o irq_ia64.o irq_sapic.o ivt.o \ +obj-y := acpi.o entry.o gate.o efi.o efi_stub.o ia64_ksyms.o irq.o irq_ia64.o irq_lsapic.o ivt.o \ machvec.o pal.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o obj-$(CONFIG_IA64_GENERIC) += machvec.o iosapic.o obj-$(CONFIG_IA64_DIG) += iosapic.o obj-$(CONFIG_IA64_PALINFO) += palinfo.o -obj-$(CONFIG_IA64_EFIVARS) += efivars.o +obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_SMP) += smp.o smpboot.o 
obj-$(CONFIG_IA64_MCA) += mca.o mca_asm.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index b60f7bb160fe..2ef86a8a4025 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -25,14 +25,12 @@ #include <linux/irq.h> #include <asm/acpi-ext.h> +#include <asm/acpikcfg.h> #include <asm/efi.h> #include <asm/io.h> #include <asm/iosapic.h> #include <asm/machvec.h> #include <asm/page.h> -#ifdef CONFIG_ACPI_KERNEL_CONFIG -# include <asm/acpikcfg.h> -#endif #undef ACPI_DEBUG /* Guess what this does? */ @@ -40,7 +38,8 @@ int __initdata available_cpus; int __initdata total_cpus; -void (*pm_idle)(void); +void (*pm_idle) (void); +void (*pm_power_off) (void); asm (".weak iosapic_register_legacy_irq"); asm (".weak iosapic_init"); @@ -206,11 +205,21 @@ acpi20_parse_madt (acpi_madt_t *madt) case ACPI20_ENTRY_IO_SAPIC: iosapic = (acpi_entry_iosapic_t *) p; if (iosapic_init) - iosapic_init(iosapic->address, iosapic->irq_base); + /* + * The PCAT_COMPAT flag indicates that the system has a + * dual-8259 compatible setup. + */ + iosapic_init(iosapic->address, iosapic->irq_base, +#ifdef CONFIG_ITANIUM + 1 /* fw on some Itanium systems is broken... 
*/ +#else + (madt->flags & MADT_PCAT_COMPAT) +#endif + ); break; case ACPI20_ENTRY_PLATFORM_INT_SOURCE: - printk("ACPI 2.0 MADT: PLATFORM INT SOUCE\n"); + printk("ACPI 2.0 MADT: PLATFORM INT SOURCE\n"); acpi20_platform(p); break; @@ -257,6 +266,7 @@ acpi20_parse_madt (acpi_madt_t *madt) int __init acpi20_parse (acpi20_rsdp_t *rsdp20) { +# ifdef CONFIG_ACPI acpi_xsdt_t *xsdt; acpi_desc_table_hdr_t *hdrp; int tables, i; @@ -287,9 +297,7 @@ acpi20_parse (acpi20_rsdp_t *rsdp20) hdrp->oem_revision >> 16, hdrp->oem_revision & 0xffff); -#ifdef CONFIG_ACPI_KERNEL_CONFIG acpi_cf_init((void *)rsdp20); -#endif tables =(hdrp->length -sizeof(acpi_desc_table_hdr_t))>>3; @@ -305,17 +313,16 @@ acpi20_parse (acpi20_rsdp_t *rsdp20) acpi20_parse_madt((acpi_madt_t *) hdrp); } -#ifdef CONFIG_ACPI_KERNEL_CONFIG acpi_cf_terminate(); -#endif -#ifdef CONFIG_SMP +# ifdef CONFIG_SMP if (available_cpus == 0) { printk("ACPI: Found 0 CPUS; assuming 1\n"); available_cpus = 1; /* We've got at least one of these, no? */ } smp_boot_data.cpu_count = total_cpus; -#endif +# endif +# endif /* CONFIG_ACPI */ return 1; } /* @@ -395,7 +402,12 @@ acpi_parse_msapic (acpi_sapic_t *msapic) case ACPI_ENTRY_IO_SAPIC: iosapic = (acpi_entry_iosapic_t *) p; if (iosapic_init) - iosapic_init(iosapic->address, iosapic->irq_base); + /* + * The ACPI I/O SAPIC table doesn't have a PCAT_COMPAT + * flag like the MADT table, but we can safely assume that + * ACPI 1.0b systems have a dual-8259 setup. 
+ */ + iosapic_init(iosapic->address, iosapic->irq_base, 1); break; case ACPI_ENTRY_INT_SRC_OVERRIDE: @@ -421,6 +433,7 @@ acpi_parse_msapic (acpi_sapic_t *msapic) int __init acpi_parse (acpi_rsdp_t *rsdp) { +# ifdef CONFIG_ACPI acpi_rsdt_t *rsdt; acpi_desc_table_hdr_t *hdrp; long tables, i; @@ -439,9 +452,7 @@ acpi_parse (acpi_rsdp_t *rsdp) printk("ACPI: %.6s %.8s %d.%d\n", rsdt->header.oem_id, rsdt->header.oem_table_id, rsdt->header.oem_revision >> 16, rsdt->header.oem_revision & 0xffff); -#ifdef CONFIG_ACPI_KERNEL_CONFIG acpi_cf_init(rsdp); -#endif tables = (rsdt->header.length - sizeof(acpi_desc_table_hdr_t)) / 8; for (i = 0; i < tables; i++) { @@ -454,16 +465,15 @@ acpi_parse (acpi_rsdp_t *rsdp) acpi_parse_msapic((acpi_sapic_t *) hdrp); } -#ifdef CONFIG_ACPI_KERNEL_CONFIG acpi_cf_terminate(); -#endif -#ifdef CONFIG_SMP +# ifdef CONFIG_SMP if (available_cpus == 0) { printk("ACPI: Found 0 CPUS; assuming 1\n"); available_cpus = 1; /* We've got at least one of these, no? */ } smp_boot_data.cpu_count = total_cpus; -#endif +# endif +# endif /* CONFIG_ACPI */ return 1; } diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index c0e2265cb085..761bba3b3ca7 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -18,10 +18,12 @@ * Goutham Rao: <goutham.rao@intel.com> * Skip non-WB memory and ignore empty memory ranges. */ +#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/types.h> #include <linux/time.h> +#include <linux/proc_fs.h> #include <asm/efi.h> #include <asm/io.h> @@ -36,6 +38,17 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; static efi_runtime_services_t *runtime; +/* + * efi_dir is allocated here, but the directory isn't created + * here, as proc_mkdir() doesn't work this early in the bootup + * process. Therefore, each module, like efivars, must test for + * if (!efi_dir) efi_dir = proc_mkdir("efi", NULL); + * prior to creating their own entries under /proc/efi. 
+ */ +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *efi_dir = NULL; +#endif + static unsigned long mem_limit = ~0UL; static efi_status_t @@ -220,10 +233,8 @@ efi_map_pal_code (void) /* * The only ITLB entry in region 7 that is used is the one installed by * __start(). That entry covers a 64MB range. - * - * XXX Fixme: should be dynamic here (for page size) */ - mask = ~((1 << _PAGE_SIZE_64M) - 1); + mask = ~((1 << KERNEL_PG_SHIFT) - 1); vaddr = PAGE_OFFSET + md->phys_addr; /* @@ -246,14 +257,14 @@ efi_map_pal_code (void) printk("CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n", smp_processor_id(), md->phys_addr, md->phys_addr + (md->num_pages << 12), - vaddr & mask, (vaddr & mask) + 64*1024*1024); + vaddr & mask, (vaddr & mask) + KERNEL_PG_SIZE); /* * Cannot write to CRx with PSR.ic=1 */ ia64_clear_ic(flags); ia64_itr(0x1, IA64_TR_PALCODE, vaddr & mask, - pte_val(mk_pte_phys(md->phys_addr, PAGE_KERNEL)), _PAGE_SIZE_64M); + pte_val(mk_pte_phys(md->phys_addr, PAGE_KERNEL)), KERNEL_PG_SHIFT); local_irq_restore(flags); ia64_srlz_i(); } @@ -441,3 +452,35 @@ efi_enter_virtual_mode (void) efi.get_next_high_mono_count = __va(runtime->get_next_high_mono_count); efi.reset_system = __va(runtime->reset_system); } + +/* + * Walk the EFI memory map looking for the I/O port range. There can only be one entry of + * this type, other I/O port ranges should be described via ACPI. 
+ */ +u64 +efi_get_iobase (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { + /* paranoia attribute checking */ + if (md->attribute == (EFI_MEMORY_UC | EFI_MEMORY_RUNTIME)) + return md->phys_addr; + } + } + return 0; +} + +static void __exit +efivars_exit(void) +{ + remove_proc_entry(efi_dir->name, NULL); +} diff --git a/arch/ia64/kernel/efivars.c b/arch/ia64/kernel/efivars.c index a253241bfd44..14254e8bf401 100644 --- a/arch/ia64/kernel/efivars.c +++ b/arch/ia64/kernel/efivars.c @@ -6,8 +6,8 @@ * This code takes all variables accessible from EFI runtime and * exports them via /proc * - * Reads to /proc/efi/varname return an efi_variable_t structure. - * Writes to /proc/efi/varname must be an efi_variable_t structure. + * Reads to /proc/efi/vars/varname return an efi_variable_t structure. + * Writes to /proc/efi/vars/varname must be an efi_variable_t structure. * Writes with DataSize = 0 or Attributes = 0 deletes the variable. * Writes with a new value in VariableName+VendorGuid creates * a new variable. @@ -29,6 +29,15 @@ * * Changelog: * + * 20 April 2001 - Matt Domsch <Matt_Domsch@dell.com> + * Moved vars from /proc/efi to /proc/efi/vars, and made + * efi.c own the /proc/efi directory. + * v0.03 release to linux-ia64@linuxia64.org + * + * 26 March 2001 - Matt Domsch <Matt_Domsch@dell.com> + * At the request of Stephane, moved ownership of /proc/efi + * to efi.c, and now efivars lives under /proc/efi/vars. + * * 12 March 2001 - Matt Domsch <Matt_Domsch@dell.com> * Feedback received from Stephane Eranian incorporated. * efivar_write() checks copy_from_user() return value. 
@@ -57,7 +66,7 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@Dell.com>"); MODULE_DESCRIPTION("/proc interface to EFI Variables"); -#define EFIVARS_VERSION "0.02 2001-Mar-12" +#define EFIVARS_VERSION "0.03 2001-Apr-20" static int efivar_read(char *page, char **start, off_t off, @@ -92,7 +101,7 @@ typedef struct _efivar_entry_t { spinlock_t efivars_lock = SPIN_LOCK_UNLOCKED; static LIST_HEAD(efivar_list); -static struct proc_dir_entry *efi_dir = NULL; +static struct proc_dir_entry *efi_vars_dir = NULL; #define efivar_entry(n) list_entry(n, efivar_entry_t, list) @@ -188,7 +197,7 @@ efivar_create_proc_entry(unsigned long variable_name_size, /* Create the entry in proc */ - new_efivar->entry = create_proc_entry(short_name, 0600, efi_dir); + new_efivar->entry = create_proc_entry(short_name, 0600, efi_vars_dir); kfree(short_name); short_name = NULL; if (!new_efivar->entry) return 1; @@ -286,7 +295,7 @@ efivar_write(struct file *file, const char *buffer, /* Since the data ptr we've currently got is probably for a different variable find the right variable. This allows any properly formatted data structure to - be written to any of the files in /proc/efi and it will work. + be written to any of the files in /proc/efi/vars and it will work. */ list_for_each(pos, &efivar_list) { search_efivar = efivar_entry(pos); @@ -320,7 +329,7 @@ efivar_write(struct file *file, const char *buffer, if (!var_data->DataSize || !var_data->Attributes) { /* We just deleted the NVRAM variable */ - remove_proc_entry(efivar->entry->name, efi_dir); + remove_proc_entry(efivar->entry->name, efi_vars_dir); list_del(&efivar->list); kfree(efivar); } @@ -354,12 +363,22 @@ efivars_init(void) printk(KERN_INFO "EFI Variables Facility v%s\n", EFIVARS_VERSION); + /* Since efi.c happens before procfs is available, + we create the directory here if it doesn't + already exist. There's probably a better way + to do this. 
+ */ + if (!efi_dir) + efi_dir = proc_mkdir("efi", NULL); + + efi_vars_dir = proc_mkdir("vars", efi_dir); + + + /* Per EFI spec, the maximum storage allocated for both the variable name and variable data is 1024 bytes. */ - efi_dir = proc_mkdir("efi", NULL); - memset(variable_name, 0, 1024); do { @@ -401,11 +420,11 @@ efivars_exit(void) list_for_each(pos, &efivar_list) { efivar = efivar_entry(pos); - remove_proc_entry(efivar->entry->name, efi_dir); + remove_proc_entry(efivar->entry->name, efi_vars_dir); list_del(&efivar->list); kfree(efivar); } - remove_proc_entry(efi_dir->name, NULL); + remove_proc_entry(efi_vars_dir->name, efi_dir); spin_unlock(&efivars_lock); } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 283138f4eaa4..7380e13fadfd 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -140,8 +140,8 @@ GLOBAL_ENTRY(ia64_switch_to) dep r20=0,in0,61,3 // physical address of "current" ;; st8 [r22]=sp // save kernel stack pointer of old task - shr.u r26=r20,_PAGE_SIZE_64M - mov r16=1 + shr.u r26=r20,KERNEL_PG_SHIFT + mov r16=KERNEL_PG_NUM ;; cmp.ne p6,p7=r26,r16 // check >= 64M && < 128M adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 @@ -175,7 +175,7 @@ GLOBAL_ENTRY(ia64_switch_to) ;; srlz.d or r23=r25,r20 // construct PA | page properties - mov r25=_PAGE_SIZE_64M<<2 + mov r25=KERNEL_PG_SHIFT<<2 ;; mov cr.itir=r25 mov cr.ifa=in0 // VA of next task... 
@@ -212,23 +212,20 @@ GLOBAL_ENTRY(save_switch_stack) .save @priunat,r17 mov r17=ar.unat // preserve caller's .body -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) adds r3=80,sp ;; lfetch.fault.excl.nt1 [r3],128 #endif mov ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) adds r2=16+128,sp ;; lfetch.fault.excl.nt1 [r2],128 lfetch.fault.excl.nt1 [r3],128 #endif adds r14=SW(R4)+16,sp -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) ;; lfetch.fault.excl [r2] lfetch.fault.excl [r3] @@ -325,8 +322,7 @@ ENTRY(load_switch_stack) .prologue .altrp b7 .body -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) lfetch.fault.nt1 [sp] #endif @@ -496,15 +492,13 @@ END(ia64_trace_syscall) GLOBAL_ENTRY(ia64_ret_from_clone) PT_REGS_UNWIND_INFO(0) -#ifdef CONFIG_SMP /* - * In SMP mode, we need to call invoke_schedule_tail to complete the scheduling process. + * We need to call schedule_tail() to complete the scheduling process. * Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the * address of the previously executing task. 
*/ br.call.sptk.few rp=invoke_schedule_tail .ret8: -#endif adds r2=IA64_TASK_PTRACE_OFFSET,r13 ;; ld8 r2=[r2] @@ -530,14 +524,9 @@ END(ia64_ret_from_syscall) // fall through GLOBAL_ENTRY(ia64_leave_kernel) PT_REGS_UNWIND_INFO(0) - cmp.eq p16,p0=r0,r0 // set the "first_time" flag - movl r15=PERCPU_ADDR+IA64_CPU_SOFTIRQ_ACTIVE_OFFSET // r15 = &cpu_data.softirq.active - ;; - ld8 r2=[r15] + lfetch.fault [sp] movl r14=.restart ;; - lfetch.fault [sp] - shr.u r3=r2,32 // r3 = cpu_data.softirq.mask MOVBR(.ret.sptk,rp,r14,.restart) .restart: adds r17=IA64_TASK_NEED_RESCHED_OFFSET,r13 @@ -546,37 +535,28 @@ GLOBAL_ENTRY(ia64_leave_kernel) adds r19=IA64_TASK_PFM_NOTIFY_OFFSET,r13 #endif ;; - ld8 r17=[r17] // load current->need_resched - ld4 r18=[r18] // load current->sigpending -(p16) and r2=r2,r3 // r2 <- (softirq.active & softirq.mask) - ;; #ifdef CONFIG_PERFMON ld8 r19=[r19] // load current->task.pfm_notify #endif -(p16) cmp4.ne.unc p6,p0=r2,r0 // p6 <- (softirq.active & softirq.mask) != 0 -(pUser) cmp.ne.unc p7,p0=r17,r0 // current->need_resched != 0? + ld8 r17=[r17] // load current->need_resched + ld4 r18=[r18] // load current->sigpending ;; -(pUser) cmp.ne.unc p8,p0=r18,r0 // current->sigpending != 0? #ifdef CONFIG_PERFMON cmp.ne p9,p0=r19,r0 // current->task.pfm_notify != 0? #endif - cmp.ne p16,p0=r0,r0 // clear the "first_time" flag +(pUser) cmp.ne.unc p7,p0=r17,r0 // current->need_resched != 0? +(pUser) cmp.ne.unc p8,p0=r18,r0 // current->sigpending != 0? 
;; -# if __GNUC__ < 3 -(p6) br.call.spnt.many b7=invoke_do_softirq -# else -(p6) br.call.spnt.many b7=do_softirq -# endif + adds r2=PT(R8)+16,r12 + adds r3=PT(R9)+16,r12 #ifdef CONFIG_PERFMON (p9) br.call.spnt.many b7=pfm_overflow_notify #endif -# if __GNUC__ < 3 +#if __GNUC__ < 3 (p7) br.call.spnt.many b7=invoke_schedule #else (p7) br.call.spnt.many b7=schedule #endif - adds r2=PT(R8)+16,r12 - adds r3=PT(R9)+16,r12 (p8) br.call.spnt.many b7=handle_signal_delivery // check & deliver pending signals ;; // start restoring the state saved on the kernel stack (struct pt_regs): @@ -634,14 +614,6 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; bsw.0 // switch back to bank 0 ;; -#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC - nop.i 0x0 - ;; - nop.i 0x0 - ;; - nop.i 0x0 - ;; -#endif adds r16=16,r12 adds r17=24,r12 ;; @@ -792,7 +764,6 @@ ENTRY(handle_syscall_error) br.cond.sptk.many ia64_leave_kernel END(handle_syscall_error) -# ifdef CONFIG_SMP /* * Invoke schedule_tail(task) while preserving in0-in7, which may be needed * in case a system call gets restarted. @@ -809,29 +780,7 @@ ENTRY(invoke_schedule_tail) br.ret.sptk.many rp END(invoke_schedule_tail) -# endif /* CONFIG_SMP */ - #if __GNUC__ < 3 - /* - * Invoke do_softirq() while preserving in0-in7, which may be needed - * in case a system call gets restarted. Note that declaring do_softirq() - * with asmlinkage() is NOT enough because that will only preserve as many - * registers as there are formal arguments. - * - * XXX fix me: with gcc 3.0, we won't need this anymore because syscall_linkage - * renders all eight input registers (in0-in7) as "untouchable". 
- */ -ENTRY(invoke_do_softirq) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,0,0 - mov loc0=rp - ;; - .body - br.call.sptk.few rp=do_softirq -.ret13: mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(invoke_do_softirq) /* * Invoke schedule() while preserving in0-in7, which may be needed @@ -1187,7 +1136,7 @@ sys_call_table: data8 sys_newfstat data8 sys_clone2 data8 sys_getdents64 - data8 ia64_ni_syscall // 1215 + data8 sys_getunwind // 1215 data8 ia64_ni_syscall data8 ia64_ni_syscall data8 ia64_ni_syscall diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h index 5d24db0b14bc..34051263d75e 100644 --- a/arch/ia64/kernel/entry.h +++ b/arch/ia64/kernel/entry.h @@ -1,7 +1,7 @@ #include <linux/config.h> /* XXX fixme */ -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC) +#if defined(CONFIG_ITANIUM_B1_SPECIFIC) # define MOVBR(type,br,gr,lbl) mov br=gr #else # define MOVBR(type,br,gr,lbl) mov##type br=gr,lbl diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c index 98344e4e8762..78ee7766db2f 100644 --- a/arch/ia64/kernel/fw-emu.c +++ b/arch/ia64/kernel/fw-emu.c @@ -20,7 +20,7 @@ #define MB (1024*1024UL) -#define NUM_MEM_DESCS 2 +#define NUM_MEM_DESCS 3 static char fw_mem[( sizeof(struct ia64_boot_param) + sizeof(efi_system_table_t) @@ -121,68 +121,63 @@ offtime (unsigned long t, efi_time_t *tp) */ extern void pal_emulator_static (void); -asm (" - .proc pal_emulator_static -pal_emulator_static: - mov r8=-1 - - mov r9=256 - ;; - cmp.gtu p6,p7=r9,r28 /* r28 <= 255? 
*/ -(p6) br.cond.sptk.few static - ;; - mov r9=512 - ;; - cmp.gtu p6,p7=r9,r28 -(p6) br.cond.sptk.few stacked - ;; -static: cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */ -(p7) br.cond.sptk.few 1f - ;; - mov r8=0 /* status = 0 */ - movl r9=0x100000000 /* tc.base */ - movl r10=0x0000000200000003 /* count[0], count[1] */ - movl r11=0x1000000000002000 /* stride[0], stride[1] */ - br.cond.sptk.few rp - -1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */ -(p7) br.cond.sptk.few 1f - mov r8=0 /* status = 0 */ - movl r9 =0x100000064 /* proc_ratio (1/100) */ - movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ - movl r11=0x100000064 /* itc_ratio<<32 (1/100) */ - ;; -1: cmp.eq p6,p7=19,r28 /* PAL_RSE_INFO */ -(p7) br.cond.sptk.few 1f - mov r8=0 /* status = 0 */ - mov r9=96 /* num phys stacked */ - mov r10=0 /* hints */ - mov r11=0 - br.cond.sptk.few rp - -1: cmp.eq p6,p7=1,r28 /* PAL_CACHE_FLUSH */ -(p7) br.cond.sptk.few 1f - mov r9=ar.lc - movl r8=524288 /* flush 512k million cache lines (16MB) */ - ;; - mov ar.lc=r8 - movl r8=0xe000000000000000 - ;; -.loop: fc r8 - add r8=32,r8 - br.cloop.sptk.few .loop - sync.i - ;; - srlz.i - ;; - mov ar.lc=r9 - mov r8=r0 -1: br.cond.sptk.few rp - -stacked: - br.ret.sptk.few rp - - .endp pal_emulator_static\n"); +asm ( +" .proc pal_emulator_static\n" +"pal_emulator_static:" +" mov r8=-1\n" +" mov r9=256\n" +" ;;\n" +" cmp.gtu p6,p7=r9,r28 /* r28 <= 255? 
*/\n" +"(p6) br.cond.sptk.few static\n" +" ;;\n" +" mov r9=512\n" +" ;;\n" +" cmp.gtu p6,p7=r9,r28\n" +"(p6) br.cond.sptk.few stacked\n" +" ;;\n" +"static: cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */\n" +"(p7) br.cond.sptk.few 1f\n" +" ;;\n" +" mov r8=0 /* status = 0 */\n" +" movl r9=0x100000000 /* tc.base */\n" +" movl r10=0x0000000200000003 /* count[0], count[1] */\n" +" movl r11=0x1000000000002000 /* stride[0], stride[1] */\n" +" br.cond.sptk.few rp\n" +"1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */\n" +"(p7) br.cond.sptk.few 1f\n" +" mov r8=0 /* status = 0 */\n" +" movl r9 =0x100000064 /* proc_ratio (1/100) */\n" +" movl r10=0x100000100 /* bus_ratio<<32 (1/256) */\n" +" movl r11=0x100000064 /* itc_ratio<<32 (1/100) */\n" +" ;;\n" +"1: cmp.eq p6,p7=19,r28 /* PAL_RSE_INFO */\n" +"(p7) br.cond.sptk.few 1f\n" +" mov r8=0 /* status = 0 */\n" +" mov r9=96 /* num phys stacked */\n" +" mov r10=0 /* hints */\n" +" mov r11=0\n" +" br.cond.sptk.few rp\n" +"1: cmp.eq p6,p7=1,r28 /* PAL_CACHE_FLUSH */\n" +"(p7) br.cond.sptk.few 1f\n" +" mov r9=ar.lc\n" +" movl r8=524288 /* flush 512k million cache lines (16MB) */\n" +" ;;\n" +" mov ar.lc=r8\n" +" movl r8=0xe000000000000000\n" +" ;;\n" +".loop: fc r8\n" +" add r8=32,r8\n" +" br.cloop.sptk.few .loop\n" +" sync.i\n" +" ;;\n" +" srlz.i\n" +" ;;\n" +" mov ar.lc=r9\n" +" mov r8=r0\n" +"1: br.cond.sptk.few rp\n" +"stacked:\n" +" br.ret.sptk.few rp\n" +" .endp pal_emulator_static\n"); /* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
*/ @@ -437,8 +432,17 @@ sys_fw_init (const char *args, int arglen) sal_systab->checksum = -checksum; - /* fill in a memory descriptor: */ + /* simulate free memory at physical address zero */ md = &efi_memmap[0]; + md->type = EFI_BOOT_SERVICES_DATA; + md->pad = 0; + md->phys_addr = 0*MB; + md->virt_addr = 0; + md->num_pages = (1*MB) >> 12; /* 1MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* fill in a memory descriptor: */ + md = &efi_memmap[1]; md->type = EFI_CONVENTIONAL_MEMORY; md->pad = 0; md->phys_addr = 2*MB; @@ -447,7 +451,7 @@ sys_fw_init (const char *args, int arglen) md->attribute = EFI_MEMORY_WB; /* descriptor for firmware emulator: */ - md = &efi_memmap[1]; + md = &efi_memmap[2]; md->type = EFI_PAL_CODE; md->pad = 0; md->phys_addr = 1*MB; @@ -462,7 +466,7 @@ sys_fw_init (const char *args, int arglen) */ /* descriptor for high memory (>4GB): */ - md = &efi_memmap[2]; + md = &efi_memmap[3]; md->type = EFI_CONVENTIONAL_MEMORY; md->pad = 0; md->phys_addr = 4096*MB; diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index d588b2988512..05942e22977d 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -15,15 +15,23 @@ .section .text.gate,"ax" - .align PAGE_SIZE +# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) +# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) +# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) +# define RBS_BASE_OFF (16 + IA64_SIGFRAME_RBS_BASE_OFFSET) +# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) +# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) -# define SIGINFO_OFF 16 -# define SIGCONTEXT_OFF (SIGINFO_OFF + ((IA64_SIGINFO_SIZE + 15) & ~15)) # define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET # define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET # define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET # define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET # define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET +# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET 
+# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET +# define RP_OFF IA64_SIGCONTEXT_B0_OFFSET +# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET # define base0 r2 # define base1 r3 /* @@ -31,17 +39,9 @@ * * +===============================+ * | | - * // struct sigcontext // + * // struct sigframe // * | | - * +===============================+ <-- sp+SIGCONTEXT_OFF - * | | - * // rest of siginfo // - * | | - * + +---------------+ - * | | siginfo.code | - * +---------------+---------------+ - * | siginfo.errno | siginfo.signo | - * +-------------------------------+ <-- sp+SIGINFO_OFF + * +-------------------------------+ <-- sp+16 * | 16 byte of scratch | * | space | * +-------------------------------+ <-- sp @@ -51,46 +51,60 @@ * incoming general register may be a NaT value (including sp, in which case the * process ends up dying with a SIGSEGV). * - * The first need to do is a cover to get the registers onto the backing store. - * Once that is done, we invoke the signal handler which may modify some of the - * machine state. After returning from the signal handler, we return control to - * the previous context by executing a sigreturn system call. A signal handler - * may call the rt_sigreturn() function to directly return to a given sigcontext. - * However, the user-level sigreturn() needs to do much more than calling the - * rt_sigreturn() system call as it needs to unwind the stack to restore preserved - * registers that may have been saved on the signal handler's call stack. - * - * On entry: - * r2 = signal number - * r3 = plabel of signal handler - * r15 = new register backing store - * [sp+16] = sigframe + * The first thing need to do is a cover to get the registers onto the backing + * store. Once that is done, we invoke the signal handler which may modify some + * of the machine state. After returning from the signal handler, we return + * control to the previous context by executing a sigreturn system call. 
A signal + * handler may call the rt_sigreturn() function to directly return to a given + * sigcontext. However, the user-level sigreturn() needs to do much more than + * calling the rt_sigreturn() system call as it needs to unwind the stack to + * restore preserved registers that may have been saved on the signal handler's + * call stack. */ GLOBAL_ENTRY(ia64_sigtramp) - ld8 r10=[r3],8 // get signal handler entry point - br.call.sptk.many rp=invoke_sighandler -END(ia64_sigtramp) + // describe the state that is active when we get here: + .prologue + .unwabi @svr4, 's' // mark this as a sigtramp handler (saves scratch regs) + .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF + .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF + .savesp pr, PR_OFF+SIGCONTEXT_OFF + .savesp rp, RP_OFF+SIGCONTEXT_OFF + .vframesp SP_OFF+SIGCONTEXT_OFF + .body -ENTRY(invoke_sighandler) - ld8 gp=[r3] // get signal handler's global pointer - mov b6=r10 + .prologue + adds base0=SIGHANDLER_OFF,sp + adds base1=RBS_BASE_OFF,sp + br.call.sptk.many rp=1f +1: + ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel + ld8 r15=[base1],(ARG1_OFF-RBS_BASE_OFF) // get address of new RBS base (or NULL) cover // push args in interrupted frame onto backing store ;; + .save ar.pfs, r8 alloc r8=ar.pfs,0,0,3,0 // get CFM0, EC0, and CPL0 into r8 + ld8 out0=[base0],16 // load arg0 (signum) + ;; + ld8 out1=[base1] // load arg1 (siginfop) + ld8 r10=[r17],8 // get signal handler entry point ;; - mov r17=ar.bsp // fetch ar.bsp + ld8 out2=[base0] // load arg2 (sigcontextp) + ld8 gp=[r17] // get signal handler's global pointer cmp.ne p8,p0=r15,r0 // do we need to switch the rbs? 
- mov out0=r2 // signal number + + mov.m r17=ar.bsp // fetch ar.bsp + .spillsp.p p8, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF (p8) br.cond.spnt.few setup_rbs // yup -> (clobbers r14 and r16) back_from_setup_rbs: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp ;; + .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF st8 [base0]=r17,(CFM_OFF-BSP_OFF) // save sc_ar_bsp - dep r8=0,r8,38,26 // clear EC0, CPL0 and reserved bits + dep r8=0,r8,38,26 // clear EC0, CPL0 and reserved bits adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp ;; - + .spillsp ar.pfs, CFM_OFF st8 [base0]=r8 // save CFM0 adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp ;; @@ -99,14 +113,13 @@ back_from_setup_rbs: ;; stf.spill [base0]=f8,32 stf.spill [base1]=f9,32 + mov b6=r10 ;; stf.spill [base0]=f10,32 stf.spill [base1]=f11,32 - adds out1=SIGINFO_OFF,sp // siginfo pointer ;; stf.spill [base0]=f12,32 stf.spill [base1]=f13,32 - adds out2=SIGCONTEXT_OFF,sp // sigcontext pointer ;; stf.spill [base0]=f14,32 stf.spill [base1]=f15,32 @@ -140,9 +153,8 @@ back_from_restore_rbs: ldf.fill f15=[base1],32 mov r15=__NR_rt_sigreturn break __BREAK_SYSCALL -END(invoke_sighandler) -ENTRY(setup_rbs) +setup_rbs: flushrs // must be first in insn mov ar.rsc=0 // put RSE into enforced lazy mode adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp @@ -150,13 +162,13 @@ ENTRY(setup_rbs) mov r14=ar.rnat // get rnat as updated by flushrs mov ar.bspstore=r15 // set new register backing store area ;; + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF st8 [r16]=r14 // save sc_ar_rnat mov ar.rsc=0xf // set RSE into eager mode, pl 3 invala // invalidate ALAT br.cond.sptk.many back_from_setup_rbs -END(setup_rbs) -ENTRY(restore_rbs) +restore_rbs: flushrs mov ar.rsc=0 // put RSE into enforced lazy mode adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp @@ -168,4 +180,4 @@ ENTRY(restore_rbs) mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) // invala not necessary as that will happen when returning to user-mode br.cond.sptk.many back_from_restore_rbs -END(restore_rbs) +END(ia64_sigtramp) diff --git 
a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 5dc27921c880..590a59f7922f 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -63,17 +63,17 @@ start_ap: * that maps the kernel's text and data: */ rsm psr.i | psr.ic - mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, PAGE_OFFSET) << 8) | (_PAGE_SIZE_64M << 2)) + mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, PAGE_OFFSET) << 8) | (KERNEL_PG_SHIFT << 2)) ;; srlz.i - mov r18=_PAGE_SIZE_64M<<2 - movl r17=PAGE_OFFSET + 64*1024*1024 + mov r18=KERNEL_PG_SHIFT<<2 + movl r17=PAGE_OFFSET + KERNEL_PG_NUM*KERNEL_PG_SIZE ;; mov rr[r17]=r16 mov cr.itir=r18 mov cr.ifa=r17 mov r16=IA64_TR_KERNEL - movl r18=(64*1024*1024 | PAGE_KERNEL) + movl r18=(KERNEL_PG_NUM*KERNEL_PG_SIZE | PAGE_KERNEL) ;; srlz.i ;; @@ -111,7 +111,7 @@ start_ap: ;; #ifdef CONFIG_IA64_EARLY_PRINTK - mov r3=(6<<8) | (_PAGE_SIZE_64M<<2) + mov r3=(6<<8) | (KERNEL_PG_SHIFT<<2) movl r2=6<<61 ;; mov rr[r2]=r3 @@ -123,11 +123,12 @@ start_ap: #define isAP p2 // are we an Application Processor? #define isBP p3 // are we the Bootstrap Processor? +#ifdef CONFIG_SMP /* * Find the init_task for the currently booting CPU. At poweron, and in - * UP mode, cpu_now_booting is 0. + * UP mode, cpucount is 0. 
*/ - movl r3=cpu_now_booting + movl r3=cpucount ;; ld4 r3=[r3] // r3 <- smp_processor_id() movl r2=init_tasks @@ -135,6 +136,11 @@ start_ap: shladd r2=r3,3,r2 ;; ld8 r2=[r2] +#else + mov r3=0 + movl r2=init_task_union + ;; +#endif cmp4.ne isAP,isBP=r3,r0 ;; // RAW on r2 extr r3=r2,0,61 // r3 == phys addr of task struct @@ -182,7 +188,7 @@ alive_msg: #endif /* CONFIG_IA64_EARLY_PRINTK */ #ifdef CONFIG_SMP -(isAP) br.call.sptk.few rp=smp_callin +(isAP) br.call.sptk.few rp=start_secondary .ret0: (isAP) br.cond.sptk.few self #endif @@ -212,8 +218,7 @@ GLOBAL_ENTRY(ia64_save_debug_regs) add r19=IA64_NUM_DBG_REGS*8,in0 ;; 1: mov r16=dbr[r18] -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_C0_SPECIFIC) +#if defined(CONFIG_ITANIUM_C0_SPECIFIC) ;; srlz.d #endif @@ -230,8 +235,7 @@ END(ia64_save_debug_regs) GLOBAL_ENTRY(ia64_load_debug_regs) alloc r16=ar.pfs,1,0,0,0 -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) lfetch.nta [in0] #endif mov r20=ar.lc // preserve ar.lc @@ -244,8 +248,7 @@ GLOBAL_ENTRY(ia64_load_debug_regs) add r18=1,r18 ;; mov dbr[r18]=r16 -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_C0_SPECIFIC) +#if defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) || defined(CONFIG_ITANIUM_C0_SPECIFIC) ;; srlz.d #endif diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index fa875a4e9615..0c94d0b1c1cc 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -7,6 +7,7 @@ #include <linux/string.h> EXPORT_SYMBOL_NOVERS(memset); +EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL(memmove); @@ -30,6 +31,9 @@ EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); EXPORT_SYMBOL(disable_irq_nosync); 
+#include <linux/interrupt.h> +EXPORT_SYMBOL(probe_irq_mask); + #include <linux/in6.h> #include <asm/checksum.h> /* not coded yet?? EXPORT_SYMBOL(csum_ipv6_magic); */ @@ -48,15 +52,14 @@ EXPORT_SYMBOL_NOVERS(__down); EXPORT_SYMBOL_NOVERS(__down_interruptible); EXPORT_SYMBOL_NOVERS(__down_trylock); EXPORT_SYMBOL_NOVERS(__up); -EXPORT_SYMBOL_NOVERS(__down_read_failed); -EXPORT_SYMBOL_NOVERS(__down_write_failed); -EXPORT_SYMBOL_NOVERS(__rwsem_wake); #include <asm/page.h> EXPORT_SYMBOL(clear_page); #include <asm/processor.h> -EXPORT_SYMBOL(cpu_data); +# ifndef CONFIG_NUMA +EXPORT_SYMBOL(_cpu_data); +# endif EXPORT_SYMBOL(kernel_thread); #include <asm/system.h> @@ -78,6 +81,7 @@ EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(smp_call_function); EXPORT_SYMBOL(smp_call_function_single); EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(ia64_cpu_to_sapicid); #include <linux/smp.h> EXPORT_SYMBOL(smp_num_cpus); @@ -137,3 +141,8 @@ EXPORT_SYMBOL(ia64_pal_call_static); extern struct efi efi; EXPORT_SYMBOL(efi); + +#include <linux/proc_fs.h> +extern struct proc_dir_entry *efi_dir; +EXPORT_SYMBOL(efi_dir); + diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 2be224ec3cfe..13d7067d4a85 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -20,7 +20,7 @@ * Here is what the interrupt logic between a PCI device and the CPU looks like: * * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The - * device is uniquely identified by its bus-, device-, and slot-number (the function + * device is uniquely identified by its bus--, and slot-number (the function * number does not matter here because all functions share the same interrupt * lines). 
* @@ -51,6 +51,7 @@ #include <linux/irq.h> #include <asm/acpi-ext.h> +#include <asm/acpikcfg.h> #include <asm/delay.h> #include <asm/io.h> #include <asm/iosapic.h> @@ -59,9 +60,6 @@ #include <asm/ptrace.h> #include <asm/system.h> -#ifdef CONFIG_ACPI_KERNEL_CONFIG -# include <asm/acpikcfg.h> -#endif #undef DEBUG_IRQ_ROUTING @@ -207,7 +205,45 @@ unmask_irq (unsigned int irq) static void iosapic_set_affinity (unsigned int irq, unsigned long mask) { - printk("iosapic_set_affinity: not implemented yet\n"); +#ifdef CONFIG_SMP + unsigned long flags; + u32 high32, low32; + int dest, pin; + char *addr; + + mask &= (1UL << smp_num_cpus) - 1; + + if (!mask || irq >= IA64_NUM_VECTORS) + return; + + dest = cpu_physical_id(ffz(~mask)); + + pin = iosapic_irq[irq].pin; + addr = iosapic_irq[irq].addr; + + if (pin < 0) + return; /* not an IOSAPIC interrupt */ + + /* dest contains both id and eid */ + high32 = dest << IOSAPIC_DEST_SHIFT; + + spin_lock_irqsave(&iosapic_lock, flags); + { + /* get current delivery mode by reading the low32 */ + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + low32 = readl(addr + IOSAPIC_WINDOW); + + /* change delivery mode to fixed */ + low32 &= ~(7 << IOSAPIC_DELIVERY_SHIFT); + low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + + writel(IOSAPIC_RTE_HIGH(pin), addr + IOSAPIC_REG_SELECT); + writel(high32, addr + IOSAPIC_WINDOW); + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + writel(low32, addr + IOSAPIC_WINDOW); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +#endif } /* @@ -330,7 +366,7 @@ iosapic_register_legacy_irq (unsigned long irq, } void __init -iosapic_init (unsigned long phys_addr, unsigned int base_irq) +iosapic_init (unsigned long phys_addr, unsigned int base_irq, int pcat_compat) { struct hw_interrupt_type *irq_type; int i, irq, max_pin, vector; @@ -348,13 +384,7 @@ iosapic_init (unsigned long phys_addr, unsigned int base_irq) /* * Fetch the PCI interrupt routing table: */ -#ifdef CONFIG_ACPI_KERNEL_CONFIG 
acpi_cf_get_pci_vectors(&pci_irq.route, &pci_irq.num_routes); -#else - pci_irq.route = - (struct pci_vector_struct *) __va(ia64_boot_param->pci_vectors); - pci_irq.num_routes = ia64_boot_param->num_pci_vectors; -#endif } addr = ioremap(phys_addr, 0); @@ -365,7 +395,7 @@ iosapic_init (unsigned long phys_addr, unsigned int base_irq) printk("IOSAPIC: version %x.%x, address 0x%lx, IRQs 0x%02x-0x%02x\n", (ver & 0xf0) >> 4, (ver & 0x0f), phys_addr, base_irq, base_irq + max_pin); - if (base_irq == 0) + if ((base_irq == 0) && pcat_compat) /* * Map the legacy ISA devices into the IOSAPIC data. Some of these may * get reprogrammed later on with data from the ACPI Interrupt Source diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 2a0cc2424e58..c0a9f341be91 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -626,6 +626,8 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs) desc->handler->end(irq); spin_unlock(&desc->lock); } + if (local_softirq_pending()) + do_softirq(); return 1; } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 09fb3526fa02..89e48dca23ac 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -72,6 +72,11 @@ void ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) { unsigned long saved_tpr; +#ifdef CONFIG_SMP +# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) +#else +# define IS_RESCHEDULE(vec) (0) +#endif #if IRQ_DEBUG { @@ -110,24 +115,25 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) */ saved_tpr = ia64_get_tpr(); ia64_srlz_d(); - do { - ia64_set_tpr(vector); - ia64_srlz_d(); - - do_IRQ(local_vector_to_irq(vector), regs); - - /* - * Disable interrupts and send EOI: - */ - local_irq_disable(); - ia64_set_tpr(saved_tpr); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + if (!IS_RESCHEDULE(vector)) { + ia64_set_tpr(vector); + ia64_srlz_d(); + + do_IRQ(local_vector_to_irq(vector), regs); + + /* + * Disable interrupts and send EOI: + */ + 
local_irq_disable(); + ia64_set_tpr(saved_tpr); + } ia64_eoi(); vector = ia64_get_ivr(); - } while (vector != IA64_SPURIOUS_INT_VECTOR); + } } #ifdef CONFIG_SMP - extern void handle_IPI (int irq, void *dev_id, struct pt_regs *regs); static struct irqaction ipi_irqaction = { @@ -147,7 +153,7 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action) if (irq_to_vector(irq) == vec) { desc = irq_desc(irq); desc->status |= IRQ_PER_CPU; - desc->handler = &irq_type_ia64_sapic; + desc->handler = &irq_type_ia64_lsapic; if (action) setup_irq(irq, action); } diff --git a/arch/ia64/kernel/irq_sapic.c b/arch/ia64/kernel/irq_lsapic.c index a431275a8db5..01e999a61ccb 100644 --- a/arch/ia64/kernel/irq_sapic.c +++ b/arch/ia64/kernel/irq_lsapic.c @@ -1,10 +1,10 @@ /* - * SAPIC Interrupt Controller + * LSAPIC Interrupt Controller * * This takes care of interrupts that are generated by the CPU's * internal Streamlined Advanced Programmable Interrupt Controller - * (SAPIC), such as the ITC and IPI interrupts. - * + * (LSAPIC), such as the ITC and IPI interrupts. + * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> * Copyright (C) 2000 Hewlett-Packard Co @@ -15,24 +15,24 @@ #include <linux/irq.h> static unsigned int -sapic_noop_startup (unsigned int irq) +lsapic_noop_startup (unsigned int irq) { return 0; } static void -sapic_noop (unsigned int irq) +lsapic_noop (unsigned int irq) { /* nuthing to do... 
*/ } -struct hw_interrupt_type irq_type_ia64_sapic = { - typename: "SAPIC", - startup: sapic_noop_startup, - shutdown: sapic_noop, - enable: sapic_noop, - disable: sapic_noop, - ack: sapic_noop, - end: sapic_noop, - set_affinity: (void (*)(unsigned int, unsigned long)) sapic_noop +struct hw_interrupt_type irq_type_ia64_lsapic = { + typename: "LSAPIC", + startup: lsapic_noop_startup, + shutdown: lsapic_noop, + enable: lsapic_noop, + disable: lsapic_noop, + ack: lsapic_noop, + end: lsapic_noop, + set_affinity: (void (*)(unsigned int, unsigned long)) lsapic_noop }; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index ffec15271003..393079fd68a3 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -9,16 +9,14 @@ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT. */ /* - * This file defines the interrupt vector table used by the CPU. + * This file defines the interruption vector table used by the CPU. * It does not include one entry per possible cause of interruption. * - * External interrupts only use 1 entry. All others are internal interrupts - * * The first 20 entries of the table contain 64 bundles each while the * remaining 48 entries contain only 16 bundles each. * * The 64 bundles are used to allow inlining the whole handler for critical - * interrupts like TLB misses. + * interruptions like TLB misses. * * For each entry, the comment is as follows: * @@ -27,7 +25,7 @@ * entry number ---------/ / / / * size of the entry -------------/ / / * vector name -------------------------------------/ / - * related interrupts (what is the real interrupt?) ----------/ + * interruptions triggering this vector ----------------------/ * * The table is 32KB in size and must be aligned on 32KB boundary. 
* (The CPU ignores the 15 lower bits of the address) @@ -363,7 +361,7 @@ ENTRY(page_fault) ;; ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collectin is on ;; (p15) ssm psr.i // restore psr.i movl r14=ia64_leave_kernel @@ -536,8 +534,7 @@ ENTRY(iaccess_bit) ;; 1: ld8 r18=[r17] ;; -# if defined(CONFIG_IA32_SUPPORT) && \ - (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) +# if defined(CONFIG_IA32_SUPPORT) && defined(CONFIG_ITANIUM_B0_SPECIFIC) /* * Erratum 85 (Access bit fault could be reported before page not present fault) * If the PTE is indicates the page is not present, then just turn this into a @@ -567,8 +564,7 @@ ENTRY(iaccess_bit) ;; 1: ld8 r18=[r17] ;; -# if defined(CONFIG_IA32_SUPPORT) && \ - (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) +# if defined(CONFIG_IA32_SUPPORT) && defined(CONFIG_ITANIUM_B0_SPECIFIC) /* * Erratum 85 (Access bit fault could be reported before page not present fault) * If the PTE is indicates the page is not present, then just turn this into a @@ -650,7 +646,7 @@ ENTRY(break_fault) ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collection is on cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 ;; (p15) ssm psr.i // restore psr.i @@ -702,7 +698,7 @@ ENTRY(break_fault) st8 [r16]=r18 // store new value for cr.isr (p8) br.call.sptk.many b6=b6 // ignore this return addr - br.call.sptk.many rp=ia64_trace_syscall // rp will be overwritten (ignored) + br.cond.sptk.many ia64_trace_syscall // NOT REACHED END(break_fault) @@ -724,11 +720,14 @@ ENTRY(demine_args) tnat.nz p15,p0=in7 (p11) mov in3=-1 + tnat.nz p8,p0=r15 // demining r15 is not a must, but it is safer + (p12) mov in4=-1 (p13) mov in5=-1 ;; (p14) mov in6=-1 (p15) mov in7=-1 +(p8) mov r15=-1 br.ret.sptk.many rp END(demine_args) @@ -790,7 +789,7 @@ 
ENTRY(dispatch_illegal_op_fault) SAVE_MIN_WITH_COVER ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collection is on ;; (p15) ssm psr.i // restore psr.i adds r3=8,r2 // set up second base pointer for SAVE_REST @@ -839,7 +838,7 @@ ENTRY(dispatch_to_ia32_handler) mov r14=cr.isr ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collection is on ;; (p15) ssm psr.i adds r3=8,r2 // Base pointer for SAVE_REST @@ -890,8 +889,7 @@ ENTRY(dispatch_to_ia32_handler) ;; mov rp=r15 (p8) br.call.sptk.many b6=b6 - ;; - br.call.sptk.many rp=ia32_trace_syscall // rp will be overwritten (ignored) + br.cond.sptk.many ia32_trace_syscall non_ia32_syscall: alloc r15=ar.pfs,0,0,2,0 @@ -928,7 +926,7 @@ ENTRY(non_syscall) ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collection is on ;; (p15) ssm psr.i // restore psr.i movl r15=ia64_leave_kernel @@ -961,7 +959,7 @@ ENTRY(dispatch_unaligned_handler) ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collection is on ;; (p15) ssm psr.i // restore psr.i adds r3=8,r2 // set up second base pointer @@ -1003,7 +1001,7 @@ ENTRY(dispatch_to_fault_handler) ;; ssm psr.ic | PSR_DEFAULT_BITS ;; - srlz.i // guarantee that interrupt collection is enabled + srlz.i // guarantee that interruption collection is on ;; (p15) ssm psr.i // restore psr.i adds r3=8,r2 // set up second base pointer for SAVE_REST diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ae02f6e34ced..19e445ff44dc 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -27,6 +27,7 @@ #include <asm/mca.h> #include <asm/irq.h> +#include <asm/machvec.h> typedef struct ia64_fptr { diff --git a/arch/ia64/kernel/mca_asm.S 
b/arch/ia64/kernel/mca_asm.S index 2acbf2a13b77..45e0ffa47726 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -672,7 +672,7 @@ ia64_monarch_init_handler: // mov r17=cr.lid // XXX fix me: this is wrong: hard_smp_processor_id() is a pair of lid/eid - movl r18=__cpu_physical_id + movl r18=ia64_cpu_to_sapicid ;; dep r18=0,r18,61,3 // convert to physical address ;; diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 6c300adf3746..a7e15a010909 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -235,12 +235,6 @@ stf.spill [r2]=f8,32; \ stf.spill [r3]=f9,32 -#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC -# define STOPS nop.i 0x0;; nop.i 0x0;; nop.i 0x0;; -#else -# define STOPS -#endif - -#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov rCRIFS=cr.ifs,) STOPS -#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov rCRIFS=cr.ifs, mov r15=r19) STOPS -#define SAVE_MIN DO_SAVE_MIN( , mov rCRIFS=r0, ) STOPS +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov rCRIFS=cr.ifs,) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov rCRIFS=cr.ifs, mov r15=r19) +#define SAVE_MIN DO_SAVE_MIN( , mov rCRIFS=r0, ) diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c index 91e11dc513c2..091086975fa8 100644 --- a/arch/ia64/kernel/pci.c +++ b/arch/ia64/kernel/pci.c @@ -1,6 +1,6 @@ /* * pci.c - Low-Level PCI Access in IA-64 - * + * * Derived from bios32.c of i386 tree. 
*/ #include <linux/config.h> @@ -53,7 +53,7 @@ struct pci_fixup pcibios_fixups[] = { #define PCI_CONFIG_ADDRESS(dev, where) \ (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) -static int +static int pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value) { s64 status; @@ -64,7 +64,7 @@ pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value) return status; } -static int +static int pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value) { s64 status; @@ -75,7 +75,7 @@ pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value) return status; } -static int +static int pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value) { s64 status; @@ -86,19 +86,19 @@ pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value) return status; } -static int +static int pci_conf_write_config_byte (struct pci_dev *dev, int where, u8 value) { return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 1, value); } -static int +static int pci_conf_write_config_word (struct pci_dev *dev, int where, u16 value) { return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 2, value); } -static int +static int pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value) { return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value); @@ -116,7 +116,7 @@ struct pci_ops pci_conf = { /* * Initialization. 
Uses the SAL interface */ -void __init +void __init pcibios_init (void) { # define PCI_BUSES_TO_SCAN 255 @@ -125,7 +125,7 @@ pcibios_init (void) platform_pci_fixup(0); /* phase 0 initialization (before PCI bus has been scanned) */ printk("PCI: Probing PCI hardware\n"); - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) + for (i = 0; i < PCI_BUSES_TO_SCAN; i++) pci_scan_bus(i, &pci_conf, NULL); platform_pci_fixup(1); /* phase 1 initialization (after PCI bus has been scanned) */ @@ -146,14 +146,14 @@ void __init pcibios_update_resource (struct pci_dev *dev, struct resource *root, struct resource *res, int resource) { - unsigned long where, size; - u32 reg; + unsigned long where, size; + u32 reg; - where = PCI_BASE_ADDRESS_0 + (resource * 4); - size = res->end - res->start; - pci_read_config_dword(dev, where, &reg); - reg = (reg & size) | (((u32)(res->start - root->start)) & ~size); - pci_write_config_dword(dev, where, reg); + where = PCI_BASE_ADDRESS_0 + (resource * 4); + size = res->end - res->start; + pci_read_config_dword(dev, where, &reg); + reg = (reg & size) | (((u32)(res->start - root->start)) & ~size); + pci_write_config_dword(dev, where, reg); /* ??? FIXME -- record old value for shutdown. 
*/ } @@ -190,7 +190,7 @@ pcibios_align_resource (void *data, struct resource *res, unsigned long size) /* * PCI BIOS setup, always defaults to SAL interface */ -char * __init +char * __init pcibios_setup (char *str) { return NULL; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ac619c40dc20..488f63591daf 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -32,6 +32,7 @@ #include <asm/processor.h> #include <asm/signal.h> #include <asm/system.h> +#include <asm/system.h> #include <asm/uaccess.h> #include <asm/delay.h> /* for ia64_get_itc() */ @@ -467,7 +468,7 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long which_pmds, unsigned lon if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; /* find some free area in address space */ - addr = get_unmapped_area(NULL, 0, size, 0, 0); + addr = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE); if (!addr) goto no_addr; DBprintk((" entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, addr)); @@ -573,12 +574,8 @@ pfx_is_sane(pfreq_context_t *pfx) /* cannot send to process 1, 0 means do not notify */ if (pfx->notify_pid < 0 || pfx->notify_pid == 1) return 0; - /* asked for sampling, but nothing to record ! 
*/ - if (pfx->smpl_entries > 0 && pfm_smpl_entry_size(&pfx->smpl_regs, 1) == 0) return 0; - /* probably more to add here */ - return 1; } @@ -786,26 +783,22 @@ pfm_read_pmds(struct task_struct *ta, perfmon_req_t *req, int count) /* XXX: ctx locking may be required here */ for (i = 0; i < count; i++, req++) { - int k; - if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; if (!PMD_IS_IMPL(tmp.pfr_reg.reg_num)) return -EINVAL; - k = tmp.pfr_reg.reg_num - PMU_FIRST_COUNTER; - if (PMD_IS_COUNTER(tmp.pfr_reg.reg_num)) { if (ta == current){ val = ia64_get_pmd(tmp.pfr_reg.reg_num); } else { - val = th->pmd[k]; + val = th->pmd[tmp.pfr_reg.reg_num]; } val &= pmu_conf.perf_ovfl_val; /* * lower part of .val may not be zero, so we must be an addition because of * residual count (see update_counters). */ - val += ctx->ctx_pmds[k].val; + val += ctx->ctx_pmds[tmp.pfr_reg.reg_num - PMU_FIRST_COUNTER].val; } else { /* for now */ if (ta != current) return -EINVAL; @@ -1646,7 +1639,7 @@ perfmon_init (void) pmu_conf.pfm_is_disabled = 1; - printk("perfmon: version %s\n", PFM_VERSION); + printk("perfmon: version %s (sampling format v%d)\n", PFM_VERSION, PFM_SMPL_HDR_VERSION); printk("perfmon: Interrupt vectored to %u\n", IA64_PERFMON_VECTOR); if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) { @@ -1658,11 +1651,8 @@ perfmon_init (void) pmu_conf.num_pmds = find_num_pm_regs(pmu_conf.impl_regs); pmu_conf.num_pmcs = find_num_pm_regs(&pmu_conf.impl_regs[4]); - printk("perfmon: Counters are %d bits\n", pm_info.pal_perf_mon_info_s.width); - printk("perfmon: Maximum counter value 0x%lx\n", pmu_conf.perf_ovfl_val); - printk("perfmon: %ld PMC/PMD pairs\n", pmu_conf.max_counters); - printk("perfmon: %ld PMCs, %ld PMDs\n", pmu_conf.num_pmcs, pmu_conf.num_pmds); - printk("perfmon: Sampling format v%d\n", PFM_SMPL_HDR_VERSION); + printk("perfmon: %d bits counters (max value 0x%lx)\n", pm_info.pal_perf_mon_info_s.width, pmu_conf.perf_ovfl_val); + printk("perfmon: %ld 
PMC/PMD pairs, %ld PMCs, %ld PMDs\n", pmu_conf.max_counters, pmu_conf.num_pmcs, pmu_conf.num_pmds); /* sanity check */ if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 79403c0fcd82..d1fc4cb4e92c 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -143,7 +143,7 @@ ia64_save_extra (struct task_struct *task) pfm_save_regs(task); #endif if (IS_IA32_PROCESS(ia64_task_regs(task))) - ia32_save_state(&task->thread); + ia32_save_state(task); } void @@ -156,7 +156,7 @@ ia64_load_extra (struct task_struct *task) pfm_load_regs(task); #endif if (IS_IA32_PROCESS(ia64_task_regs(task))) - ia32_load_state(&task->thread); + ia32_load_state(task); } /* @@ -282,10 +282,11 @@ copy_thread (int nr, unsigned long clone_flags, * state from the current task to the new task */ if (IS_IA32_PROCESS(ia64_task_regs(current))) - ia32_save_state(&p->thread); + ia32_save_state(p); #endif #ifdef CONFIG_PERFMON - if (current->thread.pfm_context) + p->thread.pfm_pend_notify = 0; + if (p->thread.pfm_context) retval = pfm_inherit(p); #endif return retval; @@ -294,11 +295,10 @@ copy_thread (int nr, unsigned long clone_flags, void do_copy_regs (struct unw_frame_info *info, void *arg) { - unsigned long ar_bsp, addr, mask, sp, nat_bits = 0, ip, ar_rnat; + unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm; elf_greg_t *dst = arg; struct pt_regs *pt; char nat; - long val; int i; memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */ @@ -309,17 +309,13 @@ do_copy_regs (struct unw_frame_info *info, void *arg) unw_get_sp(info, &sp); pt = (struct pt_regs *) (sp + 16); - ar_bsp = ia64_get_user_bsp(current, pt); + urbs_end = ia64_get_user_rbs_end(current, pt, &cfm); - /* - * Write portion of RSE backing store living on the kernel stack to the VM of the - * process. 
- */ - for (addr = pt->ar_bspstore; addr < ar_bsp; addr += 8) - if (ia64_peek(current, ar_bsp, addr, &val) == 0) - access_process_vm(current, addr, &val, sizeof(val), 1); + if (ia64_sync_user_rbs(current, info->sw, pt->ar_bspstore, urbs_end) < 0) + return; - ia64_peek(current, ar_bsp, (long) ia64_rse_rnat_addr((long *) addr - 1), &ar_rnat); + ia64_peek(current, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end), + &ar_rnat); /* * coredump format: @@ -347,7 +343,7 @@ do_copy_regs (struct unw_frame_info *info, void *arg) unw_get_rp(info, &ip); dst[42] = ip + ia64_psr(pt)->ri; - dst[43] = pt->cr_ifs & 0x3fffffffff; + dst[43] = cfm; dst[44] = pt->cr_ipsr & IA64_PSR_UM; unw_get_ar(info, UNW_AR_RSC, &dst[45]); @@ -355,7 +351,7 @@ do_copy_regs (struct unw_frame_info *info, void *arg) * For bsp and bspstore, unw_get_ar() would return the kernel * addresses, but we need the user-level addresses instead: */ - dst[46] = ar_bsp; + dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */ dst[47] = pt->ar_bspstore; dst[48] = ar_rnat; unw_get_ar(info, UNW_AR_CCV, &dst[49]); @@ -528,13 +524,11 @@ machine_restart (char *restart_cmd) void machine_halt (void) { - printk("machine_halt: need PAL or ACPI version here!!\n"); - machine_restart(0); } void machine_power_off (void) { - printk("machine_power_off: unimplemented (need ACPI version here)\n"); - machine_halt (); + if (pm_power_off) + pm_power_off(); } diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 89842b32eb5c..e13f8bf4235b 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -144,12 +144,10 @@ ia64_decrement_ip (struct pt_regs *regs) } /* - * This routine is used to read an rnat bits that are stored on the - * kernel backing store. Since, in general, the alignment of the user - * and kernel are different, this is not completely trivial. 
In - * essence, we need to construct the user RNAT based on up to two - * kernel RNAT values and/or the RNAT value saved in the child's - * pt_regs. + * This routine is used to read an rnat bits that are stored on the kernel backing store. + * Since, in general, the alignment of the user and kernel are different, this is not + * completely trivial. In essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's pt_regs. * * user rbs * @@ -182,20 +180,18 @@ ia64_decrement_ip (struct pt_regs *regs) * +--------+ * <--- child_stack->ar_bspstore * - * The way to think of this code is as follows: bit 0 in the user rnat - * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat - * value. The kernel rnat value holding this bit is stored in - * variable rnat0. rnat1 is loaded with the kernel rnat value that + * The way to think of this code is as follows: bit 0 in the user rnat corresponds to some + * bit N (0 <= N <= 62) in one of the kernel rnat value. The kernel rnat value holding + * this bit is stored in variable rnat0. rnat1 is loaded with the kernel rnat value that * form the upper bits of the user rnat value. * * Boundary cases: * - * o when reading the rnat "below" the first rnat slot on the kernel - * backing store, rnat0/rnat1 are set to 0 and the low order bits - * are merged in from pt->ar_rnat. + * o when reading the rnat "below" the first rnat slot on the kernel backing store, + * rnat0/rnat1 are set to 0 and the low order bits are merged in from pt->ar_rnat. * - * o when reading the rnat "above" the last rnat slot on the kernel - * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. + * o when reading the rnat "above" the last rnat slot on the kernel backing store, + * rnat0/rnat1 gets its value from sw->ar_rnat. 
*/ static unsigned long get_rnat (struct pt_regs *pt, struct switch_stack *sw, @@ -289,57 +285,82 @@ put_rnat (struct pt_regs *pt, struct switch_stack *sw, } } +/* + * Read a word from the user-level backing store of task CHILD. ADDR is the user-level + * address to read the word from, VAL a pointer to the return value, and USER_BSP gives + * the end of the user-level backing store (i.e., it's the address that would be in ar.bsp + * after the user executed a "cover" instruction). + * + * This routine takes care of accessing the kernel register backing store for those + * registers that got spilled there. It also takes care of calculating the appropriate + * RNaT collection words. + */ long -ia64_peek (struct task_struct *child, unsigned long user_bsp, unsigned long addr, long *val) +ia64_peek (struct task_struct *child, struct switch_stack *child_stack, unsigned long user_rbs_end, + unsigned long addr, long *val) { - unsigned long *bspstore, *krbs, regnum, *laddr, *ubsp = (long *) user_bsp; - struct switch_stack *child_stack; + unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr; struct pt_regs *child_regs; size_t copied; long ret; + urbs_end = (long *) user_rbs_end; laddr = (unsigned long *) addr; child_regs = ia64_task_regs(child); - child_stack = (struct switch_stack *) (child->thread.ksp + 16); bspstore = (unsigned long *) child_regs->ar_bspstore; krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; - if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(ubsp)) { + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(urbs_end)) { /* * Attempt to read the RBS in an area that's actually on the kernel RBS => * read the corresponding bits in the kernel RBS. 
*/ - if (ia64_rse_is_rnat_slot(laddr)) - ret = get_rnat(child_regs, child_stack, krbs, laddr); - else { - if (laddr >= ubsp) - ret = 0; - else { - regnum = ia64_rse_num_regs(bspstore, laddr); - ret = *ia64_rse_skip_regs(krbs, regnum); - } + rnat_addr = ia64_rse_rnat_addr(laddr); + ret = get_rnat(child_regs, child_stack, krbs, rnat_addr); + + if (laddr == rnat_addr) { + /* return NaT collection word itself */ + *val = ret; + return 0; + } + + if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) { + /* + * It is implementation dependent whether the data portion of a + * NaT value gets saved on a st8.spill or RSE spill (e.g., see + * EAS 2.6, 4.4.4.6 Register Spill and Fill). To get consistent + * behavior across all possible IA-64 implementations, we return + * zero in this case. + */ + *val = 0; + return 0; + } + + if (laddr < urbs_end) { + /* the desired word is on the kernel RBS and is not a NaT */ + regnum = ia64_rse_num_regs(bspstore, laddr); + *val = *ia64_rse_skip_regs(krbs, regnum); + return 0; } - } else { - copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); - if (copied != sizeof(ret)) - return -EIO; } + copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + if (copied != sizeof(ret)) + return -EIO; *val = ret; return 0; } long -ia64_poke (struct task_struct *child, unsigned long user_bsp, unsigned long addr, long val) +ia64_poke (struct task_struct *child, struct switch_stack *child_stack, unsigned long user_rbs_end, + unsigned long addr, long val) { - unsigned long *bspstore, *krbs, regnum, *laddr, *ubsp = (long *) user_bsp; - struct switch_stack *child_stack; + unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end = (long *) user_rbs_end; struct pt_regs *child_regs; laddr = (unsigned long *) addr; child_regs = ia64_task_regs(child); - child_stack = (struct switch_stack *) (child->thread.ksp + 16); bspstore = (unsigned long *) child_regs->ar_bspstore; krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; - if (laddr >= bspstore && 
laddr <= ia64_rse_rnat_addr(ubsp)) { + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(urbs_end)) { /* * Attempt to write the RBS in an area that's actually on the kernel RBS * => write the corresponding bits in the kernel RBS. @@ -347,7 +368,7 @@ ia64_poke (struct task_struct *child, unsigned long user_bsp, unsigned long addr if (ia64_rse_is_rnat_slot(laddr)) put_rnat(child_regs, child_stack, krbs, laddr, val); else { - if (laddr < ubsp) { + if (laddr < urbs_end) { regnum = ia64_rse_num_regs(bspstore, laddr); *ia64_rse_skip_regs(krbs, regnum) = val; } @@ -359,11 +380,13 @@ ia64_poke (struct task_struct *child, unsigned long user_bsp, unsigned long addr } /* - * Calculate the user-level address that would have been in ar.bsp had the user executed a - * "cover" instruction right before entering the kernel. + * Calculate the address of the end of the user-level register backing store. This is the + * address that would have been stored in ar.bsp if the user had executed a "cover" + * instruction right before entering the kernel. If CFMP is not NULL, it is used to + * return the "current frame mask" that was active at the time the kernel was entered. 
*/ unsigned long -ia64_get_user_bsp (struct task_struct *child, struct pt_regs *pt) +ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, unsigned long *cfmp) { unsigned long *krbs, *bspstore, cfm; struct unw_frame_info info; @@ -372,6 +395,7 @@ ia64_get_user_bsp (struct task_struct *child, struct pt_regs *pt) krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; bspstore = (unsigned long *) pt->ar_bspstore; ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + cfm = pt->cr_ifs & ~(1UL << 63); if ((long) pt->cr_ifs >= 0) { /* @@ -385,81 +409,102 @@ ia64_get_user_bsp (struct task_struct *child, struct pt_regs *pt) ndirty += (cfm & 0x7f); } } + if (cfmp) + *cfmp = cfm; return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); } /* * Synchronize (i.e, write) the RSE backing store living in kernel space to the VM of the - * indicated child process. - * - * If new_bsp is non-zero, the bsp will (effectively) be updated to the new value upon - * resumption of the child process. This is accomplished by setting the loadrs value to - * zero and the bspstore value to the new bsp value. - * - * When new_bsp and flush_user_rbs are both 0, the register backing store in kernel space - * is written to user space and the loadrs and bspstore values are left alone. - * - * When new_bsp is zero and flush_user_rbs is 1 (non-zero), loadrs is set to 0, and the - * bspstore value is set to the old bsp value. This will cause the stacked registers (r32 - * and up) to be obtained entirely from the child's memory space rather than from the - * kernel. (This makes it easier to write code for modifying the stacked registers in - * multi-threaded programs.) - * - * Note: I had originally written this function without the flush_user_rbs parameter; it - * was written so that loadrs would always be set to zero. But I had problems with - * certain system calls apparently causing a portion of the RBS to be zeroed. (I still - * don't understand why this was happening.) 
Anyway, it'd definitely less intrusive to - * leave loadrs and bspstore alone if possible. + * CHILD task. SW and PT are the pointers to the switch_stack and pt_regs structures, + * respectively. USER_RBS_END is the user-level address at which the backing store ends. */ -static long -sync_kernel_register_backing_store (struct task_struct *child, long user_bsp, long new_bsp, - int flush_user_rbs) +long +ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) { - struct pt_regs *child_regs = ia64_task_regs(child); unsigned long addr, val; long ret; - /* - * Return early if nothing to do. Note that new_bsp will be zero if the caller - * wants to force synchronization without changing bsp. - */ - if (user_bsp == new_bsp) - return 0; - - /* Write portion of backing store living on kernel stack to the child's VM. */ - for (addr = child_regs->ar_bspstore; addr < user_bsp; addr += 8) { - ret = ia64_peek(child, user_bsp, addr, &val); - if (ret != 0) + /* now copy word for word from kernel rbs to user rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + ret = ia64_peek(child, sw, user_rbs_end, addr, &val); + if (ret < 0) return ret; if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) return -EIO; } + return 0; +} - if (new_bsp != 0) { - flush_user_rbs = 1; - user_bsp = new_bsp; - } +/* + * Simulate user-level "flushrs". Note: we can't just add pt->loadrs>>16 to + * pt->ar_bspstore because the kernel backing store and the user-level backing store may + * have different alignments (and therefore a different number of intervening rnat slots). 
+ */ +static void +user_flushrs (struct task_struct *task, struct pt_regs *pt) +{ + unsigned long *krbs; + long ndirty; - if (flush_user_rbs) { - child_regs->loadrs = 0; - child_regs->ar_bspstore = user_bsp; - } - return 0; + krbs = (unsigned long *) task + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + pt->ar_bspstore = (unsigned long) ia64_rse_skip_regs((unsigned long *) pt->ar_bspstore, + ndirty); + pt->loadrs = 0; } +/* + * Synchronize the RSE backing store of CHILD and all tasks that share the address space + * with it. CHILD_URBS_END is the address of the end of the register backing store of + * CHILD. If MAKE_WRITABLE is set, a user-level "flushrs" is simulated such that the VM + * can be written via ptrace() and the tasks will pick up the newly written values. It + * would be OK to unconditionally simulate a "flushrs", but this would be more intrusive + * than strictly necessary (e.g., it would make it impossible to obtain the original value + * of ar.bspstore). 
+ */ static void -sync_thread_rbs (struct task_struct *child, long bsp, struct mm_struct *mm, int make_writable) +threads_sync_user_rbs (struct task_struct *child, unsigned long child_urbs_end, int make_writable) { + struct switch_stack *sw; + unsigned long urbs_end; struct task_struct *p; - read_lock(&tasklist_lock); + struct mm_struct *mm; + struct pt_regs *pt; + long multi_threaded; + + task_lock(child); { - for_each_task(p) { - if (p->mm == mm && p->state != TASK_RUNNING) - sync_kernel_register_backing_store(p, bsp, 0, make_writable); + mm = child->mm; + multi_threaded = mm && (atomic_read(&mm->mm_users) > 1); + } + task_unlock(child); + + if (!multi_threaded) { + sw = (struct switch_stack *) (child->thread.ksp + 16); + pt = ia64_task_regs(child); + ia64_sync_user_rbs(child, sw, pt->ar_bspstore, child_urbs_end); + if (make_writable) + user_flushrs(child, pt); + } else { + read_lock(&tasklist_lock); + { + for_each_task(p) { + if (p->mm == mm && p->state != TASK_RUNNING) { + sw = (struct switch_stack *) (p->thread.ksp + 16); + pt = ia64_task_regs(p); + urbs_end = ia64_get_user_rbs_end(p, pt, NULL); + ia64_sync_user_rbs(p, sw, pt->ar_bspstore, urbs_end); + if (make_writable) + user_flushrs(p, pt); + } + } } + read_unlock(&tasklist_lock); } - read_unlock(&tasklist_lock); - child->thread.flags |= IA64_THREAD_KRBS_SYNCED; + child->thread.flags |= IA64_THREAD_KRBS_SYNCED; /* set the flag in the child thread only */ } /* @@ -528,7 +573,7 @@ access_fr (struct unw_frame_info *info, int regnum, int hi, unsigned long *data, static int access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data, int write_access) { - unsigned long *ptr, regnum, bsp, rnat_addr; + unsigned long *ptr, regnum, urbs_end, rnat_addr; struct switch_stack *sw; struct unw_frame_info info; struct pt_regs *pt; @@ -625,13 +670,24 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data /* scratch state */ switch (addr) { case PT_AR_BSP: - bsp = 
ia64_get_user_bsp(child, pt); - if (write_access) - return sync_kernel_register_backing_store(child, bsp, *data, 1); - else { - *data = bsp; - return 0; - } + /* + * By convention, we use PT_AR_BSP to refer to the end of the user-level + * backing store. Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) to get + * the real value of ar.bsp at the time the kernel was entered. + */ + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + if (write_access) { + if (*data != urbs_end) { + if (ia64_sync_user_rbs(child, sw, + pt->ar_bspstore, urbs_end) < 0) + return -1; + /* simulate user-level write of ar.bsp: */ + pt->loadrs = 0; + pt->ar_bspstore = *data; + } + } else + *data = urbs_end; + return 0; case PT_CFM: if ((long) pt->cr_ifs < 0) { @@ -666,12 +722,12 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data return 0; case PT_AR_RNAT: - bsp = ia64_get_user_bsp(child, pt); - rnat_addr = (long) ia64_rse_rnat_addr((long *) bsp - 1); + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + rnat_addr = (long) ia64_rse_rnat_addr((long *) urbs_end); if (write_access) - return ia64_poke(child, bsp, rnat_addr, *data); + return ia64_poke(child, sw, urbs_end, rnat_addr, *data); else - return ia64_peek(child, bsp, rnat_addr, data); + return ia64_peek(child, sw, urbs_end, rnat_addr, data); case PT_R1: case PT_R2: case PT_R3: case PT_R8: case PT_R9: case PT_R10: case PT_R11: @@ -738,8 +794,9 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, long arg4, long arg5, long arg6, long arg7, long stack) { struct pt_regs *pt, *regs = (struct pt_regs *) &stack; + unsigned long flags, urbs_end; struct task_struct *child; - unsigned long flags, bsp; + struct switch_stack *sw; long ret; lock_kernel(); @@ -784,25 +841,17 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, goto out_tsk; pt = ia64_task_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); switch (request) { case PTRACE_PEEKTEXT: case 
PTRACE_PEEKDATA: /* read word at location addr */ - bsp = ia64_get_user_bsp(child, pt); - if (!(child->thread.flags & IA64_THREAD_KRBS_SYNCED)) { - struct mm_struct *mm; - long do_sync; - - task_lock(child); - { - mm = child->mm; - do_sync = mm && (atomic_read(&mm->mm_users) > 1); - } - task_unlock(child); - if (do_sync) - sync_thread_rbs(child, bsp, mm, 0); - } - ret = ia64_peek(child, bsp, addr, &data); + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + + if (!(child->thread.flags & IA64_THREAD_KRBS_SYNCED)) + threads_sync_user_rbs(child, urbs_end, 0); + + ret = ia64_peek(child, sw, urbs_end, addr, &data); if (ret == 0) { ret = data; regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ @@ -811,21 +860,11 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, case PTRACE_POKETEXT: case PTRACE_POKEDATA: /* write the word at location addr */ - bsp = ia64_get_user_bsp(child, pt); - if (!(child->thread.flags & IA64_THREAD_KRBS_SYNCED)) { - struct mm_struct *mm; - long do_sync; - - task_lock(child); - { - mm = child->mm; - do_sync = mm && (atomic_read(&child->mm->mm_users) > 1); - } - task_unlock(child); - if (do_sync) - sync_thread_rbs(child, bsp, mm, 1); - } - ret = ia64_poke(child, bsp, addr, data); + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + if (!(child->thread.flags & IA64_THREAD_KRBS_SYNCED)) + threads_sync_user_rbs(child, urbs_end, 1); + + ret = ia64_poke(child, sw, urbs_end, addr, data); goto out_tsk; case PTRACE_PEEKUSR: /* read the word at addr in the USER area */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 68358cee27e9..7f15ccdd2120 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -3,7 +3,7 @@ * * Copyright (C) 1998-2001 Hewlett-Packard Co * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com> - * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1998, 1999, 2001 Stephane Eranian <eranian@hpl.hp.com> * Copyright 
(C) 2000, Rohit Seth <rohit.seth@intel.com> * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> @@ -46,20 +46,20 @@ # error "struct cpuinfo_ia64 too big!" #endif +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + extern char _end; -/* cpu_data[0] is data for the bootstrap processor: */ -struct cpuinfo_ia64 cpu_data[NR_CPUS] __attribute__ ((section ("__special_page_section"))); +#ifdef CONFIG_NUMA + struct cpuinfo_ia64 *boot_cpu_data; +#else + struct cpuinfo_ia64 _cpu_data[NR_CPUS] __attribute__ ((section ("__special_page_section"))); +#endif unsigned long ia64_cycles_per_usec; struct ia64_boot_param *ia64_boot_param; struct screen_info screen_info; -/* This tells _start which CPU is booting. */ -int cpu_now_booting; - -#ifdef CONFIG_SMP -volatile unsigned long cpu_online_map; -#endif unsigned long ia64_iobase; /* virtual address for I/O accesses */ @@ -67,6 +67,31 @@ unsigned long ia64_iobase; /* virtual address for I/O accesses */ char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */ +/* + * Entries defined so far: + * - boot param structure itself + * - memory map + * - initrd (optional) + * - command line string + * - kernel code & data + * + * More could be added if necessary + */ +#define IA64_MAX_RSVD_REGIONS 5 + +struct rsvd_region { + unsigned long start; /* virtual address of beginning of element */ + unsigned long end; /* virtual address of end of element + 1 */ +}; + +/* + * We use a special marker for the end of memory and it uses the extra (+1) slot + */ +static struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; +static int num_rsvd_regions; + +static unsigned long bootmap_start; /* physical address where the bootmem map is located */ + static int find_max_pfn (unsigned long start, unsigned long end, void *arg) { @@ -78,126 +103,191 @@ find_max_pfn (unsigned long start, unsigned long end, void *arg) return 0; } +#define IGNORE_PFN0 1 /* XXX fix 
me: ignore pfn 0 until TLB miss handler is updated... */ + +/* + * Free available memory based on the primitive map created from + * the boot parameters. This routine does not assume the incoming + * segments are sorted. + */ static int free_available_memory (unsigned long start, unsigned long end, void *arg) { -# define KERNEL_END ((unsigned long) &_end) -# define MIN(a,b) ((a) < (b) ? (a) : (b)) -# define MAX(a,b) ((a) > (b) ? (a) : (b)) - unsigned long range_start, range_end; - - range_start = MIN(start, KERNEL_START); - range_end = MIN(end, KERNEL_START); + unsigned long range_start, range_end, prev_start; + int i; +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk("warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif /* - * XXX This should not be necessary, but the bootmem allocator - * is broken and fails to work correctly when the starting - * address is not properly aligned. + * lowest possible address(walker uses virtual) */ - range_start = PAGE_ALIGN(range_start); + prev_start = PAGE_OFFSET; - if (range_start < range_end) - free_bootmem(__pa(range_start), range_end - range_start); + for (i = 0; i < num_rsvd_regions; ++i) { + range_start = MAX(start, prev_start); + range_end = MIN(end, rsvd_region[i].start); - range_start = MAX(start, KERNEL_END); - range_end = MAX(end, KERNEL_END); + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); - /* - * XXX This should not be necessary, but the bootmem allocator - * is broken and fails to work correctly when the starting - * address is not properly aligned. 
- */ - range_start = PAGE_ALIGN(range_start); + /* nothing more available in this segment */ + if (range_end == end) return 0; - if (range_start < range_end) - free_bootmem(__pa(range_start), range_end - range_start); + prev_start = rsvd_region[i].end; + } + /* end of memory marker allows full processing inside loop body */ + return 0; +} + +static int +find_bootmap_location (unsigned long start, unsigned long end, void *arg) +{ + unsigned long needed = *(unsigned long *)arg; + unsigned long range_start, range_end, free_start; + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif + + free_start = PAGE_OFFSET; + + for (i = 0; i < num_rsvd_regions; i++) { + range_start = MAX(start, free_start); + range_end = MIN(end, rsvd_region[i].start); + + if (range_end <= range_start) continue; /* skip over empty range */ + + if (range_end - range_start >= needed) { + bootmap_start = __pa(range_start); + return 1; /* done */ + } + + /* nothing more available in this segment */ + if (range_end == end) return 0; + + free_start = rsvd_region[i].end; + } return 0; } -void __init -setup_arch (char **cmdline_p) +static void +sort_regions (struct rsvd_region *rsvd_region, int max) { - extern unsigned long ia64_iobase; - unsigned long max_pfn, bootmap_start, bootmap_size; + int j; + + /* simple bubble sorting */ + while (max--) { + for (j = 0; j < max; ++j) { + if (rsvd_region[j].start > rsvd_region[j+1].start) { + struct rsvd_region tmp; + tmp = rsvd_region[j]; + rsvd_region[j] = rsvd_region[j + 1]; + rsvd_region[j + 1] = tmp; + } + } + } +} - unw_init(); +static void +find_memory (void) +{ +# define KERNEL_END ((unsigned long) &_end) + unsigned long bootmap_size; + unsigned long max_pfn; + int n = 0; - *cmdline_p = __va(ia64_boot_param->command_line); - strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + /* + * none of the entries in 
this table overlap + */ + rsvd_region[n].start = (unsigned long) ia64_boot_param; + rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); + n++; - efi_init(); + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); + rsvd_region[n].end = (rsvd_region[n].start + + strlen(__va(ia64_boot_param->command_line)) + 1); + n++; + + rsvd_region[n].start = KERNEL_START; + rsvd_region[n].end = KERNEL_END; + n++; + +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; + n++; + } +#endif + + /* end of memory marker */ + rsvd_region[n].start = ~0UL; + rsvd_region[n].end = ~0UL; + n++; + + num_rsvd_regions = n; + sort_regions(rsvd_region, num_rsvd_regions); + + /* first find highest page frame number */ max_pfn = 0; efi_memmap_walk(find_max_pfn, &max_pfn); - /* - * This is wrong, wrong, wrong. Darn it, you'd think if they - * change APIs, they'd do things for the better. Grumble... - */ - bootmap_start = PAGE_ALIGN(__pa(&_end)); - if (ia64_boot_param->initrd_size) - bootmap_start = PAGE_ALIGN(bootmap_start + ia64_boot_param->initrd_size); + /* how many bytes to cover all the pages */ + bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT; + + /* look for a location to hold the bootmap */ + bootmap_start = ~0UL; + efi_memmap_walk(find_bootmap_location, &bootmap_size); + if (bootmap_start == ~0UL) + panic("Cannot find %ld bytes for bootmap\n", bootmap_size); + bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); + /* Free all available memory, then mark bootmem-map as being in use. 
*/ efi_memmap_walk(free_available_memory, 0); - reserve_bootmem(bootmap_start, bootmap_size); #ifdef CONFIG_BLK_DEV_INITRD - initrd_start = ia64_boot_param->initrd_start; - - if (initrd_start) { - u64 start, size; -# define is_same_page(a,b) (((a)&PAGE_MASK) == ((b)&PAGE_MASK)) - -#if 1 - /* XXX for now some backwards compatibility... */ - if (initrd_start >= PAGE_OFFSET) - printk("Warning: boot loader passed virtual address " - "for initrd, please upgrade the loader\n"); - else -#endif - /* - * The loader ONLY passes physical addresses - */ - initrd_start = (unsigned long)__va(initrd_start); - initrd_end = initrd_start+ia64_boot_param->initrd_size; - start = initrd_start; - size = ia64_boot_param->initrd_size; - - printk("Initial ramdisk at: 0x%p (%lu bytes)\n", - (void *) initrd_start, ia64_boot_param->initrd_size); - - /* - * The kernel end and the beginning of initrd can be - * on the same page. This would cause the page to be - * reserved twice. While not harmful, it does lead to - * a warning message which can cause confusion. Thus, - * we make sure that in this case we only reserve new - * pages, i.e., initrd only pages. We need to: - * - * - align up start - * - adjust size of reserved section accordingly - * - * It should be noted that this operation is only - * valid for the reserve_bootmem() call and does not - * affect the integrety of the initrd itself. - * - * reserve_bootmem() considers partial pages as reserved. 
- */ - if (is_same_page(initrd_start, (unsigned long)&_end)) { - start = PAGE_ALIGN(start); - size -= start-initrd_start; - - printk("Initial ramdisk & kernel on the same page: " - "reserving start=%lx size=%ld bytes\n", - start, size); - } - reserve_bootmem(__pa(start), size); + if (ia64_boot_param->initrd_start) { + initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); + initrd_end = initrd_start+ia64_boot_param->initrd_size; + + printk("Initial ramdisk at: 0x%lx (%lu bytes)\n", + initrd_start, ia64_boot_param->initrd_size); } #endif +} + +void __init +setup_arch (char **cmdline_p) +{ + extern unsigned long ia64_iobase; + + unw_init(); + + *cmdline_p = __va(ia64_boot_param->command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + + efi_init(); + + find_memory(); + #if 0 /* XXX fix me */ init_mm.start_code = (unsigned long) &_stext; @@ -217,27 +307,37 @@ setup_arch (char **cmdline_p) /* * Set `iobase' to the appropriate address in region 6 * (uncached access range) + * + * The EFI memory map is the "prefered" location to get the I/O port + * space base, rather the relying on AR.KR0. This should become more + * clear in future SAL specs. We'll fall back to getting it out of + * AR.KR0 if no appropriate entry is found in the memory map. 
*/ - ia64_iobase = ia64_get_kr(IA64_KR_IO_BASE); + ia64_iobase = efi_get_iobase(); + if (ia64_iobase) + /* set AR.KR0 since this is all we use it for anyway */ + ia64_set_kr(IA64_KR_IO_BASE, ia64_iobase); + else { + ia64_iobase = ia64_get_kr(IA64_KR_IO_BASE); + printk("No I/O port range found in EFI memory map, falling back to AR.KR0\n"); + printk("I/O port base = 0x%lx\n", ia64_iobase); + } ia64_iobase = __IA64_UNCACHED_OFFSET | (ia64_iobase & ~PAGE_OFFSET); - cpu_init(); /* initialize the bootstrap CPU */ - #ifdef CONFIG_SMP cpu_physical_id(0) = hard_smp_processor_id(); #endif + cpu_init(); /* initialize the bootstrap CPU */ + #ifdef CONFIG_IA64_GENERIC machvec_init(acpi_get_sysname()); #endif -#ifdef CONFIG_ACPI20 if (efi.acpi20) { /* Parse the ACPI 2.0 tables */ acpi20_parse(efi.acpi20); - } else -#endif - if (efi.acpi) { + } else if (efi.acpi) { /* Parse the ACPI tables */ acpi_parse(efi.acpi); } @@ -257,6 +357,8 @@ setup_arch (char **cmdline_p) platform_setup(cmdline_p); paging_init(); + + unw_create_gate_table(); } /* @@ -270,26 +372,18 @@ get_cpuinfo (char *buffer) #else # define lpj loops_per_jiffy #endif - char family[32], model[32], features[128], *cp, *p = buffer; + char family[32], features[128], *cp, *p = buffer; struct cpuinfo_ia64 *c; - unsigned long mask; - - for (c = cpu_data; c < cpu_data + NR_CPUS; ++c) { -#ifdef CONFIG_SMP - if (!(cpu_online_map & (1UL << (c - cpu_data)))) - continue; -#endif + unsigned long mask, cpu; + for (cpu = 0; cpu < smp_num_cpus; ++cpu) { + c = cpu_data(cpu); mask = c->features; - if (c->family == 7) - memcpy(family, "IA-64", 6); - else - sprintf(family, "%u", c->family); - - switch (c->model) { - case 0: strcpy(model, "Itanium"); break; - default: sprintf(model, "%u", c->model); break; + switch (c->family) { + case 0x07: memcpy(family, "Itanium", 8); break; + case 0x1f: memcpy(family, "McKinley", 9); break; + default: sprintf(family, "%u", c->family); break; } /* build the feature string: */ @@ -306,8 +400,9 @@ 
get_cpuinfo (char *buffer) p += sprintf(p, "processor : %lu\n" "vendor : %s\n" + "arch : IA-64\n" "family : %s\n" - "model : %s\n" + "model : %u\n" "revision : %u\n" "archrev : %u\n" "features :%s\n" /* don't change this---it _is_ right! */ @@ -316,8 +411,7 @@ get_cpuinfo (char *buffer) "cpu MHz : %lu.%06lu\n" "itc MHz : %lu.%06lu\n" "BogoMIPS : %lu.%02lu\n\n", - c - cpu_data, c->vendor, family, model, c->revision, c->archrev, - features, + cpu, c->vendor, family, c->model, c->revision, c->archrev, features, c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, c->itc_freq / 1000000, c->itc_freq % 1000000, lpj*HZ/500000, (lpj*HZ/5000) % 100); @@ -385,18 +479,54 @@ identify_cpu (struct cpuinfo_ia64 *c) void cpu_init (void) { - extern void __init ia64_mmu_init (void); + extern void __init ia64_mmu_init (void *); unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; unsigned int max_ctx; + struct cpuinfo_ia64 *my_cpu_data; +#ifdef CONFIG_NUMA + int cpu, order; + + /* + * If NUMA is configured, the cpu_data array is not preallocated. The boot cpu + * allocates entries for every possible cpu. As the remaining cpus come online, + * they reallocate a new cpu_data structure on their local node. This extra work + * is required because some boot code references all cpu_data structures + * before the cpus are actually started. 
+ */ + if (!boot_cpu_data) { + my_cpu_data = alloc_bootmem_pages_node(NODE_DATA(numa_node_id()), + sizeof(struct cpuinfo_ia64)); + boot_cpu_data = my_cpu_data; + my_cpu_data->cpu_data[0] = my_cpu_data; + for (cpu = 1; cpu < NR_CPUS; ++cpu) + my_cpu_data->cpu_data[cpu] + = alloc_bootmem_pages_node(NODE_DATA(numa_node_id()), + sizeof(struct cpuinfo_ia64)); + for (cpu = 1; cpu < NR_CPUS; ++cpu) + memcpy(my_cpu_data->cpu_data[cpu]->cpu_data_ptrs, + my_cpu_data->cpu_data, sizeof(my_cpu_data->cpu_data)); + } else { + order = get_order(sizeof(struct cpuinfo_ia64)); + my_cpu_data = page_address(alloc_pages_node(numa_node_id(), GFP_KERNEL, order)); + memcpy(my_cpu_data, boot_cpu_data->cpu_data[smp_processor_id()], + sizeof(struct cpuinfo_ia64)); + __free_pages(virt_to_page(boot_cpu_data->cpu_data[smp_processor_id()]), + order); + for (cpu = 0; cpu < NR_CPUS; ++cpu) + boot_cpu_data->cpu_data[cpu]->cpu_data[smp_processor_id()] = my_cpu_data; + } +#else + my_cpu_data = cpu_data(smp_processor_id()); +#endif /* - * We can't pass "local_cpu_data" do identify_cpu() because we haven't called + * We can't pass "local_cpu_data" to identify_cpu() because we haven't called * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it * depends on the data returned by identify_cpu(). We break the dependency by - * accessing cpu_data[] the old way, through identity mapped space. + * accessing cpu_data() the old way, through identity mapped space. 
*/ - identify_cpu(&cpu_data[smp_processor_id()]); + identify_cpu(my_cpu_data); /* Clear the stack memory reserved for pt_regs: */ memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); @@ -415,13 +545,11 @@ cpu_init (void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - ia64_mmu_init(); + ia64_mmu_init(my_cpu_data); #ifdef CONFIG_IA32_SUPPORT /* initialize global ia32 state - CR0 and CR4 */ - __asm__("mov ar.cflg = %0" - : /* no outputs */ - : "r" (((ulong) IA32_CR4 << 32) | IA32_CR0)); + asm volatile ("mov ar.cflg = %0" :: "r" (((ulong) IA32_CR4 << 32) | IA32_CR0)); #endif /* disable all local interrupt sources: */ diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h new file mode 100644 index 000000000000..77decb790d8c --- /dev/null +++ b/arch/ia64/kernel/sigframe.h @@ -0,0 +1,16 @@ +struct sigframe { + /* + * Place signal handler args where user-level unwinder can find them easily. + * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is + * user-level code that depends on their presence! 
+ */ + unsigned long arg0; /* signum */ + unsigned long arg1; /* siginfo pointer */ + unsigned long arg2; /* sigcontext pointer */ + + unsigned long rbs_base; /* base of new register backing store (or NULL) */ + void *handler; /* pointer to the plabel of the signal handler */ + + struct siginfo info; + struct sigcontext sc; +}; diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 55a7bd8d2e31..11c59f6bc2e7 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -25,6 +25,8 @@ #include <asm/rse.h> #include <asm/sigcontext.h> +#include "sigframe.h" + #define DEBUG_SIG 0 #define STACK_ALIGN 16 /* minimal alignment for stack pointer */ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -43,11 +45,6 @@ struct sigscratch { struct pt_regs pt; }; -struct sigframe { - struct siginfo info; - struct sigcontext sc; -}; - extern long ia64_do_signal (sigset_t *, struct sigscratch *, long); /* forward decl */ long @@ -380,7 +377,13 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - err = copy_siginfo_to_user(&frame->info, info); + err = __put_user(sig, &frame->arg0); + err |= __put_user(&frame->info, &frame->arg1); + err |= __put_user(&frame->sc, &frame->arg2); + err |= __put_user(new_rbs, &frame->rbs_base); + err |= __put_user(ka->sa.sa_handler, &frame->handler); + + err |= copy_siginfo_to_user(&frame->info, info); err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); @@ -390,19 +393,16 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, if (err) goto give_sigsegv; - scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ - scr->pt.r2 = sig; /* signal number */ - scr->pt.r3 = (unsigned long) ka->sa.sa_handler; /* addr. 
of handler's proc desc */ - scr->pt.r15 = new_rbs; + scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ scr->pt.cr_iip = tramp_addr; ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */ /* - * Note: this affects only the NaT bits of the scratch regs - * (the ones saved in pt_regs), which is exactly what we want. + * Note: this affects only the NaT bits of the scratch regs (the ones saved in + * pt_regs), which is exactly what we want. */ - scr->scratch_unat = 0; /* ensure NaT bits of at least r2, r3, r12, and r15 are clear */ + scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */ #if DEBUG_SIG printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%lx\n", diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index b1cd9c2f33b0..59f0c55ecfa4 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -6,12 +6,16 @@ * * Lots of stuff stolen from arch/alpha/kernel/smp.c * - * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy calibration on each CPU. - * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id - * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor & cpu_online_map - * now gets done here (instead of setup.c) - * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme. - * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and + * 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized + * the existing code (on the lines of x86 port). + * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy + * calibration on each CPU. 
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id + * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor + * & cpu_online_map now gets done here (instead of setup.c) + * 99/10/05 davidm Update to bring it in sync with new command-line processing + * scheme. + * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and * smp_call_function_single to resend IPI on timeouts */ #define __KERNEL_SYSCALLS__ @@ -45,120 +49,47 @@ #include <asm/system.h> #include <asm/unistd.h> -extern void __init calibrate_delay(void); -extern int cpu_idle(void * unused); -extern void machine_halt(void); -extern void start_ap(void); - -extern int cpu_now_booting; /* used by head.S to find idle task */ -extern volatile unsigned long cpu_online_map; /* bitmap of available cpu's */ - -struct smp_boot_data smp_boot_data __initdata; - +/* The 'big kernel lock' */ spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; -char __initdata no_int_routing; - -/* don't make this a CPU-local variable: it's used for IPIs, mostly... */ -int __cpu_physical_id[NR_CPUS]; /* logical ID -> physical CPU ID map */ - -unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ -int smp_num_cpus = 1; -volatile int smp_threads_ready; /* set when the idlers are all forked */ -unsigned long ap_wakeup_vector; /* external Int to use to wakeup AP's */ - -static volatile unsigned long cpu_callin_map; -static volatile int smp_commenced; - -static int max_cpus = -1; /* command line */ +/* + * Structure and data for smp_call_function(). This is designed to minimise static memory + * requirements. It also looks cleaner. 
+ */ +static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; -struct smp_call_struct { +struct call_data_struct { void (*func) (void *info); void *info; long wait; - atomic_t unstarted_count; - atomic_t unfinished_count; + atomic_t started; + atomic_t finished; }; -static volatile struct smp_call_struct *smp_call_function_data; - -#define IPI_RESCHEDULE 0 -#define IPI_CALL_FUNC 1 -#define IPI_CPU_STOP 2 -#ifndef CONFIG_ITANIUM_PTCG -# define IPI_FLUSH_TLB 3 -#endif /*!CONFIG_ITANIUM_PTCG */ - -/* - * Setup routine for controlling SMP activation - * - * Command-line option of "nosmp" or "maxcpus=0" will disable SMP - * activation entirely (the MPS table probe still happens, though). - * - * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer - * greater than 0, limits the maximum number of CPUs activated in - * SMP mode to <NUM>. - */ - -static int __init -nosmp (char *str) -{ - max_cpus = 0; - return 1; -} - -__setup("nosmp", nosmp); - -static int __init -maxcpus (char *str) -{ - get_option(&str, &max_cpus); - return 1; -} -__setup("maxcpus=", maxcpus); +static volatile struct call_data_struct *call_data; -static int __init -nointroute (char *str) -{ - no_int_routing = 1; - return 1; -} - -__setup("nointroute", nointroute); +#define IPI_CALL_FUNC 0 +#define IPI_CPU_STOP 1 +#ifndef CONFIG_ITANIUM_PTCG +# define IPI_FLUSH_TLB 2 +#endif /*!CONFIG_ITANIUM_PTCG */ -/* - * Yoink this CPU from the runnable list... 
- */ -void -halt_processor (void) +static void +stop_this_cpu (void) { + /* + * Remove this CPU: + */ clear_bit(smp_processor_id(), &cpu_online_map); max_xtp(); __cli(); - for (;;) - ; -} - -static inline int -pointer_lock (void *lock, void *data, int retry) -{ - volatile long *ptr = lock; - again: - if (cmpxchg_acq((void **) lock, 0, data) == 0) - return 0; - - if (!retry) - return -EBUSY; - - while (*ptr) - ; - - goto again; + for (;;); } void handle_IPI (int irq, void *dev_id, struct pt_regs *regs) { + int this_cpu = smp_processor_id(); unsigned long *pending_ipis = &local_cpu_data->ipi_operation; unsigned long ops; @@ -175,28 +106,21 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs) ops &= ~(1 << which); switch (which) { - case IPI_RESCHEDULE: - /* - * Reschedule callback. Everything to be done is done by the - * interrupt return path. - */ - break; - case IPI_CALL_FUNC: { - struct smp_call_struct *data; + struct call_data_struct *data; void (*func)(void *info); void *info; int wait; /* release the 'pointer lock' */ - data = (struct smp_call_struct *) smp_call_function_data; + data = (struct call_data_struct *) call_data; func = data->func; info = data->info; wait = data->wait; mb(); - atomic_dec(&data->unstarted_count); + atomic_inc(&data->started); /* At this point the structure may be gone unless wait is true. */ (*func)(info); @@ -204,12 +128,12 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs) /* Notify the sending CPU that the task is done. */ mb(); if (wait) - atomic_dec(&data->unfinished_count); + atomic_inc(&data->finished); } break; case IPI_CPU_STOP: - halt_processor(); + stop_this_cpu(); break; #ifndef CONFIG_ITANIUM_PTCG @@ -224,13 +148,9 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs) /* * Current CPU may be running with different RID so we need to - * reload the RID of flushed address. - * Current CPU may be running with different - * RID so we need to reload the RID of flushed - * address. 
Purging the translation also - * needs ALAT invalidation; we do not need - * "invala" here since it is done in - * ia64_leave_kernel. + * reload the RID of flushed address. Purging the translation + * also needs ALAT invalidation; we do not need "invala" here + * since it is done in ia64_leave_kernel. */ ia64_srlz_d(); if (saved_rid != flush_rid) { @@ -260,8 +180,7 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs) #endif /* !CONFIG_ITANIUM_PTCG */ default: - printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", - smp_processor_id(), which); + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); break; } /* Switch */ } while (ops); @@ -273,11 +192,7 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs) static inline void send_IPI_single (int dest_cpu, int op) { - - if (dest_cpu == -1) - return; - - set_bit(op, &cpu_data[dest_cpu].ipi_operation); + set_bit(op, &cpu_data(dest_cpu)->ipi_operation); platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0); } @@ -310,13 +225,7 @@ send_IPI_self (int op) void smp_send_reschedule (int cpu) { - send_IPI_single(cpu, IPI_RESCHEDULE); -} - -void -smp_send_stop (void) -{ - send_IPI_allbutself(IPI_CPU_STOP); + platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } #ifndef CONFIG_ITANIUM_PTCG @@ -328,22 +237,33 @@ smp_send_flush_tlb (void) } void -smp_resend_flush_tlb(void) +smp_resend_flush_tlb (void) { + int i; + /* * Really need a null IPI but since this rarely should happen & since this code * will go away, lets not add one. */ - send_IPI_allbutself(IPI_RESCHEDULE); + for (i = 0; i < smp_num_cpus; ++i) + if (i != smp_processor_id()) + smp_send_reschedule(i); } -#endif /* !CONFIG_ITANIUM_PTCG */ +#endif /* !CONFIG_ITANIUM_PTCG */ + +void +smp_flush_tlb_all (void) +{ + smp_call_function ((void (*)(void *))__flush_tlb_all,0,1,1); + __flush_tlb_all(); +} /* * Run a function on another CPU * <func> The function to run. This must be fast and non-blocking. 
* <info> An arbitrary pointer to pass to the function. - * <retry> If true, keep retrying until ready. + * <nonatomic> Currently unused. * <wait> If true, wait until function has completed on other CPUs. * [RETURNS] 0 on success, else a negative status code. * @@ -352,11 +272,15 @@ smp_resend_flush_tlb(void) */ int -smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int retry, int wait) +smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic, + int wait) { - struct smp_call_struct data; - unsigned long timeout; + struct call_data_struct data; int cpus = 1; +#if (defined(CONFIG_ITANIUM_B0_SPECIFIC) \ + || defined(CONFIG_ITANIUM_B1_SPECIFIC) || defined(CONFIG_ITANIUM_B2_SPECIFIC)) + unsigned long timeout; +#endif if (cpuid == smp_processor_id()) { printk(__FUNCTION__" trying to call self\n"); @@ -365,115 +289,110 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int data.func = func; data.info = info; + atomic_set(&data.started, 0); data.wait = wait; - atomic_set(&data.unstarted_count, cpus); - atomic_set(&data.unfinished_count, cpus); + if (wait) + atomic_set(&data.finished, 0); - if (pointer_lock(&smp_call_function_data, &data, retry)) - return -EBUSY; + spin_lock_bh(&call_lock); + call_data = &data; -resend: - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_single(cpuid, IPI_CALL_FUNC); +#if (defined(CONFIG_ITANIUM_B0_SPECIFIC) \ + || defined(CONFIG_ITANIUM_B1_SPECIFIC) || defined(CONFIG_ITANIUM_B2_SPECIFIC)) + resend: + send_IPI_single(cpuid, IPI_CALL_FUNC); - /* Wait for response */ + /* Wait for response */ timeout = jiffies + HZ; - while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) + while ((atomic_read(&data.started) != cpus) && time_before(jiffies, timeout)) barrier(); - if (atomic_read(&data.unstarted_count) > 0) { -#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) + if 
(atomic_read(&data.started) != cpus) goto resend; #else - smp_call_function_data = NULL; - return -ETIMEDOUT; + send_IPI_single(cpuid, IPI_CALL_FUNC); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + barrier(); #endif - } if (wait) - while (atomic_read(&data.unfinished_count) > 0) + while (atomic_read(&data.finished) != cpus) barrier(); - /* unlock pointer */ - smp_call_function_data = NULL; + call_data = NULL; + + spin_unlock_bh(&call_lock); return 0; } /* - * Run a function on all other CPUs. + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. + */ + +/* + * [SUMMARY] Run a function on all other CPUs. * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. - * <retry> If true, keep retrying until ready. - * <wait> If true, wait until function has completed on other CPUs. + * <nonatomic> currently unused. + * <wait> If true, wait (atomically) until function has completed on other CPUs. * [RETURNS] 0 on success, else a negative status code. * - * Does not return until remote CPUs are nearly ready to execute <func> - * or are or have executed. + * Does not return until remote CPUs are nearly ready to execute <func> or are or have + * executed. + * + * You must not call this function with disabled interrupts or from a hardware interrupt + * handler, you may call it from a bottom half handler. 
*/ int -smp_call_function (void (*func) (void *info), void *info, int retry, int wait) +smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - struct smp_call_struct data; + struct call_data_struct data; + int cpus = smp_num_cpus-1; +#if (defined(CONFIG_ITANIUM_B0_SPECIFIC) \ + || defined(CONFIG_ITANIUM_B1_SPECIFIC) || defined(CONFIG_ITANIUM_B2_SPECIFIC)) unsigned long timeout; - int cpus = smp_num_cpus - 1; +#endif - if (cpus == 0) + if (!cpus) return 0; data.func = func; data.info = info; + atomic_set(&data.started, 0); data.wait = wait; - atomic_set(&data.unstarted_count, cpus); - atomic_set(&data.unfinished_count, cpus); + if (wait) + atomic_set(&data.finished, 0); - if (pointer_lock(&smp_call_function_data, &data, retry)) - return -EBUSY; + spin_lock_bh(&call_lock); + call_data = &data; - /* Send a message to all other CPUs and wait for them to respond */ +#if (defined(CONFIG_ITANIUM_B0_SPECIFIC) \ + || defined(CONFIG_ITANIUM_B1_SPECIFIC) || defined(CONFIG_ITANIUM_B2_SPECIFIC)) + resend: + /* Send a message to all other CPUs and wait for them to respond */ send_IPI_allbutself(IPI_CALL_FUNC); -retry: - /* Wait for response */ + /* Wait for response */ timeout = jiffies + HZ; - while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) + while ((atomic_read(&data.started) != cpus) && time_before(jiffies, timeout)) barrier(); - if (atomic_read(&data.unstarted_count) > 0) { -#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) - int i; - for (i = 0; i < smp_num_cpus; i++) { - if (i != smp_processor_id()) - platform_send_ipi(i, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0); - } - goto retry; + if (atomic_read(&data.started) != cpus) + goto resend; #else - smp_call_function_data = NULL; - return -ETIMEDOUT; + send_IPI_allbutself(IPI_CALL_FUNC); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + barrier(); #endif - } + if (wait) - while 
(atomic_read(&data.unfinished_count) > 0) + while (atomic_read(&data.finished) != cpus) barrier(); - /* unlock pointer */ - smp_call_function_data = NULL; - return 0; -} + call_data = NULL; -/* - * Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we - * want to ensure all TLB's flushed before proceeding. - */ -void -smp_flush_tlb_all (void) -{ - smp_call_function((void (*)(void *))__flush_tlb_all, NULL, 1, 1); - __flush_tlb_all(); -} - -/* - * Ideally sets up per-cpu profiling hooks. Doesn't do much now... - */ -static inline void __init -smp_setup_percpu_timer(void) -{ - local_cpu_data->prof_counter = 1; - local_cpu_data->prof_multiplier = 1; + spin_unlock_bh(&call_lock); + return 0; } void @@ -487,198 +406,14 @@ smp_do_timer (struct pt_regs *regs) } } - -/* - * AP's start using C here. - */ -void __init -smp_callin (void) -{ - extern void ia64_rid_init(void); - extern void ia64_init_itm(void); - extern void ia64_cpu_local_tick(void); -#ifdef CONFIG_PERFMON - extern void perfmon_init_percpu(void); -#endif - int cpu = smp_processor_id(); - - if (test_and_set_bit(cpu, &cpu_online_map)) { - printk("CPU#%d already initialized!\n", cpu); - machine_halt(); - } - - efi_map_pal_code(); - cpu_init(); - - smp_setup_percpu_timer(); - - /* setup the CPU local timer tick */ - ia64_init_itm(); - -#ifdef CONFIG_PERFMON - perfmon_init_percpu(); -#endif - local_irq_enable(); /* Interrupts have been off until now */ - - calibrate_delay(); - local_cpu_data->loops_per_jiffy = loops_per_jiffy; - - /* allow the master to continue */ - set_bit(cpu, &cpu_callin_map); - - /* finally, wait for the BP to finish initialization: */ - while (!smp_commenced); - - cpu_idle(NULL); -} - -/* - * Create the idle task for a new AP. DO NOT use kernel_thread() because - * that could end up calling schedule() in the ia64_leave_kernel exit - * path in which case the new idle task could get scheduled before we - * had a chance to remove it from the run-queue... 
- */ -static int __init -fork_by_hand (void) -{ - /* - * Don't care about the usp and regs settings since we'll never - * reschedule the forked task. - */ - return do_fork(CLONE_VM|CLONE_PID, 0, 0, 0); -} - -/* - * Bring one cpu online. Return 0 if this fails for any reason. - */ -static int __init -smp_boot_one_cpu (int cpu) -{ - struct task_struct *idle; - int cpu_phys_id = cpu_physical_id(cpu); - long timeout; - - /* - * Create an idle task for this CPU. Note that the address we - * give to kernel_thread is irrelevant -- it's going to start - * where OS_BOOT_RENDEVZ vector in SAL says to start. But - * this gets all the other task-y sort of data structures set - * up like we wish. We need to pull the just created idle task - * off the run queue and stuff it into the init_tasks[] array. - * Sheesh . . . - */ - if (fork_by_hand() < 0) - panic("failed fork for CPU 0x%x", cpu_phys_id); - /* - * We remove it from the pidhash and the runqueue - * once we got the process: - */ - idle = init_task.prev_task; - if (!idle) - panic("No idle process for CPU 0x%x", cpu_phys_id); - init_tasks[cpu] = idle; - del_from_runqueue(idle); - unhash_process(idle); - - /* Schedule the first task manually. */ - idle->processor = cpu; - idle->has_cpu = 1; - - /* Let _start know what logical CPU we're booting (offset into init_tasks[] */ - cpu_now_booting = cpu; - - /* Kick the AP in the butt */ - platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); - - /* wait up to 10s for the AP to start */ - for (timeout = 0; timeout < 100000; timeout++) { - if (test_bit(cpu, &cpu_callin_map)) - return 1; - udelay(100); - } - - printk(KERN_ERR "SMP: CPU 0x%x is stuck\n", cpu_phys_id); - return 0; -} - - - -/* - * Called by smp_init bring all the secondaries online and hold them. 
- */ -void __init -smp_boot_cpus (void) -{ - int i, cpu_count = 1; - unsigned long bogosum; - - /* on the BP, the kernel already called calibrate_delay_loop() in init/main.c */ - local_cpu_data->loops_per_jiffy = loops_per_jiffy; -#if 0 - smp_tune_scheduling(); -#endif - smp_setup_percpu_timer(); - - if (test_and_set_bit(0, &cpu_online_map)) { - printk("CPU#%d already initialized!\n", smp_processor_id()); - machine_halt(); - } - init_idle(); - - /* Nothing to do when told not to. */ - if (max_cpus == 0) { - printk(KERN_INFO "SMP mode deactivated.\n"); - return; - } - - if (max_cpus != -1) - printk("Limiting CPUs to %d\n", max_cpus); - - if (smp_boot_data.cpu_count > 1) { - printk(KERN_INFO "SMP: starting up secondaries.\n"); - - for (i = 0; i < smp_boot_data.cpu_count; i++) { - /* skip performance restricted and bootstrap cpu: */ - if (smp_boot_data.cpu_phys_id[i] == -1 - || smp_boot_data.cpu_phys_id[i] == hard_smp_processor_id()) - continue; - - cpu_physical_id(cpu_count) = smp_boot_data.cpu_phys_id[i]; - if (!smp_boot_one_cpu(cpu_count)) - continue; /* failed */ - - cpu_count++; /* Count good CPUs only... */ - /* - * Bail if we've started as many CPUS as we've been told to. - */ - if (cpu_count == max_cpus) - break; - } - } - - if (cpu_count == 1) { - printk(KERN_ERR "SMP: Bootstrap processor only.\n"); - } - - bogosum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online_map & (1L << i)) - bogosum += cpu_data[i].loops_per_jiffy; - } - - printk(KERN_INFO "SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - cpu_count, bogosum*HZ/500000, (bogosum*HZ/5000) % 100); - - smp_num_cpus = cpu_count; -} - /* - * Called when the BP is just about to fire off init. + * this function calls the 'stop' function on all other CPUs in the system. 
*/ -void __init -smp_commence (void) +void +smp_send_stop (void) { - smp_commenced = 1; + send_IPI_allbutself(IPI_CPU_STOP); + smp_num_cpus = 1; } int __init @@ -686,33 +421,3 @@ setup_profiling_timer (unsigned int multiplier) { return -EINVAL; } - -/* - * Assume that CPU's have been discovered by some platform-dependant - * interface. For SoftSDV/Lion, that would be ACPI. - * - * Setup of the IPI irq handler is done in irq.c:init_IRQ(). - * - * This also registers the AP OS_MC_REDVEZ address with SAL. - */ -void __init -init_smp_config (void) -{ - struct fptr { - unsigned long fp; - unsigned long gp; - } *ap_startup; - long sal_ret; - - /* Tell SAL where to drop the AP's. */ - ap_startup = (struct fptr *) start_ap; - sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, __pa(ap_startup->fp), - __pa(ap_startup->gp), 0, 0, 0, 0); - if (sal_ret < 0) { - printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret)); - printk(" Forcing UP mode\n"); - max_cpus = 0; - smp_num_cpus = 1; - } - -} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index d51fa1ee5b0b..52e393aa2c81 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -1,4 +1,563 @@ /* + * SMP boot-related support + * + * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here. + * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code. */ -/* place holder... 
*/ + +#define __KERNEL_SYSCALLS__ + +#include <linux/config.h> + +#include <linux/bootmem.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> + +#include <asm/atomic.h> +#include <asm/bitops.h> +#include <asm/cache.h> +#include <asm/current.h> +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#if SMP_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + + +/* + * ITC synchronization related stuff: + */ +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/8) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static spinlock_t itc_sync_lock = SPIN_LOCK_UNLOCKED; +static volatile unsigned long go[SLAVE + 1]; + +#define DEBUG_ITC_SYNC 0 + +extern void __init calibrate_delay(void); +extern void start_ap(void); + +int cpucount; + +/* Setup configured maximum number of CPUs to activate */ +static int max_cpus = -1; + +/* Total count of live CPUs */ +int smp_num_cpus = 1; + +/* Bitmask of currently online CPUs */ +volatile unsigned long cpu_online_map; + +/* which logical CPU number maps to which CPU (physical APIC ID) */ +volatile int ia64_cpu_to_sapicid[NR_CPUS]; + +static volatile unsigned long cpu_callin_map; + +struct smp_boot_data smp_boot_data __initdata; + +/* Set when the idlers are all forked */ +volatile int smp_threads_ready; + +unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */ + +char __initdata no_int_routing; + +unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? 
*/ + +/* + * Setup routine for controlling SMP activation + * + * Command-line option of "nosmp" or "maxcpus=0" will disable SMP + * activation entirely (the MPS table probe still happens, though). + * + * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to <NUM>. + */ + +static int __init +nosmp (char *str) +{ + max_cpus = 0; + return 1; +} + +__setup("nosmp", nosmp); + +static int __init +maxcpus (char *str) +{ + get_option(&str, &max_cpus); + return 1; +} + +__setup("maxcpus=", maxcpus); + +static int __init +nointroute (char *str) +{ + no_int_routing = 1; + return 1; +} + +__setup("nointroute", nointroute); + +void +sync_master (void *arg) +{ + unsigned long flags, i; + + go[MASTER] = 0; + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { + while (!go[MASTER]); + go[MASTER] = 0; + go[SLAVE] = ia64_get_itc(); + } + } + local_irq_restore(flags); +} + +/* + * Return the number of cycles by which our itc differs from the itc on the master + * (time-keeper) CPU. A positive number indicates our itc is ahead of the master, + * negative that it is behind. + */ +static inline long +get_delta (long *rt, long *master) +{ + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm; + long i; + + for (i = 0; i < NUM_ITERS; ++i) { + t0 = ia64_get_itc(); + go[MASTER] = 1; + while (!(tm = go[SLAVE])); + go[SLAVE] = 0; + t1 = ia64_get_itc(); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; +} + +/* + * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU + * (normally the time-keeper CPU). 
We use a closed loop to eliminate the possibility of + * unaccounted-for errors (such as getting a machine check in the middle of a calibration + * step). The basic idea is for the slave to ask the master what itc value it has and to + * read its own itc before and after the master responds. Each iteration gives us three + * timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0 + * and t1. If we achieve this, the clocks are synchronized provided the interconnect + * between the slave and the master is symmetric. Even if the interconnect were + * asymmetric, we would still know that the synchronization error is smaller than the + * roundtrip latency (t1 - t0). + * + * When the interconnect is quiet and symmetric, this lets us synchronize the itc to + * within one or two cycles. However, we can only *guarantee* that the synchronization is + * accurate to within a round-trip time, which is typically in the range of several + * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually + * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better + * than half a microsecond or so. 
+ */ +void +ia64_sync_itc (unsigned int master) +{ + long i, delta, adj, adjust_latency = 0, done = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_ITC_SYNC + struct { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of itc adjustment latency */ + } t[NUM_ROUNDS]; +#endif + + go[MASTER] = 1; + + if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) { + printk("sync_itc: failed to get attention of CPU %u!\n", master); + return; + } + + while (go[MASTER]); /* wait for master to be ready */ + + spin_lock_irqsave(&itc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... */ + bound = rt; + } + + if (!done) { + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + ia64_set_itc(ia64_get_itc() + adj); + } +#if DEBUG_ITC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + spin_unlock_irqrestore(&itc_sync_lock, flags); + +#if DEBUG_ITC_SYNC + for (i = 0; i < NUM_ROUNDS; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk("CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, maxerr %lu cycles)\n", + smp_processor_id(), master, delta, rt); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __init +smp_setup_percpu_timer (void) +{ + local_cpu_data->prof_counter = 1; + local_cpu_data->prof_multiplier = 1; +} + +/* + * Architecture specific routine called by the kernel just before init is + * fired off. This allows the BP to have everything in order [we hope]. + * At the end of this all the APs will hit the system scheduling and off + * we go. 
Each AP will jump through the kernel + * init into idle(). At this point the scheduler will one day take over + * and give them jobs to do. smp_callin is a standard routine + * we use to track CPUs as they power up. + */ + +static volatile atomic_t smp_commenced = ATOMIC_INIT(0); + +void __init +smp_commence (void) +{ + /* + * Lets the callins below out of their loop. + */ + Dprintk("Setting commenced=1, go go go\n"); + + wmb(); + atomic_set(&smp_commenced,1); +} + + +void __init +smp_callin (void) +{ + int cpuid, phys_id; + extern void ia64_init_itm(void); + +#ifdef CONFIG_PERFMON + extern void perfmon_init_percpu(void); +#endif + + cpuid = smp_processor_id(); + phys_id = hard_smp_processor_id(); + + if (test_and_set_bit(cpuid, &cpu_online_map)) { + printk("huh, phys CPU#0x%x, CPU#0x%x already present??\n", + phys_id, cpuid); + BUG(); + } + + smp_setup_percpu_timer(); + + /* + * Synchronize the ITC with the BP + */ + Dprintk("Going to syncup ITC with BP.\n"); + + ia64_sync_itc(0); + /* + * Get our bogomips. + */ + ia64_init_itm(); +#ifdef CONFIG_PERFMON + perfmon_init_percpu(); +#endif + + local_irq_enable(); + calibrate_delay(); + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + /* + * Allow the master to continue. + */ + set_bit(cpuid, &cpu_callin_map); + Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid); +} + + +/* + * Activate a secondary processor. head.S calls this. + */ +int __init +start_secondary (void *unused) +{ + extern int cpu_idle (void); + + efi_map_pal_code(); + cpu_init(); + smp_callin(); + Dprintk("CPU %d is set to go. \n", smp_processor_id()); + while (!atomic_read(&smp_commenced)) + ; + + Dprintk("CPU %d is starting idle. \n", smp_processor_id()); + return cpu_idle(); +} + +static int __init +fork_by_hand (void) +{ + /* + * don't care about the eip and regs settings since + * we'll never reschedule the forked task. 
+ */ + return do_fork(CLONE_VM|CLONE_PID, 0, 0, 0); +} + +static void __init +do_boot_cpu (int sapicid) +{ + struct task_struct *idle; + int timeout, cpu; + + cpu = ++cpucount; + /* + * We can't use kernel_thread since we must avoid to + * reschedule the child. + */ + if (fork_by_hand() < 0) + panic("failed fork for CPU %d", cpu); + + /* + * We remove it from the pidhash and the runqueue + * once we got the process: + */ + idle = init_task.prev_task; + if (!idle) + panic("No idle process for CPU %d", cpu); + + idle->processor = cpu; + ia64_cpu_to_sapicid[cpu] = sapicid; + idle->has_cpu = 1; /* we schedule the first task manually */ + + del_from_runqueue(idle); + unhash_process(idle); + init_tasks[cpu] = idle; + + Dprintk("Sending Wakeup Vector to AP 0x%x/0x%x.\n", cpu, sapicid); + + platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); + + /* + * Wait 10s total for the AP to start + */ + Dprintk("Waiting on callin_map ..."); + for (timeout = 0; timeout < 100000; timeout++) { + Dprintk("."); + if (test_bit(cpu, &cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + Dprintk("\n"); + + if (test_bit(cpu, &cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + printk("CPU%d: ", cpu); + /*print_cpu_info(&cpu_data[cpu]); */ + printk("CPU has booted.\n"); + } else { + printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); + ia64_cpu_to_sapicid[cpu] = -1; + cpucount--; + } +} + +/* + * Cycle through the APs sending Wakeup IPIs to boot each. + */ +void __init +smp_boot_cpus (void) +{ + int sapicid, cpu; + int boot_cpu_id = hard_smp_processor_id(); + + /* + * Initialize the logical to physical CPU number mapping + * and the per-CPU profiling counter/multiplier + */ + + for (cpu = 0; cpu < NR_CPUS; cpu++) + ia64_cpu_to_sapicid[cpu] = -1; + smp_setup_percpu_timer(); + + /* + * We have the boot CPU online for sure. 
+ */ + set_bit(0, &cpu_online_map); + set_bit(0, &cpu_callin_map); + + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + ia64_cpu_to_sapicid[0] = boot_cpu_id; + + printk("Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); + + global_irq_holder = 0; + current->processor = 0; + init_idle(); + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus || (max_cpus < -1)) { + printk(KERN_INFO "SMP mode deactivated.\n"); + cpu_online_map = 1; + smp_num_cpus = 1; + goto smp_done; + } + if (max_cpus != -1) + printk (KERN_INFO "Limiting CPUs to %d\n", max_cpus); + + if (smp_boot_data.cpu_count > 1) { + + printk(KERN_INFO "SMP: starting up secondaries.\n"); + + for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) { + /* + * Don't even attempt to start the boot CPU! + */ + sapicid = smp_boot_data.cpu_phys_id[cpu]; + if ((sapicid == -1) || (sapicid == hard_smp_processor_id())) + continue; + + if ((max_cpus > 0) && (cpucount + 1 >= max_cpus)) + break; + + do_boot_cpu(sapicid); + + /* + * Make sure we unmap all failed CPUs + */ + if (ia64_cpu_to_sapicid[cpu] == -1) + printk("phys CPU#%d not responding - cannot use it.\n", cpu); + } + + smp_num_cpus = cpucount + 1; + + /* + * Allow the user to impress friends. + */ + + printk("Before bogomips.\n"); + if (!cpucount) { + printk(KERN_ERR "Error: only one processor found.\n"); + } else { + unsigned long bogosum = 0; + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online_map & (1<<cpu)) + bogosum += cpu_data(cpu)->loops_per_jiffy; + + printk(KERN_INFO"Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + cpucount + 1, bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); + } + } + smp_done: + ; +} + +/* + * Assume that CPU's have been discovered by some platform-dependant interface. For + * SoftSDV/Lion, that would be ACPI. + * + * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). 
+ */ +void __init +init_smp_config(void) +{ + struct fptr { + unsigned long fp; + unsigned long gp; + } *ap_startup; + long sal_ret; + + /* Tell SAL where to drop the AP's. */ + ap_startup = (struct fptr *) start_ap; + sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, + __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0); + if (sal_ret < 0) { + printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n Forcing UP mode\n", + ia64_sal_strerror(sal_ret)); + max_cpus = 0; + smp_num_cpus = 1; + } +} diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index e62b304a7adc..577ed872b3db 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -22,16 +22,18 @@ #define COLOR_ALIGN(addr) (((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) unsigned long -arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) +arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) { struct vm_area_struct * vmm; + long map_shared = (flags & MAP_SHARED); if (len > RGN_MAP_LIMIT) return -ENOMEM; if (!addr) addr = TASK_UNMAPPED_BASE; - if (flags & MAP_SHARED) + if (map_shared) addr = COLOR_ALIGN(addr); else addr = PAGE_ALIGN(addr); @@ -45,7 +47,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len if (!vmm || addr + len <= vmm->vm_start) return addr; addr = vmm->vm_end; - if (flags & MAP_SHARED) + if (map_shared) addr = COLOR_ALIGN(addr); } } @@ -176,11 +178,22 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un unsigned long roff; struct file *file = 0; + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + + if (!file->f_op || !file->f_op->mmap) + return -ENODEV; + } + /* - * A zero mmap always succeeds in Linux, independent of - * whether or not the remaining arguments are valid. 
+ * A zero mmap always succeeds in Linux, independent of whether or not the + * remaining arguments are valid. */ - if (PAGE_ALIGN(len) == 0) + len = PAGE_ALIGN(len); + if (len == 0) return addr; /* don't permit mappings into unmapped space or the virtual page table of a region: */ @@ -192,13 +205,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un if (rgn_index(addr) != rgn_index(addr + len)) return -EINVAL; - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return -EBADF; - } - down_write(&current->mm->mmap_sem); addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up_write(&current->mm->mmap_sem); @@ -247,13 +253,6 @@ sys_vm86 (long arg0, long arg1, long arg2, long arg3) return -ENOSYS; } -asmlinkage long -sys_modify_ldt (long arg0, long arg1, long arg2, long arg3) -{ - printk(KERN_ERR "sys_modify_ldt(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); - return -ENOSYS; -} - asmlinkage unsigned long ia64_create_module (const char *name_user, size_t size, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, long stack) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index a1ebc2e7ea97..dc6500b7a167 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -25,6 +25,7 @@ extern rwlock_t xtime_lock; extern unsigned long wall_jiffies; +extern unsigned long last_time_offset; #ifdef CONFIG_IA64_DEBUG_IRQ @@ -45,9 +46,8 @@ do_profile (unsigned long ip) ip -= (unsigned long) &_stext; ip >>= prof_shift; /* - * Don't ignore out-of-bounds IP values silently, - * put them into the last histogram slot, so if - * present, they will show up as a sharp peak. + * Don't ignore out-of-bounds IP values silently, put them into the last + * histogram slot, so if present, they will show up as a sharp peak. 
*/ if (ip > prof_len - 1) ip = prof_len - 1; @@ -57,34 +57,29 @@ do_profile (unsigned long ip) } /* - * Return the number of micro-seconds that elapsed since the last - * update to jiffy. The xtime_lock must be at least read-locked when - * calling this routine. + * Return the number of micro-seconds that elapsed since the last update to jiffy. The + * xtime_lock must be at least read-locked when calling this routine. */ static inline unsigned long gettimeoffset (void) { -#ifdef CONFIG_SMP - /* - * The code below doesn't work for SMP because only CPU 0 - * keeps track of the time. - */ - return 0; -#else - unsigned long now = ia64_get_itc(), last_tick; unsigned long elapsed_cycles, lost = jiffies - wall_jiffies; + unsigned long now, last_tick; +# define time_keeper_id 0 /* smp_processor_id() of time-keeper */ - last_tick = (local_cpu_data->itm_next - (lost+1)*local_cpu_data->itm_delta); -# if 1 + last_tick = (cpu_data(time_keeper_id)->itm_next + - (lost + 1)*cpu_data(time_keeper_id)->itm_delta); + + now = ia64_get_itc(); if ((long) (now - last_tick) < 0) { - printk("Yikes: now < last_tick (now=0x%lx,last_tick=%lx)! No can do.\n", - now, last_tick); - return 0; - } +# if 1 + printk("CPU %d: now < last_tick (now=0x%lx,last_tick=0x%lx)!\n", + smp_processor_id(), now, last_tick); # endif + return last_time_offset; + } elapsed_cycles = now - last_tick; return (elapsed_cycles*local_cpu_data->usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT; -#endif } void @@ -93,11 +88,10 @@ do_settimeofday (struct timeval *tv) write_lock_irq(&xtime_lock); { /* - * This is revolting. We need to set "xtime" - * correctly. However, the value in this location is - * the value at the most recent update of wall time. - * Discover what correction gettimeofday would have - * done, and then undo it! + * This is revolting. We need to set "xtime" correctly. However, the value + * in this location is the value at the most recent update of wall time. 
+ * Discover what correction gettimeofday would have done, and then undo + * it! */ tv->tv_usec -= gettimeoffset(); tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); @@ -119,12 +113,24 @@ do_settimeofday (struct timeval *tv) void do_gettimeofday (struct timeval *tv) { - unsigned long flags, usec, sec; + unsigned long flags, usec, sec, old; read_lock_irqsave(&xtime_lock, flags); { usec = gettimeoffset(); + /* + * Ensure time never goes backwards, even when ITC on different CPUs are + * not perfectly synchronized. + */ + do { + old = last_time_offset; + if (usec <= old) { + usec = old; + break; + } + } while (cmpxchg(&last_time_offset, old, usec) != old); + sec = xtime.tv_sec; usec += xtime.tv_usec; } @@ -162,6 +168,8 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) #ifdef CONFIG_SMP smp_do_timer(regs); #endif + new_itm += local_cpu_data->itm_delta; + if (smp_processor_id() == 0) { /* * Here we are in the timer irq handler. We have irqs locally @@ -171,11 +179,11 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ write_lock(&xtime_lock); do_timer(regs); + local_cpu_data->itm_next = new_itm; write_unlock(&xtime_lock); - } + } else + local_cpu_data->itm_next = new_itm; - new_itm += local_cpu_data->itm_delta; - local_cpu_data->itm_next = new_itm; if (time_after(new_itm, ia64_get_itc())) break; } @@ -228,9 +236,9 @@ ia64_init_itm (void) long status; /* - * According to SAL v2.6, we need to use a SAL call to determine the - * platform base frequency and then a PAL call to determine the - * frequency ratio between the ITC and the base frequency. + * According to SAL v2.6, we need to use a SAL call to determine the platform base + * frequency and then a PAL call to determine the frequency ratio between the ITC + * and the base frequency. 
*/ status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift); if (status != 0) { @@ -284,6 +292,6 @@ void __init time_init (void) { register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction); - efi_gettimeofday(&xtime); + efi_gettimeofday((struct timeval *) &xtime); ia64_init_itm(); } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 64de164bb1d6..53eab244b9ac 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -215,12 +215,9 @@ static inline int fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, struct pt_regs *regs) { + struct ia64_fpreg f6_11[6]; fp_state_t fp_state; fpswa_ret_t ret; -#define FPSWA_BUG -#ifdef FPSWA_BUG - struct ia64_fpreg f6_15[10]; -#endif if (!fpswa_interface) return -1; @@ -232,23 +229,12 @@ fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long * kernel, so set those bits in the mask and set the low volatile * pointer to point to these registers. 
*/ -#ifndef FPSWA_BUG - fp_state.bitmask_low64 = 0x3c0; /* bit 6..9 */ - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6; -#else - fp_state.bitmask_low64 = 0xffc0; /* bit6..bit15 */ - f6_15[0] = regs->f6; - f6_15[1] = regs->f7; - f6_15[2] = regs->f8; - f6_15[3] = regs->f9; - __asm__ ("stf.spill %0=f10%P0" : "=m"(f6_15[4])); - __asm__ ("stf.spill %0=f11%P0" : "=m"(f6_15[5])); - __asm__ ("stf.spill %0=f12%P0" : "=m"(f6_15[6])); - __asm__ ("stf.spill %0=f13%P0" : "=m"(f6_15[7])); - __asm__ ("stf.spill %0=f14%P0" : "=m"(f6_15[8])); - __asm__ ("stf.spill %0=f15%P0" : "=m"(f6_15[9])); - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_15; -#endif + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + f6_11[0] = regs->f6; f6_11[1] = regs->f7; + f6_11[2] = regs->f8; f6_11[3] = regs->f9; + __asm__ ("stf.spill %0=f10%P0" : "=m"(f6_11[4])); + __asm__ ("stf.spill %0=f11%P0" : "=m"(f6_11[5])); + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_11; /* * unsigned long (*EFI_FPSWA) ( * unsigned long trap_type, @@ -264,18 +250,10 @@ fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long (unsigned long *) ipsr, (unsigned long *) fpsr, (unsigned long *) isr, (unsigned long *) pr, (unsigned long *) ifs, &fp_state); -#ifdef FPSWA_BUG - __asm__ ("ldf.fill f10=%0%P0" :: "m"(f6_15[4])); - __asm__ ("ldf.fill f11=%0%P0" :: "m"(f6_15[5])); - __asm__ ("ldf.fill f12=%0%P0" :: "m"(f6_15[6])); - __asm__ ("ldf.fill f13=%0%P0" :: "m"(f6_15[7])); - __asm__ ("ldf.fill f14=%0%P0" :: "m"(f6_15[8])); - __asm__ ("ldf.fill f15=%0%P0" :: "m"(f6_15[9])); - regs->f6 = f6_15[0]; - regs->f7 = f6_15[1]; - regs->f8 = f6_15[2]; - regs->f9 = f6_15[3]; -#endif + regs->f6 = f6_11[0]; regs->f7 = f6_11[1]; + regs->f8 = f6_11[2]; regs->f9 = f6_11[3]; + __asm__ ("ldf.fill f10=%0%P0" :: "m"(f6_11[4])); + __asm__ ("ldf.fill f11=%0%P0" :: "m"(f6_11[5])); return ret.status; } @@ -321,7 +299,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, 
unsigned long isr) } siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = 0; + siginfo.si_code = __SI_FAULT; /* default code */ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); if (isr & 0x11) { siginfo.si_code = FPE_FLTINV; @@ -339,7 +317,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) /* raise exception */ siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = 0; + siginfo.si_code = __SI_FAULT; /* default code */ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); if (isr & 0x880) { siginfo.si_code = FPE_FLTOVF; @@ -443,14 +421,12 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, sprintf(buf, "General Exception: %s%s", reason[code], (code == 3) ? ((isr & (1UL << 37)) ? " (RSE access)" : " (data access)") : ""); -#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC if (code == 8) { # ifdef CONFIG_IA64_PRINT_HAZARDS printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr); # endif return; } -#endif break; case 25: /* Disabled FP-Register */ diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 5b102e44cd60..7dc41e5a7c41 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -325,11 +325,11 @@ set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) DPRINT("ubs_end=%p bsp=%p addr=%px\n", (void *) ubs_end, (void *) bsp, (void *) addr); - ia64_poke(current, (unsigned long) ubs_end, (unsigned long) addr, val); + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); rnat_addr = ia64_rse_rnat_addr(addr); - ia64_peek(current, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n", (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1); @@ -338,7 +338,7 @@ set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned 
long val, int nat) rnats |= nat_mask; else rnats &= ~nat_mask; - ia64_poke(current, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats); } @@ -394,7 +394,7 @@ get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *na DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); - ia64_peek(current, (unsigned long) ubs_end, (unsigned long) addr, val); + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); if (nat) { rnat_addr = ia64_rse_rnat_addr(addr); @@ -402,7 +402,7 @@ get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *na DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats); - ia64_peek(current, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); *nat = (rnats & nat_mask) != 0; } } @@ -1299,7 +1299,12 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, " "ip=0x%016lx\n\r", current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri); - tty_write_message(current->tty, buf); + /* + * Don't call tty_write_message() if we're in the kernel; we might + * be holding locks... + */ + if (user_mode(regs)) + tty_write_message(current->tty, buf); buf[len-1] = '\0'; /* drop '\r' */ printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */ } diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 4de1f4bc4935..bdd277fbe990 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -24,6 +24,7 @@ * o if both the unw.lock spinlock and a script's read-write lock must be * acquired, then the read-write lock must be acquired first. 
*/ +#include <linux/bootmem.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> @@ -36,6 +37,7 @@ #include <asm/ptrace_offsets.h> #include <asm/rse.h> #include <asm/system.h> +#include <asm/uaccess.h> #include "entry.h" #include "unwind_i.h" @@ -97,6 +99,10 @@ static struct { /* unwind table for the kernel: */ struct unw_table kernel_table; + /* unwind table describing the gate page (kernel code that is mapped into user space): */ + size_t gate_table_size; + unsigned long *gate_table; + /* hash table that maps instruction pointer to script index: */ unsigned short hash[UNW_HASH_SIZE]; @@ -323,8 +329,13 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char else *nat_addr &= ~nat_mask; } else { - *val = *addr; - *nat = (*nat_addr & nat_mask) != 0; + if ((*nat_addr & nat_mask) == 0) { + *val = *addr; + *nat = 0; + } else { + *val = 0; /* if register is a NaT, *addr may contain kernel data! */ + *nat = 1; + } } return 0; } @@ -1350,10 +1361,10 @@ compile_reg (struct unw_state_record *sr, int i, struct unw_script *script) } } -static inline struct unw_table_entry * +static inline const struct unw_table_entry * lookup (struct unw_table *table, unsigned long rel_ip) { - struct unw_table_entry *e = 0; + const struct unw_table_entry *e = 0; unsigned long lo, hi, mid; /* do a binary search for right entry: */ @@ -1378,7 +1389,7 @@ static inline struct unw_script * build_script (struct unw_frame_info *info) { struct unw_reg_state *rs, *next; - struct unw_table_entry *e = 0; + const struct unw_table_entry *e = 0; struct unw_script *script = 0; unsigned long ip = info->ip; struct unw_state_record sr; @@ -1836,9 +1847,9 @@ unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t) static void init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base, - unsigned long gp, void *table_start, void *table_end) + unsigned long gp, const void *table_start, const void *table_end) 
{ - struct unw_table_entry *start = table_start, *end = table_end; + const struct unw_table_entry *start = table_start, *end = table_end; table->name = name; table->segment_base = segment_base; @@ -1851,9 +1862,9 @@ init_unwind_table (struct unw_table *table, const char *name, unsigned long segm void * unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, - void *table_start, void *table_end) + const void *table_start, const void *table_end) { - struct unw_table_entry *start = table_start, *end = table_end; + const struct unw_table_entry *start = table_start, *end = table_end; struct unw_table *table; unsigned long flags; @@ -1936,6 +1947,47 @@ unw_remove_unwind_table (void *handle) } void +unw_create_gate_table (void) +{ + extern char __start_gate_section[], __stop_gate_section[]; + unsigned long *lp, start, end, segbase = unw.kernel_table.segment_base; + const struct unw_table_entry *entry, *first; + size_t info_size, size; + char *info; + + start = (unsigned long) __start_gate_section - segbase; + end = (unsigned long) __stop_gate_section - segbase; + size = 0; + first = lookup(&unw.kernel_table, start); + + for (entry = first; entry->start_offset < end; ++entry) + size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + size += 8; /* reserve space for "end of table" marker */ + + unw.gate_table = alloc_bootmem(size); + if (!unw.gate_table) { + unw.gate_table_size = 0; + printk("unwind: unable to create unwind data for gate page!\n"); + return; + } + unw.gate_table_size = size; + + lp = unw.gate_table; + info = (char *) unw.gate_table + size; + + for (entry = first; entry->start_offset < end; ++entry, lp += 3) { + info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + info -= info_size; + memcpy(info, (char *) segbase + entry->info_offset, info_size); + + lp[0] = entry->start_offset - start + GATE_ADDR; /* start */ + lp[1] = entry->end_offset - start + GATE_ADDR; /* end */ + lp[2] = info - (char 
*) unw.gate_table; /* info */ + } + *lp = 0; /* end-of-table marker */ +} + +void unw_init (void) { extern int ia64_unw_start, ia64_unw_end, __gp; @@ -1974,3 +2026,34 @@ unw_init (void) init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) &__gp, &ia64_unw_start, &ia64_unw_end); } + +/* + * This system call copies the unwind data into the buffer pointed to by BUF and returns + * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data + * or if BUF is NULL, nothing is copied, but the system call still returns the size of the + * unwind data. + * + * The first portion of the unwind data contains an unwind table and the rest contains the + * associated unwind info (in no particular order). The unwind table consists of a table + * of entries of the form: + * + * u64 start; (64-bit address of start of function) + * u64 end; (64-bit address of end of function) + * u64 info; (BUF-relative offset to unwind info) + * + * The end of the unwind table is indicated by an entry with a START address of zero. + * + * Please see the IA-64 Software Conventions and Runtime Architecture manual for details + * on the format of the unwind info. + * + * ERRORS + * EFAULT BUF points outside your accessible address space. 
+ */ +asmlinkage long +sys_getunwind (void *buf, size_t buf_size) +{ + if (buf && buf_size >= unw.gate_table_size) + if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0) + return -EFAULT; + return unw.gate_table_size; +} diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h index 383d65effcec..8aaff0a9c16c 100644 --- a/arch/ia64/kernel/unwind_i.h +++ b/arch/ia64/kernel/unwind_i.h @@ -58,7 +58,7 @@ struct unw_table { unsigned long segment_base; /* base for offsets in the unwind table entries */ unsigned long start; unsigned long end; - struct unw_table_entry *array; + const struct unw_table_entry *array; unsigned long length; }; diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 2214e616c130..714aa1ffb3a9 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -14,11 +14,7 @@ obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ checksum.o clear_page.o csum_partial_copy.o copy_page.o \ copy_user.o clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ flush.o io.o do_csum.o \ - swiotlb.o - -ifneq ($(CONFIG_ITANIUM_ASTEP_SPECIFIC),y) - obj-y += memcpy.o memset.o strlen.o -endif + memcpy.o memset.o strlen.o swiotlb.o IGNORE_FLAGS_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c index 9c4a8af75a5a..79dbe8af10db 100644 --- a/arch/ia64/lib/checksum.c +++ b/arch/ia64/lib/checksum.c @@ -9,7 +9,7 @@ * This file contains network checksum routines that are better done * in an architecture-specific manner due to speed.. */ - + #include <linux/string.h> #include <asm/byteorder.h> @@ -55,8 +55,7 @@ unsigned int csum_tcpudp_nofold(unsigned long saddr, ((unsigned long) ntohs(len) << 16) + ((unsigned long) proto << 8)); - /* Fold down to 32-bits so we don't loose in the typedef-less - network stack. */ + /* Fold down to 32-bits so we don't loose in the typedef-less network stack. 
*/ /* 64 to 33 */ result = (result & 0xffffffff) + (result >> 32); /* 33 to 32 */ @@ -64,8 +63,7 @@ unsigned int csum_tcpudp_nofold(unsigned long saddr, return result; } -extern unsigned long do_csum(const unsigned char *, unsigned int, unsigned int); -extern unsigned long do_csum_c(const unsigned char *, unsigned int, unsigned int); +extern unsigned long do_csum (const unsigned char *, long); /* * This is a version of ip_compute_csum() optimized for IP headers, @@ -73,7 +71,7 @@ extern unsigned long do_csum_c(const unsigned char *, unsigned int, unsigned int */ unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl) { - return ~do_csum(iph,ihl*4,0); + return ~do_csum(iph, ihl*4); } /* @@ -90,7 +88,7 @@ unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl) */ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) { - unsigned long result = do_csum(buff, len, 0); + unsigned long result = do_csum(buff, len); /* add in old sum, and carry.. */ result += sum; @@ -106,5 +104,5 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) */ unsigned short ip_compute_csum(unsigned char * buff, int len) { - return ~do_csum(buff,len, 0); + return ~do_csum(buff,len); } diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S index 9750cef96b4f..810d51be909c 100644 --- a/arch/ia64/lib/clear_page.S +++ b/arch/ia64/lib/clear_page.S @@ -1,8 +1,6 @@ /* * - * Optimized version of the standard clearpage() function - * - * Based on comments from ddd. Try not to overflow the write buffer. + * Optimized function to clear a page of memory. * * Inputs: * in0: address of page @@ -13,27 +11,41 @@ * Copyright (C) 1999-2001 Hewlett-Packard Co * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 1/06/01 davidm Tuned for Itanium. 
*/ #include <asm/asmmacro.h> #include <asm/page.h> +#define saved_lc r2 +#define dst0 in0 +#define dst1 r8 +#define dst2 r9 +#define dst3 r10 +#define dst_fetch r11 + GLOBAL_ENTRY(clear_page) .prologue - alloc r11=ar.pfs,1,0,0,0 - .save ar.lc, r16 - mov r16=ar.lc // slow - - .body - - mov r17=PAGE_SIZE/32-1 // -1 = repeat/until + .regstk 1,0,0,0 + mov r16 = PAGE_SIZE/64-1 // -1 = repeat/until ;; - adds r18=16,in0 - mov ar.lc=r17 + .save ar.lc, saved_lc + mov saved_lc = ar.lc + .body + mov ar.lc = r16 + adds dst1 = 16, dst0 + adds dst2 = 32, dst0 + adds dst3 = 48, dst0 + adds dst_fetch = 512, dst0 ;; -1: stf.spill.nta [in0]=f0,32 - stf.spill.nta [r18]=f0,32 +1: stf.spill.nta [dst0] = f0, 64 + stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + stf.spill.nta [dst3] = f0, 64 + + lfetch [dst_fetch], 64 br.cloop.dptk.few 1b ;; - mov ar.lc=r16 // restore lc + mov ar.lc = r2 // restore lc br.ret.sptk.few rp END(clear_page) diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S index 8f504e4c4ed0..eac872d77ac8 100644 --- a/arch/ia64/lib/clear_user.S +++ b/arch/ia64/lib/clear_user.S @@ -69,7 +69,7 @@ GLOBAL_ENTRY(__do_clear_user) (p6) br.cond.dptk.few long_do_clear ;; // WAR on ar.lc // - // worst case 16 cyles, avg 8 cycles + // worst case 16 iterations, avg 8 iterations // // We could have played with the predicates to use the extra // M slot for 2 stores/iteration but the cost the initialization diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S index 3aede64e49b9..5739223f99a6 100644 --- a/arch/ia64/lib/copy_page.S +++ b/arch/ia64/lib/copy_page.S @@ -2,8 +2,6 @@ * * Optimized version of the standard copy_page() function * - * Based on comments from ddd. Try not to overflow write buffer. 
- * * Inputs: * in0: address of target page * in1: address of source page @@ -12,11 +10,14 @@ * * Copyright (C) 1999, 2001 Hewlett-Packard Co * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2001 David Mosberger <davidm@hpl.hp.com> + * + * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. */ #include <asm/asmmacro.h> #include <asm/page.h> -#define PIPE_DEPTH 6 +#define PIPE_DEPTH 3 #define EPI p[PIPE_DEPTH-1] #define lcount r16 @@ -27,62 +28,67 @@ #define src2 r21 #define tgt1 r22 #define tgt2 r23 +#define srcf r24 +#define tgtf r25 + +#define Nrot ((8*PIPE_DEPTH+7)&~7) GLOBAL_ENTRY(copy_page) .prologue .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH] + .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ + t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] .rotp p[PIPE_DEPTH] .save ar.lc, saved_lc - mov saved_lc=ar.lc // save ar.lc ahead of time + mov saved_lc=ar.lc + mov ar.ec=PIPE_DEPTH + + mov lcount=PAGE_SIZE/64-1 .save pr, saved_pr - mov saved_pr=pr // rotating predicates are preserved - // resgisters we must save. 
- .body + mov saved_pr=pr + mov pr.rot=1<<16 - mov src1=in1 // initialize 1st stream source - adds src2=8,in1 // initialize 2nd stream source - mov lcount=PAGE_SIZE/16-1 // as many 16bytes as there are on a page - // -1 is because br.ctop is repeat/until + .body - adds tgt2=8,in0 // initialize 2nd stream target - mov tgt1=in0 // initialize 1st stream target + mov src1=in1 + adds src2=8,in1 ;; - mov pr.rot=1<<16 // pr16=1 & pr[17-63]=0 , 63 not modified - - mov ar.lc=lcount // set loop counter - mov ar.ec=PIPE_DEPTH // ar.ec must match pipeline depth + adds tgt2=8,in0 + add srcf=512,in1 + mov ar.lc=lcount + mov tgt1=in0 + add tgtf=512,in0 + ;; +1: +(p[0]) ld8 t1[0]=[src1],16 +(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 +(p[0]) ld8 t2[0]=[src2],16 +(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t3[0]=[src1],16 +(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 +(p[0]) ld8 t4[0]=[src2],16 +(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t5[0]=[src1],16 +(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 +(p[0]) ld8 t6[0]=[src2],16 +(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 ;; +(p[0]) ld8 t7[0]=[src1],16 +(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 +(p[0]) ld8 t8[0]=[src2],16 +(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 - // We need to preload the n-1 stages of the pipeline (n=depth). - // We do this during the "prolog" of the loop: we execute - // n-1 times the "load" bundle. Then both loads & stores are - // enabled until we reach the end of the last word of the page - // on the load side. Then, we enter the epilog (controlled by ec) - // where we just do the stores and no loads n times : drain the pipe - // (we exit the loop when ec=1). - // - // The initialization of the prolog is done via the predicate registers: - // the choice of EPI DEPENDS on the depth of the pipeline (n). - // When lc > 0 pr63=1 and it is fed back into pr16 and pr16-pr62 - // are then shifted right at every iteration, - // Thus by initializing pr16=1 and the rest to 0 before the loop - // we get EPI=1 after n iterations. 
- // -1: // engage loop now, let the magic happen... -(p16) ld8 t1[0]=[src1],16 // new data on top of pipeline in 1st stream -(p16) ld8 t2[0]=[src2],16 // new data on top of pipeline in 2nd stream - nop.i 0x0 -(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 // store top of 1st pipeline -(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 // store top of 2nd pipeline - br.ctop.dptk.few 1b // once lc==0, ec-- & p16=0 - // stores but no loads anymore + lfetch [srcf], 64 + lfetch [tgtf], 64 + br.ctop.sptk.few 1b ;; mov pr=saved_pr,0xffffffffffff0000 // restore predicates - mov ar.pfs=saved_pfs // restore ar.ec - mov ar.lc=saved_lc // restore saved lc - br.ret.sptk.few rp // bye... + mov ar.pfs=saved_pfs + mov ar.lc=saved_lc + br.ret.sptk.few rp END(copy_page) diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S index 46d041733a72..dc8cd56e7835 100644 --- a/arch/ia64/lib/copy_user.S +++ b/arch/ia64/lib/copy_user.S @@ -35,9 +35,9 @@ // Tuneable parameters // #define COPY_BREAK 16 // we do byte copy below (must be >=16) -#define PIPE_DEPTH 4 // pipe depth +#define PIPE_DEPTH 21 // pipe depth -#define EPI p[PIPE_DEPTH-1] // PASTE(p,16+PIPE_DEPTH-1) +#define EPI p[PIPE_DEPTH-1] // // arguments @@ -148,8 +148,8 @@ diff_align_copy_user: // // - // Optimization. If dst1 is 8-byte aligned (not rarely), we don't need - // to copy the head to dst1, to start 8-byte copy software pipleline. + // Optimization. If dst1 is 8-byte aligned (quite common), we don't need + // to copy the head to dst1, to start 8-byte copy software pipeline. // We know src1 is not 8-byte aligned in this case. 
// cmp.eq p14,p15=r0,dst2 @@ -233,15 +233,23 @@ word_copy_user: #define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift #define CASE(pred, shift) \ (pred) br.cond.spnt.few copy_user_bit##shift -#define BODY(rshift) \ -copy_user_bit##rshift: \ -1: \ - EX(failure_out,(EPI) st8 [dst1]=tmp,8); \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift; \ - EX(failure_in2,(p16) ld8 val1[0]=[src1],8); \ - br.ctop.dptk.few 1b; \ - ;; \ - br.cond.spnt.few .diff_align_do_tail +#define BODY(rshift) \ +copy_user_bit##rshift: \ +1: \ + EX(failure_out,(EPI) st8 [dst1]=tmp,8); \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift; \ + EX(3f,(p16) ld8 val1[0]=[src1],8); \ + br.ctop.dptk.few 1b; \ + ;; \ + br.cond.sptk.few .diff_align_do_tail; \ +2: \ +(EPI) st8 [dst1]=tmp,8; \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift; \ +3: \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk.few 2b; \ + ;; \ + br.cond.sptk.few failure_in2 // // Since the instruction 'shrp' requires a fixed 128-bit value @@ -581,13 +589,7 @@ failure_in3: br.ret.dptk.few rp failure_in2: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - ;; -3: -(p16) mov val1[0]=r0 -(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],8 - br.ctop.dptk.few 3b - ;; + sub ret0=endsrc,src1 cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? sub len=enddst,dst1,1 // precompute len (p6) br.cond.dptk.few failure_in1bis diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c index 7ee1a243f386..9f88404011be 100644 --- a/arch/ia64/lib/csum_partial_copy.c +++ b/arch/ia64/lib/csum_partial_copy.c @@ -101,7 +101,7 @@ out: * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS. * But it's very tricky to get right even in C. 
*/ -extern unsigned long do_csum(const unsigned char *, int); +extern unsigned long do_csum(const unsigned char *, long); static unsigned int do_csum_partial_copy_from_user (const char *src, char *dst, int len, diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S index 6ae8edf989ca..628cb9053a62 100644 --- a/arch/ia64/lib/do_csum.S +++ b/arch/ia64/lib/do_csum.S @@ -11,6 +11,12 @@ * Copyright (C) 1999, 2001 Hewlett-Packard Co * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> * + * 01/04/18 Jun Nakajima <jun.nakajima@intel.com> + * Clean up and optimize and the software pipeline, loading two + * back-to-back 8-byte words per loop. Clean up the initialization + * for the loop. Support the cases where load latency = 1 or 2. + * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). + * */ #include <asm/asmmacro.h> @@ -18,51 +24,54 @@ // // Theory of operations: // The goal is to go as quickly as possible to the point where -// we can checksum 8 bytes/loop. Before reaching that point we must +// we can checksum 16 bytes/loop. Before reaching that point we must // take care of incorrect alignment of first byte. // // The code hereafter also takes care of the "tail" part of the buffer // before entering the core loop, if any. The checksum is a sum so it -// allows us to commute operations. So we do do the "head" and "tail" +// allows us to commute operations. So we do the "head" and "tail" // first to finish at full speed in the body. Once we get the head and // tail values, we feed them into the pipeline, very handy initialization. // // Of course we deal with the special case where the whole buffer fits // into one 8 byte word. In this case we have only one entry in the pipeline. // -// We use a (3+1)-stage pipeline in the loop to account for possible -// load latency and also to accomodate for head and tail. +// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for +// possible load latency and also to accomodate for head and tail. 
// // The end of the function deals with folding the checksum from 64bits // down to 16bits taking care of the carry. // // This version avoids synchronization in the core loop by also using a -// pipeline for the accumulation of the checksum in result[]. +// pipeline for the accumulation of the checksum in resultx[] (x=1,2). // -// p[] +// wordx[] (x=1,2) // |---| -// 0| | r32 : new value loaded in pipeline +// | | 0 : new value loaded in pipeline // |---| -// 1| | r33 : in transit data +// | | - : in transit data // |---| -// 2| | r34 : current value to add to checksum +// | | LOAD_LATENCY : current value to add to checksum // |---| -// 3| | r35 : previous value added to checksum (previous iteration) -// |---| +// | | LOAD_LATENCY+1 : previous value added to checksum +// |---| (previous iteration) // -// result[] +// resultx[] (x=1,2) // |---| -// 0| | r36 : new checksum +// | | 0 : initial value // |---| -// 1| | r37 : previous value of checksum +// | | LOAD_LATENCY-1 : new checksum // |---| -// 2| | r38 : final checksum when out of the loop (after 2 epilogue rots) +// | | LOAD_LATENCY : previous value of checksum // |---| +// | | LOAD_LATENCY+1 : final checksum when out of the loop +// |---| // // +// See RFC1071 "Computing the Internet Checksum" for various techniques for +// calculating the Internet checksum. +// // NOT YET DONE: -// - Take advantage of the MMI bandwidth to load more than 8byte per loop -// iteration // - use the lfetch instruction to augment the chances of the data being in // the cache when we need it. // - Maybe another algorithm which would take care of the folding at the @@ -71,14 +80,12 @@ // to figure out if we could not split the function depending on the // type of packet or alignment we get. Like the ip_fast_csum() routine // where we know we have at least 20bytes worth of data to checksum. -// - Look at RFCs about checksums to see whether or not we can do better -// // - Do a better job of handling small packets. 
-// + #define saved_pfs r11 #define hmask r16 #define tmask r17 -#define first r18 +#define first1 r18 #define firstval r19 #define firstoff r20 #define last r21 @@ -89,32 +96,47 @@ #define tmp1 r26 #define tmp2 r27 #define tmp3 r28 -#define carry r29 +#define carry1 r29 +#define carry2 r30 +#define first2 r31 #define buf in0 #define len in1 -// unsigned long do_csum(unsigned char *buf,int len) +#ifndef CONFIG_IA64_LOAD_LATENCY +#define CONFIG_IA64_LOAD_LATENCY 2 +#endif + +#define LOAD_LATENCY 2 // XXX fix me + +#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) +# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." +#endif + +#define PIPE_DEPTH (LOAD_LATENCY+2) +#define ELD p[LOAD_LATENCY] // end of load +#define ELD_1 p[LOAD_LATENCY+1] // and next stage + +// unsigned long do_csum(unsigned char *buf,long len) GLOBAL_ENTRY(do_csum) .prologue .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,8,0,8 - - .rotr p[4], result[3] + alloc saved_pfs=ar.pfs,2,16,1,16 + .rotr word1[4], word2[4],result1[4],result2[4] + .rotp p[PIPE_DEPTH] mov ret0=r0 // in case we have zero length - cmp4.lt p0,p6=r0,len // check for zero length or negative (32bit len) + cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) ;; // avoid WAW on CFM mov tmp3=0x7 // a temporary mask/value add tmp1=buf,len // last byte's address (p6) br.ret.spnt.few rp // return if true (hope we can avoid that) - and firstoff=7,buf // how many bytes off for first element - tbit.nz p10,p0=buf,0 // is buf an odd address ? + and firstoff=7,buf // how many bytes off for first1 element + tbit.nz p15,p0=buf,0 // is buf an odd address ? 
mov hmask=-1 // intialize head mask ;; - - andcm first=buf,tmp3 // 8byte aligned down address of first element + andcm first1=buf,tmp3 // 8byte aligned down address of first1 element mov tmask=-1 // initialize tail mask adds tmp2=-1,tmp1 // last-1 ;; @@ -123,75 +145,125 @@ GLOBAL_ENTRY(do_csum) .save pr, saved_pr mov saved_pr=pr // preserve predicates (rotation) ;; - sub tmp3=last,first // tmp3=distance from first to last - cmp.eq p8,p9=last,first // everything fits in one word ? + sub tmp3=last,first1 // tmp3=distance from first1 to last + cmp.eq p8,p9=last,first1 // everything fits in one word ? sub tmp1=8,lastoff // complement to lastoff - - ld8 firstval=[first],8 // load,ahead of time, "first" word + ld8 firstval=[first1],8 // load,ahead of time, "first1" word shl tmp2=firstoff,3 // number of bits ;; and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 - (p9) ld8 lastval=[last] // load,ahead of time, "last" word, if needed -(p8) mov lastval=r0 // we don't need lastval if first==last - mov result[1]=r0 // initialize result +(p9) adds tmp3=-8,tmp3 // effectively loaded ;; - +(p8) mov lastval=r0 // we don't need lastval if first1==last shl tmp1=tmp1,3 // number of bits - shl hmask=hmask,tmp2 // build head mask, mask off [0,firstoff[ + shl hmask=hmask,tmp2 // build head mask, mask off [0,first1off[ ;; shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] .save ar.lc, saved_lc mov saved_lc=ar.lc // save lc ;; - .body +#define count tmp3 (p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only -(p9) and p[1]=lastval,tmask // mask last it as appropriate - shr.u tmp3=tmp3,3 // we do 8 bytes per loop +(p9) and word2[0]=lastval,tmask // mask last it as appropriate + shr.u count=count,3 // we do 8 bytes per loop (count) ;; - cmp.lt p6,p7=2,tmp3 // tmp3 > 2 ? 
- and p[2]=firstval,hmask // and mask it as appropriate - add tmp1=-2,tmp3 // -2 = -1 (br.ctop) -1 (last-first) + // If count is odd, finish this 8-byte word so that we can + // load two back-to-back 8-byte words per loop thereafter. + tbit.nz p10,p11=count,0 // if (count is odd) + and word1[0]=firstval,hmask // and mask it as appropriate + ;; +(p8) mov result1[0]=word1[0] +(p9) add result1[0]=word1[0],word2[0] + ;; + cmp.ltu p6,p0=result1[0],word1[0] // check the carry + ;; +(p6) adds result1[0]=1,result1[0] +(p8) br.cond.dptk.few do_csum_exit // if (within an 8-byte word) + ;; +(p11) br.cond.dptk.few do_csum16 // if (count is even) + ;; + // Here count is odd. + ld8 word1[1]=[first1],8 // load an 8-byte word + cmp.eq p9,p10=1,count // if (count == 1) + adds count=-1,count // loaded an 8-byte word + ;; + add result1[0]=result1[0],word1[1] + ;; + cmp.ltu p6,p0=result1[0],word1[1] + ;; +(p6) adds result1[0]=1,result1[0] + ;; +(p9) br.cond.sptk.few do_csum_exit // if (count == 1) exit + // Fall through to caluculate the checksum, feeding result1[0] as + // the initial value in result1[0]. ;; - // XXX Fixme: not very nice initialization here - // - // Setup loop control registers: // - // tmp3=0 (1 word) : lc=0, ec=2, p16=F - // tmp3=1 (2 words) : lc=0, ec=3, p16=F - // tmp3=2 (3 words) : lc=0, ec=4, p16=T - // tmp3>2 (4 or more): lc=tmp3-2, ec=4, p16=T + // Calculate the checksum loading two 8-byte words per loop. // - cmp.eq p8,p9=r0,tmp3 // tmp3 == 0 ? -(p6) mov ar.lc=tmp1 -(p7) mov ar.lc=0 +do_csum16: + mov saved_lc=ar.lc + shr.u count=count,1 // we do 16 bytes per loop ;; - cmp.lt p6,p7=1,tmp3 // tmp3 > 1 ? 
-(p8) mov ar.ec=2 // we need the extra rotation on result[] -(p9) mov ar.ec=3 // hard not to set it twice sometimes + cmp.eq p9,p10=r0,count // if (count == 0) + brp.loop.imp 1f,2f ;; - mov carry=r0 // initialize carry -(p6) mov ar.ec=4 -(p6) mov pr.rot=0xffffffffffff0000 // p16=T, p18=T - - cmp.ne p8,p0=r0,r0 // p8 is false - mov p[3]=r0 // make sure first compare fails -(p7) mov pr.rot=0xfffffffffffe0000 // p16=F, p18=T + adds count=-1,count + mov ar.ec=PIPE_DEPTH + ;; + mov ar.lc=count // set lc + ;; + // result1[0] must be initialized in advance. + mov result2[0]=r0 + ;; + mov pr.rot=1<<16 + ;; + mov carry1=r0 + mov carry2=r0 + ;; + add first2=8,first1 + ;; +(p9) br.cond.sptk.few do_csum_exit + ;; + nop.m 0 + nop.i 0 ;; + .align 32 1: -(p16) ld8 p[0]=[first],8 // load next -(p8) adds carry=1,carry // add carry on prev_prev_value -(p18) add result[0]=result[1],p[2] // new_res = prev_res + cur_val - cmp.ltu p8,p0=result[1],p[3] // p8= prev_result < prev_val - br.ctop.dptk.few 1b // loop until lc--==0 - ;; // RAW on carry when loop exits - (p8) adds carry=1,carry;; // correct for carry on prev_value - add result[2]=carry,result[2];; // add carry to final result - cmp.ltu p6,p7=result[2], carry // check for new carry - ;; -(p6) adds result[2]=1,result[1] // correct if required +(ELD_1) cmp.ltu p31,p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] +(p32) adds carry1=1,carry1 +(ELD_1) cmp.ltu p47,p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] +(p48) adds carry2=1,carry2 +(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] +(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] +2: +(p16) ld8 word1[0]=[first1],16 +(p16) ld8 word2[0]=[first2],16 + br.ctop.sptk.few 1b + ;; + // Since len is a 32-bit value, carry cannot be larger than + // a 64-bit value. 
+(p32) adds carry1=1,carry1 // since we miss the last one +(p48) adds carry2=1,carry2 + ;; + add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 + add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 + ;; + cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 + cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 + ;; +(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] +(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] + ;; + add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] + ;; + cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] + ;; +(p6) adds result1[0]=1,result1[0] + ;; +do_csum_exit: movl tmp3=0xffffffff ;; // XXX Fixme @@ -199,33 +271,66 @@ GLOBAL_ENTRY(do_csum) // now fold 64 into 16 bits taking care of carry // that's not very good because it has lots of sequentiality // - and tmp1=result[2],tmp3 - shr.u tmp2=result[2],32 + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],32 ;; - add result[2]=tmp1,tmp2 + add result1[0]=tmp1,tmp2 shr.u tmp3=tmp3,16 ;; - and tmp1=result[2],tmp3 - shr.u tmp2=result[2],16 + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 ;; - add result[2]=tmp1,tmp2 + add result1[0]=tmp1,tmp2 ;; - and tmp1=result[2],tmp3 - shr.u tmp2=result[2],16 + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 ;; - add result[2]=tmp1,tmp2 + add result1[0]=tmp1,tmp2 ;; - and tmp1=result[2],tmp3 - shr.u tmp2=result[2],16 + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 ;; add ret0=tmp1,tmp2 mov pr=saved_pr,0xffffffffffff0000 ;; // if buf was odd then swap bytes mov ar.pfs=saved_pfs // restore ar.ec -(p10) mux1 ret0=ret0,@rev // reverse word +(p15) mux1 ret0=ret0,@rev // reverse word ;; mov ar.lc=saved_lc -(p10) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes +(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes br.ret.sptk.few rp + +// I (Jun Nakajima) wrote an equivalent code (see below), but it was +// not much better than the original. 
So keep the original there so that +// someone else can challenge. +// +// shr.u word1[0]=result1[0],32 +// zxt4 result1[0]=result1[0] +// ;; +// add result1[0]=result1[0],word1[0] +// ;; +// zxt2 result2[0]=result1[0] +// extr.u word1[0]=result1[0],16,16 +// shr.u carry1=result1[0],32 +// ;; +// add result2[0]=result2[0],word1[0] +// ;; +// add result2[0]=result2[0],carry1 +// ;; +// extr.u ret0=result2[0],16,16 +// ;; +// add ret0=ret0,result2[0] +// ;; +// zxt2 ret0=ret0 +// mov ar.pfs=saved_pfs // restore ar.ec +// mov pr=saved_pr,0xffffffffffff0000 +// ;; +// // if buf was odd then swap bytes +// mov ar.lc=saved_lc +//(p15) mux1 ret0=ret0,@rev // reverse word +// ;; +//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes +// br.ret.sptk.few rp + END(do_csum) diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c index 24d3a30d2508..c67432bacc05 100644 --- a/arch/ia64/lib/swiotlb.c +++ b/arch/ia64/lib/swiotlb.c @@ -263,7 +263,7 @@ swiotlb_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_ha memset(ret, 0, size); pci_addr = virt_to_phys(ret); - if ((pci_addr & ~hwdev->dma_mask) != 0) + if (hwdev && (pci_addr & ~hwdev->dma_mask) != 0) panic("swiotlb_alloc_consistent: allocated memory is out of range for PCI device"); *dma_handle = pci_addr; return ret; diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 0b80340d21f0..868d8ba4356e 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c @@ -6,8 +6,9 @@ */ #include <linux/config.h> -#include <linux/module.h> + #include <asm/uaccess.h> +#include <asm/module.h> extern const struct exception_table_entry __start___ex_table[]; extern const struct exception_table_entry __stop___ex_table[]; @@ -15,35 +16,25 @@ extern const struct exception_table_entry __stop___ex_table[]; static inline const struct exception_table_entry * search_one_table (const struct exception_table_entry *first, const struct exception_table_entry *last, - signed long value) + unsigned 
long ip, unsigned long gp) { - /* Abort early if the search value is out of range. */ - if (value != (signed int)value) - return 0; - while (first <= last) { const struct exception_table_entry *mid; long diff; - /* - * We know that first and last are both kernel virtual - * pointers (region 7) so first+last will cause an - * overflow. We fix that by calling __va() on the - * result, which will ensure that the top two bits get - * set again. - */ - mid = (void *) __va((((__u64) first + (__u64) last)/2/sizeof(*mid))*sizeof(*mid)); - diff = mid->addr - value; + + mid = &first[(last - first)/2]; + diff = (mid->addr + gp) - ip; if (diff == 0) return mid; else if (diff < 0) - first = mid+1; + first = mid + 1; else - last = mid-1; + last = mid - 1; } return 0; } -#ifndef CONFIG_MODULE +#ifndef CONFIG_MODULES register unsigned long main_gp __asm__("gp"); #endif @@ -53,23 +44,25 @@ search_exception_table (unsigned long addr) const struct exception_table_entry *entry; struct exception_fixup fix = { 0 }; -#ifndef CONFIG_MODULE +#ifndef CONFIG_MODULES /* There is only the kernel to search. */ - entry = search_one_table(__start___ex_table, __stop___ex_table - 1, addr - main_gp); + entry = search_one_table(__start___ex_table, __stop___ex_table - 1, addr, main_gp); if (entry) fix.cont = entry->cont + main_gp; return fix; #else - struct exception_table_entry *ret; - /* The kernel is the last "module" -- no need to treat it special. */ + struct archdata *archdata; struct module *mp; + /* The kernel is the last "module" -- no need to treat it special. 
*/ for (mp = module_list; mp ; mp = mp->next) { if (!mp->ex_table_start) continue; - entry = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, addr - mp->gp); + archdata = (struct archdata *) mp->archdata_start; + entry = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, + addr, (unsigned long) archdata->gp); if (entry) { - fix.cont = entry->cont + mp->gp; + fix.cont = entry->cont + (unsigned long) archdata->gp; return fix; } } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 73ae21e6b37e..01ee03f53b66 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -42,7 +42,7 @@ do_check_pgt_cache (int low, int high) if (pgtable_cache_size > high) { do { if (pgd_quicklist) - free_page((unsigned long)pgd_alloc_one_fast()), ++freed; + free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed; if (pmd_quicklist) free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed; if (pte_quicklist) @@ -111,7 +111,7 @@ free_initrd_mem(unsigned long start, unsigned long end) * * To avoid freeing/using the wrong page (kernel sized) we: * - align up the beginning of initrd - * - keep the end untouched + * - align down the end of initrd * * | | * |=============| a000 @@ -135,11 +135,14 @@ free_initrd_mem(unsigned long start, unsigned long end) * initrd_start and keep initrd_end as is. 
*/ start = PAGE_ALIGN(start); + end = end & PAGE_MASK; if (start < end) printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); for (; start < end; start += PAGE_SIZE) { + if (!VALID_PAGE(virt_to_page(start))) + continue; clear_bit(PG_reserved, &virt_to_page(start)->flags); set_page_count(virt_to_page(start), 1); free_page(start); @@ -225,7 +228,7 @@ put_gate_page (struct page *page, unsigned long address) } void __init -ia64_mmu_init (void) +ia64_mmu_init (void *my_cpu_data) { unsigned long flags, rid, pta, impl_va_bits; extern void __init tlb_init (void); @@ -242,7 +245,7 @@ ia64_mmu_init (void) ia64_clear_ic(flags); rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET); - ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (_PAGE_SIZE_64M << 2)); + ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (KERNEL_PG_SHIFT << 2)); rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START); ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1); @@ -251,8 +254,7 @@ ia64_mmu_init (void) ia64_srlz_d(); ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, - pte_val(mk_pte_phys(__pa(&cpu_data[smp_processor_id()]), PAGE_KERNEL)), - PAGE_SHIFT); + pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL)), PAGE_SHIFT); __restore_flags(flags); ia64_srlz_i(); @@ -354,6 +356,7 @@ mem_init (void) { extern char __start_gate_section[]; long reserved_pages, codesize, datasize, initsize; + unsigned long num_pgt_pages; #ifdef CONFIG_PCI /* @@ -387,6 +390,19 @@ mem_init (void) max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10); + /* + * Allow for enough (cached) page table pages so that we can map the entire memory + * at least once. Each task also needs a couple of page tables pages, so add in a + * fudge factor for that (don't use "threads-max" here; that would be wrong!). + * Don't allow the cache to be more than 10% of total memory, though. 
+ */ +# define NUM_TASKS 500 /* typical number of tasks */ + num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS; + if (num_pgt_pages > nr_free_pages() / 10) + num_pgt_pages = nr_free_pages() / 10; + if (num_pgt_pages > pgt_cache_water[1]) + pgt_cache_water[1] = num_pgt_pages; + /* install the gate page in the global page table: */ put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 3d78cb1f8970..8eb07ab94739 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -97,7 +97,7 @@ flush_tlb_no_ptcg (unsigned long start, unsigned long end, unsigned long nbits) /* * Wait for other CPUs to finish purging entries. */ -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) +#if defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) { extern void smp_resend_flush_tlb (void); unsigned long start = ia64_get_itc(); diff --git a/arch/ia64/sn/fprom/fw-emu.c b/arch/ia64/sn/fprom/fw-emu.c index 6b8144bf23d0..3440794529f5 100644 --- a/arch/ia64/sn/fprom/fw-emu.c +++ b/arch/ia64/sn/fprom/fw-emu.c @@ -8,7 +8,6 @@ * Copyright (C) 2000 Silicon Graphics, Inc. 
* Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) */ - #include <asm/efi.h> #include <asm/pal.h> #include <asm/sal.h> diff --git a/arch/ia64/sn/io/ml_SN_intr.c b/arch/ia64/sn/io/ml_SN_intr.c index b725012c29ac..2fe1e1a94f7a 100644 --- a/arch/ia64/sn/io/ml_SN_intr.c +++ b/arch/ia64/sn/io/ml_SN_intr.c @@ -36,7 +36,6 @@ #include <asm/sn/pci/pcibr_private.h> #include <asm/sn/intr.h> - #if DEBUG_INTR_TSTAMP_DEBUG #include <sys/debug.h> #include <sys/idbg.h> diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c index 02e06a65da3d..140611cd1e73 100644 --- a/arch/ia64/tools/print_offsets.c +++ b/arch/ia64/tools/print_offsets.c @@ -21,6 +21,8 @@ #include <asm-ia64/siginfo.h> #include <asm-ia64/sigcontext.h> +#include "../kernel/sigframe.h" + #ifdef offsetof # undef offsetof #endif @@ -46,6 +48,7 @@ tab[] = { "IA64_SWITCH_STACK_SIZE", sizeof (struct switch_stack) }, { "IA64_SIGINFO_SIZE", sizeof (struct siginfo) }, { "IA64_CPU_SIZE", sizeof (struct cpuinfo_ia64) }, + { "SIGFRAME_SIZE", sizeof (struct sigframe) }, { "UNW_FRAME_INFO_SIZE", sizeof (struct unw_frame_info) }, { "", 0 }, /* spacer */ { "IA64_TASK_PTRACE_OFFSET", offsetof (struct task_struct, ptrace) }, @@ -153,16 +156,25 @@ tab[] = { "IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET", offsetof (struct switch_stack, ar_bspstore) }, { "IA64_SWITCH_STACK_PR_OFFSET", offsetof (struct switch_stack, pr) }, { "IA64_SIGCONTEXT_AR_BSP_OFFSET", offsetof (struct sigcontext, sc_ar_bsp) }, + { "IA64_SIGCONTEXT_AR_FPSR_OFFSET", offsetof (struct sigcontext, sc_ar_fpsr) }, { "IA64_SIGCONTEXT_AR_RNAT_OFFSET", offsetof (struct sigcontext, sc_ar_rnat) }, - { "IA64_SIGCONTEXT_FLAGS_OFFSET", offsetof (struct sigcontext, sc_flags) }, + { "IA64_SIGCONTEXT_AR_UNAT_OFFSET", offsetof (struct sigcontext, sc_ar_unat) }, + { "IA64_SIGCONTEXT_B0_OFFSET", offsetof (struct sigcontext, sc_br[0]) }, { "IA64_SIGCONTEXT_CFM_OFFSET", offsetof (struct sigcontext, sc_cfm) }, + { "IA64_SIGCONTEXT_FLAGS_OFFSET", offsetof (struct 
sigcontext, sc_flags) }, { "IA64_SIGCONTEXT_FR6_OFFSET", offsetof (struct sigcontext, sc_fr[6]) }, + { "IA64_SIGCONTEXT_PR_OFFSET", offsetof (struct sigcontext, sc_pr) }, + { "IA64_SIGCONTEXT_R12_OFFSET", offsetof (struct sigcontext, sc_gr[12]) }, + { "IA64_SIGFRAME_ARG0_OFFSET", offsetof (struct sigframe, arg0) }, + { "IA64_SIGFRAME_ARG1_OFFSET", offsetof (struct sigframe, arg1) }, + { "IA64_SIGFRAME_ARG2_OFFSET", offsetof (struct sigframe, arg2) }, + { "IA64_SIGFRAME_RBS_BASE_OFFSET", offsetof (struct sigframe, rbs_base) }, + { "IA64_SIGFRAME_HANDLER_OFFSET", offsetof (struct sigframe, handler) }, + { "IA64_SIGFRAME_SIGCONTEXT_OFFSET", offsetof (struct sigframe, sc) }, { "IA64_CLONE_VFORK", CLONE_VFORK }, { "IA64_CLONE_VM", CLONE_VM }, { "IA64_CPU_IRQ_COUNT_OFFSET", offsetof (struct cpuinfo_ia64, irq_stat.f.irq_count) }, { "IA64_CPU_BH_COUNT_OFFSET", offsetof (struct cpuinfo_ia64, irq_stat.f.bh_count) }, - { "IA64_CPU_SOFTIRQ_ACTIVE_OFFSET", offsetof (struct cpuinfo_ia64, softirq.active) }, - { "IA64_CPU_SOFTIRQ_MASK_OFFSET", offsetof (struct cpuinfo_ia64, softirq.mask) }, { "IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET", offsetof (struct cpuinfo_ia64, phys_stacked_size_p8) }, }; diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index 9064b6321972..727e44ba264a 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -25,7 +25,7 @@ SECTIONS .text : AT(ADDR(.text) - PAGE_OFFSET) { *(.text.ivt) - /* these are not really text pages, but the zero page needs to be in a fixed location: */ + /* these are not really text pages, but they need to be page aligned: */ *(__special_page_section) __start_gate_section = .; *(.text.gate) @@ -74,9 +74,9 @@ SECTIONS __stop___kallsyms = .; /* Unwind info & table: */ + . = ALIGN(8); .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - PAGE_OFFSET) { *(.IA_64.unwind_info*) } - . 
= ALIGN(8); ia64_unw_start = .; .IA_64.unwind : AT(ADDR(.IA_64.unwind) - PAGE_OFFSET) { *(.IA_64.unwind*) } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 33b612ba5cbe..9cd9ff915e94 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -119,17 +119,10 @@ int * max_readahead[MAX_BLKDEV]; int * max_sectors[MAX_BLKDEV]; /* - * queued sectors for all devices, used to make sure we don't fill all - * of memory with locked buffers + * How many reqeusts do we allocate per queue, + * and how many do we "batch" on freeing them? */ -atomic_t queued_sectors; - -/* - * high and low watermark for above - */ -static int high_queued_sectors, low_queued_sectors; -static int batch_requests, queue_nr_requests; -static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); +static int queue_nr_requests, batch_requests; static inline int get_max_sectors(kdev_t dev) { @@ -592,13 +585,6 @@ inline void blkdev_release_request(struct request *req) */ if (q) { /* - * we've released enough buffers to start I/O again - */ - if (waitqueue_active(&blk_buffers_wait) - && atomic_read(&queued_sectors) < low_queued_sectors) - wake_up(&blk_buffers_wait); - - /* * Add to pending free list and batch wakeups */ list_add(&req->table, &q->pending_freelist[rw]); @@ -1032,16 +1018,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - /* - * don't lock any more buffers if we are above the high - * water mark. instead start I/O on the queued stuff. - */ - if (atomic_read(&queued_sectors) >= high_queued_sectors) { - run_task_queue(&tq_disk); - wait_event(blk_buffers_wait, - atomic_read(&queued_sectors) < low_queued_sectors); - } - /* Only one thread can actually submit the I/O. 
*/ if (test_and_set_bit(BH_Lock, &bh->b_state)) continue; @@ -1168,42 +1144,21 @@ int __init blk_dev_init(void) memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); - atomic_set(&queued_sectors, 0); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); /* - * Try to keep 128MB max hysteris. If not possible, - * use half of RAM - */ - high_queued_sectors = (total_ram * 2) / 3; - low_queued_sectors = high_queued_sectors / 3; - if (high_queued_sectors - low_queued_sectors > MB(128)) - low_queued_sectors = high_queued_sectors - MB(128); - - - /* - * make it sectors (512b) + * Free request slots per queue. + * (Half for reads, half for writes) */ - high_queued_sectors <<= 1; - low_queued_sectors <<= 1; + queue_nr_requests = 64; + if (total_ram > MB(32)) + queue_nr_requests = 128; /* - * Scale free request slots per queue too + * Batch frees according to queue length */ - total_ram = (total_ram + MB(32) - 1) & ~(MB(32) - 1); - if ((queue_nr_requests = total_ram >> 9) > QUEUE_NR_REQUESTS) - queue_nr_requests = QUEUE_NR_REQUESTS; - - /* - * adjust batch frees according to queue length, with upper limit - */ - if ((batch_requests = queue_nr_requests >> 3) > 32) - batch_requests = 32; - - printk("block: queued sectors max/low %dkB/%dkB, %d slots per queue\n", - high_queued_sectors / 2, - low_queued_sectors / 2, - queue_nr_requests); + batch_requests = queue_nr_requests >> 3; + printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); #ifdef CONFIG_AMIGA_Z2RAM z2_init(); @@ -1324,4 +1279,3 @@ EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); -EXPORT_SYMBOL(queued_sectors); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7edfe35f10f9..8b4b695f95ca 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -655,25 +655,35 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int 
good_sectors, } #endif } - if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70 - && (SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) { - if (SCpnt->device->removable) { - /* detected disc change. set a bit and quietly refuse - * further access. - */ - SCpnt->device->changed = 1; - SCpnt = scsi_end_request(SCpnt, 0, this_count); - return; - } else { - /* - * Must have been a power glitch, or a - * bus reset. Could not have been a - * media change, so we just retry the - * request and see what happens. - */ + if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70) { + /* + * If the device is in the process of becoming ready, + * retry. + */ + if (SCpnt->sense_buffer[12] == 0x04 && + SCpnt->sense_buffer[13] == 0x01) { scsi_queue_next_request(q, SCpnt); return; } + if ((SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) { + if (SCpnt->device->removable) { + /* detected disc change. set a bit + * and quietly refuse further access. + */ + SCpnt->device->changed = 1; + SCpnt = scsi_end_request(SCpnt, 0, this_count); + return; + } else { + /* + * Must have been a power glitch, or a + * bus reset. Could not have been a + * media change, so we just retry the + * request and see what happens. + */ + scsi_queue_next_request(q, SCpnt); + return; + } + } } /* If we had an ILLEGAL REQUEST returned, then we may have * performed an unsupported command. 
The only thing this should be diff --git a/drivers/video/Config.in b/drivers/video/Config.in index a3742547fdf9..e95d91a8e9f9 100644 --- a/drivers/video/Config.in +++ b/drivers/video/Config.in @@ -59,6 +59,9 @@ if [ "$CONFIG_FB" = "y" ]; then if [ "$CONFIG_ATARI" = "y" ]; then bool ' Atari native chipset support' CONFIG_FB_ATARI tristate ' ATI Mach64 display support' CONFIG_FB_ATY + if [ "$CONFIG_FB_ATY" != "n" ]; then + define_bool CONFIG_FB_ATY_GX y + fi fi if [ "$CONFIG_PPC" = "y" ]; then bool ' Open Firmware frame buffer device support' CONFIG_FB_OF @@ -122,6 +125,10 @@ if [ "$CONFIG_FB" = "y" ]; then bool ' Multihead support' CONFIG_FB_MATROX_MULTIHEAD fi tristate ' ATI Mach64 display support (EXPERIMENTAL)' CONFIG_FB_ATY + if [ "$CONFIG_FB_ATY" != "n" ]; then + bool ' Mach64 GX support (EXPERIMENTAL)' CONFIG_FB_ATY_GX + bool ' Mach64 CT/VT/GT/LT (incl. 3D RAGE) support' CONFIG_FB_ATY_CT + fi tristate ' ATI Rage 128 display support (EXPERIMENTAL)' CONFIG_FB_ATY128 tristate ' 3Dfx Banshee/Voodoo3 display support (EXPERIMENTAL)' CONFIG_FB_3DFX tristate ' SIS 630/540 display support (EXPERIMENTAL)' CONFIG_FB_SIS @@ -157,6 +164,9 @@ if [ "$CONFIG_FB" = "y" ]; then bool ' PCI framebuffers' CONFIG_FB_PCI if [ "$CONFIG_FB_PCI" != "n" ]; then tristate ' ATI Mach64 display support' CONFIG_FB_ATY + if [ "$CONFIG_FB_ATY" != "n" ]; then + define_bool CONFIG_FB_ATY_CT y + fi fi fi fi diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 52f2684f2f9f..f98fd81c4ca7 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -46,7 +46,6 @@ obj-$(CONFIG_FB_PM2) += pm2fb.o fbgen.o obj-$(CONFIG_FB_APOLLO) += dnfb.o obj-$(CONFIG_FB_Q40) += q40fb.o obj-$(CONFIG_FB_ATARI) += atafb.o -obj-$(CONFIG_FB_ATY) += atyfb.o obj-$(CONFIG_FB_ATY128) += aty128fb.o obj-$(CONFIG_FB_IGA) += igafb.o obj-$(CONFIG_FB_CONTROL) += controlfb.o @@ -94,6 +93,11 @@ ifeq ($(CONFIG_FB_SIS),y) obj-y += sis/sisfb.o endif +subdir-$(CONFIG_FB_ATY) += aty +ifeq ($(CONFIG_FB_ATY),y) +obj-y += 
aty/atyfb.o +endif + obj-$(CONFIG_FB_SUN3) += sun3fb.o obj-$(CONFIG_FB_BWTWO) += bwtwofb.o obj-$(CONFIG_FB_HGA) += hgafb.o diff --git a/drivers/video/aty/Makefile b/drivers/video/aty/Makefile new file mode 100644 index 000000000000..147df38f1478 --- /dev/null +++ b/drivers/video/aty/Makefile @@ -0,0 +1,12 @@ + +O_TARGET := atyfb.o + +export-objs := atyfb_base.o mach64_accel.o + +obj-y := atyfb_base.o mach64_accel.o +obj-$(CONFIG_FB_ATY_GX) += mach64_gx.o +obj-$(CONFIG_FB_ATY_CT) += mach64_ct.o mach64_cursor.o +obj-m := $(O_TARGET) + +include $(TOPDIR)/Rules.make + diff --git a/drivers/video/aty/atyfb.h b/drivers/video/aty/atyfb.h new file mode 100644 index 000000000000..75826bf3b405 --- /dev/null +++ b/drivers/video/aty/atyfb.h @@ -0,0 +1,318 @@ + +/* + * ATI Frame Buffer Device Driver Core Definitions + */ + +#include <linux/config.h> + + + /* + * Elements of the hardware specific atyfb_par structure + */ + +struct crtc { + u32 vxres; + u32 vyres; + u32 xoffset; + u32 yoffset; + u32 bpp; + u32 h_tot_disp; + u32 h_sync_strt_wid; + u32 v_tot_disp; + u32 v_sync_strt_wid; + u32 off_pitch; + u32 gen_cntl; + u32 dp_pix_width; /* acceleration */ + u32 dp_chain_mask; /* acceleration */ +}; + +struct pll_514 { + u8 m; + u8 n; +}; + +struct pll_18818 +{ + u32 program_bits; + u32 locationAddr; + u32 period_in_ps; + u32 post_divider; +}; + +struct pll_ct { + u8 pll_ref_div; + u8 pll_gen_cntl; + u8 mclk_fb_div; + u8 pll_vclk_cntl; + u8 vclk_post_div; + u8 vclk_fb_div; + u8 pll_ext_cntl; + u32 dsp_config; /* Mach64 GTB DSP */ + u32 dsp_on_off; /* Mach64 GTB DSP */ + u8 mclk_post_div_real; + u8 vclk_post_div_real; +}; + +union aty_pll { + struct pll_ct ct; + struct pll_514 ibm514; + struct pll_18818 ics2595; +}; + + + /* + * The hardware parameters for each card + */ + +struct atyfb_par { + struct crtc crtc; + union aty_pll pll; + u32 accel_flags; +}; + +struct aty_cursor { + int enable; + int on; + int vbl_cnt; + int blink_rate; + u32 offset; + struct { + u16 x, y; + } pos, 
hot, size; + u32 color[2]; + u8 bits[8][64]; + u8 mask[8][64]; + u8 *ram; + struct timer_list *timer; +}; + +struct fb_info_aty { + struct fb_info fb_info; + struct fb_info_aty *next; + unsigned long ati_regbase_phys; + unsigned long ati_regbase; + unsigned long frame_buffer_phys; + unsigned long frame_buffer; + unsigned long clk_wr_offset; + struct pci_mmap_map *mmap_map; + struct aty_cursor *cursor; + struct aty_cmap_regs *aty_cmap_regs; + struct { u8 red, green, blue, pad; } palette[256]; + struct atyfb_par default_par; + struct atyfb_par current_par; + u32 features; + u32 total_vram; + u32 ref_clk_per; + u32 pll_per; + u32 mclk_per; + u8 bus_type; + u8 ram_type; + u8 mem_refresh_rate; + const struct aty_dac_ops *dac_ops; + const struct aty_pll_ops *pll_ops; + struct display disp; + struct display_switch dispsw; + union { +#ifdef FBCON_HAS_CFB16 + u16 cfb16[16]; +#endif +#ifdef FBCON_HAS_CFB24 + u32 cfb24[16]; +#endif +#ifdef FBCON_HAS_CFB32 + u32 cfb32[16]; +#endif + } fbcon_cmap; + u8 blitter_may_be_busy; +#ifdef __sparc__ + u8 mmaped; + int open; + int vtconsole; + int consolecnt; +#endif +#ifdef CONFIG_PMAC_PBOOK + unsigned char *save_framebuffer; + unsigned long save_pll[64]; +#endif +}; + + + /* + * ATI Mach64 features + */ + +#define M64_HAS(feature) ((info)->features & (M64F_##feature)) + +#define M64F_RESET_3D 0x00000001 +#define M64F_MAGIC_FIFO 0x00000002 +#define M64F_GTB_DSP 0x00000004 +#define M64F_FIFO_24 0x00000008 +#define M64F_SDRAM_MAGIC_PLL 0x00000010 +#define M64F_MAGIC_POSTDIV 0x00000020 +#define M64F_INTEGRATED 0x00000040 +#define M64F_CT_BUS 0x00000080 +#define M64F_VT_BUS 0x00000100 +#define M64F_MOBIL_BUS 0x00000200 +#define M64F_GX 0x00000400 +#define M64F_CT 0x00000800 +#define M64F_VT 0x00001000 +#define M64F_GT 0x00002000 +#define M64F_MAGIC_VRAM_SIZE 0x00004000 +#define M64F_G3_PB_1_1 0x00008000 +#define M64F_G3_PB_1024x768 0x00010000 +#define M64F_EXTRA_BRIGHT 0x00020000 +#define M64F_LT_SLEEP 0x00040000 +#define M64F_XL_DLL 
0x00080000 + + + /* + * Register access + */ + +static inline u32 aty_ld_le32(int regindex, + const struct fb_info_aty *info) +{ + /* Hack for bloc 1, should be cleanly optimized by compiler */ + if (regindex >= 0x400) + regindex -= 0x800; + +#if defined(__mc68000__) + return le32_to_cpu(*((volatile u32 *)(info->ati_regbase+regindex))); +#else + return readl (info->ati_regbase + regindex); +#endif +} + +static inline void aty_st_le32(int regindex, u32 val, + const struct fb_info_aty *info) +{ + /* Hack for bloc 1, should be cleanly optimized by compiler */ + if (regindex >= 0x400) + regindex -= 0x800; + +#if defined(__mc68000__) + *((volatile u32 *)(info->ati_regbase+regindex)) = cpu_to_le32(val); +#else + writel (val, info->ati_regbase + regindex); +#endif +} + +static inline u8 aty_ld_8(int regindex, + const struct fb_info_aty *info) +{ + /* Hack for bloc 1, should be cleanly optimized by compiler */ + if (regindex >= 0x400) + regindex -= 0x800; + + return readb (info->ati_regbase + regindex); +} + +static inline void aty_st_8(int regindex, u8 val, + const struct fb_info_aty *info) +{ + /* Hack for bloc 1, should be cleanly optimized by compiler */ + if (regindex >= 0x400) + regindex -= 0x800; + + writeb (val, info->ati_regbase + regindex); +} + +static inline u8 aty_ld_pll(int offset, const struct fb_info_aty *info) +{ + u8 res; + + /* write addr byte */ + aty_st_8(CLOCK_CNTL + 1, (offset << 2), info); + /* read the register value */ + res = aty_ld_8(CLOCK_CNTL + 2, info); + return res; +} + + + /* + * DAC operations + */ + +struct aty_dac_ops { + int (*set_dac)(const struct fb_info_aty *info, const union aty_pll *pll, + u32 bpp, u32 accel); +}; + +extern const struct aty_dac_ops aty_dac_ibm514; /* IBM RGB514 */ +extern const struct aty_dac_ops aty_dac_ati68860b; /* ATI 68860-B */ +extern const struct aty_dac_ops aty_dac_att21c498; /* AT&T 21C498 */ +extern const struct aty_dac_ops aty_dac_unsupported; /* unsupported */ +extern const struct aty_dac_ops 
aty_dac_ct; /* Integrated */ + + + /* + * Clock operations + */ + +struct aty_pll_ops { + int (*var_to_pll)(const struct fb_info_aty *info, u32 vclk_per, u8 bpp, + union aty_pll *pll); + u32 (*pll_to_var)(const struct fb_info_aty *info, + const union aty_pll *pll); + void (*set_pll)(const struct fb_info_aty *info, const union aty_pll *pll); +}; + +extern const struct aty_pll_ops aty_pll_ati18818_1; /* ATI 18818 */ +extern const struct aty_pll_ops aty_pll_stg1703; /* STG 1703 */ +extern const struct aty_pll_ops aty_pll_ch8398; /* Chrontel 8398 */ +extern const struct aty_pll_ops aty_pll_att20c408; /* AT&T 20C408 */ +extern const struct aty_pll_ops aty_pll_ibm514; /* IBM RGB514 */ +extern const struct aty_pll_ops aty_pll_unsupported; /* unsupported */ +extern const struct aty_pll_ops aty_pll_ct; /* Integrated */ + + +extern void aty_set_pll_ct(const struct fb_info_aty *info, + const union aty_pll *pll); +extern void aty_calc_pll_ct(const struct fb_info_aty *info, + struct pll_ct *pll); + + + /* + * Hardware cursor support + */ + +extern struct aty_cursor *aty_init_cursor(struct fb_info_aty *fb); +extern void atyfb_cursor(struct display *p, int mode, int x, int y); +extern void aty_set_cursor_color(struct fb_info_aty *fb); +extern void aty_set_cursor_shape(struct fb_info_aty *fb); +extern int atyfb_set_font(struct display *d, int width, int height); + + + /* + * Hardware acceleration + */ + +static inline void wait_for_fifo(u16 entries, const struct fb_info_aty *info) +{ + while ((aty_ld_le32(FIFO_STAT, info) & 0xffff) > + ((u32)(0x8000 >> entries))); +} + +static inline void wait_for_idle(struct fb_info_aty *info) +{ + wait_for_fifo(16, info); + while ((aty_ld_le32(GUI_STAT, info) & 1)!= 0); + info->blitter_may_be_busy = 0; +} + +extern void aty_reset_engine(const struct fb_info_aty *info); +extern void aty_init_engine(const struct atyfb_par *par, + struct fb_info_aty *info); +extern void aty_rectfill(int dstx, int dsty, u_int width, u_int height, + u_int color, 
struct fb_info_aty *info); + + + /* + * Text console acceleration + */ + +extern const struct display_switch fbcon_aty8; +extern const struct display_switch fbcon_aty16; +extern const struct display_switch fbcon_aty24; +extern const struct display_switch fbcon_aty32; + diff --git a/drivers/video/atyfb.c b/drivers/video/aty/atyfb_base.c index 1200406927b9..82fab8a38e99 100644 --- a/drivers/video/atyfb.c +++ b/drivers/video/aty/atyfb_base.c @@ -1,10 +1,18 @@ -/* $Id: atyfb.c,v 1.147 2000/08/29 07:01:56 davem Exp $ - * linux/drivers/video/atyfb.c -- Frame buffer device for ATI Mach64 + +/* + * ATI Frame Buffer Device Driver Core * - * Copyright (C) 1997-1998 Geert Uytterhoeven + * Copyright (C) 1997-2001 Geert Uytterhoeven * Copyright (C) 1998 Bernd Harries * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * + * This driver supports the following ATI graphics chips: + * - ATI Mach64 + * + * To do: add support for + * - ATI Rage128 (from aty128fb.c) + * - ATI Radeon (from radeonfb.c) + * * This driver is partly based on the PowerMac console driver: * * Copyright (C) 1996 Paul Mackerras @@ -44,85 +52,57 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/mm.h> -#include <linux/tty.h> -#include <linux/slab.h> +#include <linux/malloc.h> #include <linux/vmalloc.h> #include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/fb.h> #include <linux/selection.h> #include <linux/console.h> +#include <linux/fb.h> #include <linux/init.h> #include <linux/pci.h> -#include <linux/kd.h> #include <linux/vt_kern.h> -#ifdef CONFIG_FB_COMPAT_XPMAC -#include <asm/vc_ioctl.h> -#endif - #include <asm/io.h> +#include <asm/uaccess.h> + +#include <video/fbcon.h> +#include <video/fbcon-cfb8.h> +#include <video/fbcon-cfb16.h> +#include <video/fbcon-cfb24.h> +#include <video/fbcon-cfb32.h> + +#include "mach64.h" +#include "atyfb.h" #ifdef __powerpc__ -#include <linux/adb.h> #include <asm/prom.h> -#include <asm/pci-bridge.h> #include <video/macmodes.h> #endif +#ifdef 
__sparc__ +#include <asm/pbm.h> +#include <asm/fbio.h> +#endif + #ifdef CONFIG_ADB_PMU +#include <linux/adb.h> #include <linux/pmu.h> #endif #ifdef CONFIG_NVRAM #include <linux/nvram.h> #endif +#ifdef CONFIG_FB_COMPAT_XPMAC +#include <asm/vc_ioctl.h> +#endif #ifdef CONFIG_PMAC_BACKLIGHT #include <asm/backlight.h> #endif -#ifdef __sparc__ -#include <asm/pbm.h> -#include <asm/fbio.h> -#endif -#include <asm/uaccess.h> - -#include <video/fbcon.h> -#include <video/fbcon-cfb8.h> -#include <video/fbcon-cfb16.h> -#include <video/fbcon-cfb24.h> -#include <video/fbcon-cfb32.h> - -#include "aty.h" - /* * Debug flags. */ #undef DEBUG -/* Definitions for the ICS 2595 == ATI 18818_1 Clockchip */ - -#define REF_FREQ_2595 1432 /* 14.33 MHz (exact 14.31818) */ -#define REF_DIV_2595 46 /* really 43 on ICS 2595 !!! */ - /* ohne Prescaler */ -#define MAX_FREQ_2595 15938 /* 159.38 MHz (really 170.486) */ -#define MIN_FREQ_2595 8000 /* 80.00 MHz ( 85.565) */ - /* mit Prescaler 2, 4, 8 */ -#define ABS_MIN_FREQ_2595 1000 /* 10.00 MHz (really 10.697) */ -#define N_ADJ_2595 257 - -#define STOP_BITS_2595 0x1800 - - -#define MIN_N_408 2 - -#define MIN_N_1703 6 - -#define MIN_M 2 -#define MAX_M 30 -#define MIN_N 35 -#define MAX_N 255-8 - - /* Make sure n * PAGE_SIZE is protected at end of Aperture for GUI-regs */ /* - must be large enough to catch all GUI-Regs */ /* - must be aligned to a PAGE boundary */ @@ -134,67 +114,9 @@ /* - * Elements of the Hardware specific atyfb_par structure - */ - -struct crtc { - u32 vxres; - u32 vyres; - u32 xoffset; - u32 yoffset; - u32 bpp; - u32 h_tot_disp; - u32 h_sync_strt_wid; - u32 v_tot_disp; - u32 v_sync_strt_wid; - u32 off_pitch; - u32 gen_cntl; - u32 dp_pix_width; /* acceleration */ - u32 dp_chain_mask; /* acceleration */ -}; - -struct pll_gx { - u8 m; - u8 n; -}; - -struct pll_18818 -{ - u32 program_bits; - u32 locationAddr; - u32 period_in_ps; - u32 post_divider; -}; - -struct pll_ct { - u8 pll_ref_div; - u8 pll_gen_cntl; - u8 mclk_fb_div; - u8 
pll_vclk_cntl; - u8 vclk_post_div; - u8 vclk_fb_div; - u8 pll_ext_cntl; - u32 dsp_config; /* Mach64 GTB DSP */ - u32 dsp_on_off; /* Mach64 GTB DSP */ - u8 mclk_post_div_real; - u8 vclk_post_div_real; -}; - - - /* * The Hardware parameters for each card */ -struct atyfb_par { - struct crtc crtc; - union { - struct pll_gx gx; - struct pll_ct ct; - struct pll_18818 ics2595; - } pll; - u32 accel_flags; -}; - struct aty_cmap_regs { u8 windex; u8 lut; @@ -211,96 +133,6 @@ struct pci_mmap_map { unsigned long prot_mask; }; -#define DEFAULT_CURSOR_BLINK_RATE (20) -#define CURSOR_DRAW_DELAY (2) - -struct aty_cursor { - int enable; - int on; - int vbl_cnt; - int blink_rate; - u32 offset; - struct { - u16 x, y; - } pos, hot, size; - u32 color[2]; - u8 bits[8][64]; - u8 mask[8][64]; - u8 *ram; - struct timer_list *timer; -}; - -struct fb_info_aty { - struct fb_info fb_info; - struct fb_info_aty *next; - unsigned long ati_regbase_phys; - unsigned long ati_regbase; - unsigned long frame_buffer_phys; - unsigned long frame_buffer; - unsigned long clk_wr_offset; - struct pci_mmap_map *mmap_map; - struct aty_cursor *cursor; - struct aty_cmap_regs *aty_cmap_regs; - struct { u8 red, green, blue, pad; } palette[256]; - struct atyfb_par default_par; - struct atyfb_par current_par; - u32 total_vram; - u32 ref_clk_per; - u32 pll_per; - u32 mclk_per; - u16 chip_type; -#define Gx info->chip_type - u8 chip_rev; -#define Rev info->chip_rev - u8 bus_type; - u8 ram_type; - u8 dac_type; - u8 dac_subtype; - u8 clk_type; - u8 mem_refresh_rate; - struct display disp; - struct display_switch dispsw; - union { -#ifdef FBCON_HAS_CFB16 - u16 cfb16[16]; -#endif -#ifdef FBCON_HAS_CFB24 - u32 cfb24[16]; -#endif -#ifdef FBCON_HAS_CFB32 - u32 cfb32[16]; -#endif - } fbcon_cmap; - u8 blitter_may_be_busy; -#ifdef __sparc__ - u8 mmaped; - int open; - int vtconsole; - int consolecnt; -#endif -#ifdef CONFIG_PMAC_PBOOK - unsigned char *save_framebuffer; - unsigned long save_pll[64]; -#endif -}; - -#ifdef 
CONFIG_PMAC_PBOOK - int aty_sleep_notify(struct pmu_sleep_notifier *self, int when); - static struct pmu_sleep_notifier aty_sleep_notifier = { - aty_sleep_notify, SLEEP_LEVEL_VIDEO, - }; - static struct fb_info_aty* first_display = NULL; -#endif - -#ifdef CONFIG_PMAC_BACKLIGHT -static int aty_set_backlight_enable(int on, int level, void* data); -static int aty_set_backlight_level(int level, void* data); - -static struct backlight_controller aty_backlight_controller = { - aty_set_backlight_enable, - aty_set_backlight_level -}; -#endif /* CONFIG_PMAC_BACKLIGHT */ /* * Frame buffer device API @@ -339,120 +171,23 @@ static void atyfbcon_blank(int blank, struct fb_info *fb); /* - * Text console acceleration - */ - -static void fbcon_aty_bmove(struct display *p, int sy, int sx, int dy, int dx, - int height, int width); -static void fbcon_aty_clear(struct vc_data *conp, struct display *p, int sy, - int sx, int height, int width); -#ifdef FBCON_HAS_CFB8 -static struct display_switch fbcon_aty8; -static void fbcon_aty8_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx); -static void fbcon_aty8_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx); -#endif -#ifdef FBCON_HAS_CFB16 -static struct display_switch fbcon_aty16; -static void fbcon_aty16_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx); -static void fbcon_aty16_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx); -#endif -#ifdef FBCON_HAS_CFB24 -static struct display_switch fbcon_aty24; -static void fbcon_aty24_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx); -static void fbcon_aty24_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx); -#endif -#ifdef FBCON_HAS_CFB32 -static struct display_switch fbcon_aty32; -static void fbcon_aty32_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx); 
-static void fbcon_aty32_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx); -#endif - - - /* * Internal routines */ static int aty_init(struct fb_info_aty *info, const char *name); -static struct aty_cursor *aty_init_cursor(struct fb_info_aty *fb); #ifdef CONFIG_ATARI static int store_video_par(char *videopar, unsigned char m64_num); static char *strtoke(char *s, const char *ct); #endif -static void reset_engine(const struct fb_info_aty *info); -static void init_engine(const struct atyfb_par *par, struct fb_info_aty *info); - -static void aty_st_514(int offset, u8 val, const struct fb_info_aty *info); -static void aty_st_pll(int offset, u8 val, const struct fb_info_aty *info); -static u8 aty_ld_pll(int offset, const struct fb_info_aty *info); static void aty_set_crtc(const struct fb_info_aty *info, const struct crtc *crtc); static int aty_var_to_crtc(const struct fb_info_aty *info, const struct fb_var_screeninfo *var, struct crtc *crtc); -static void aty_set_dac_514(const struct fb_info_aty *info, u32 bpp); static int aty_crtc_to_var(const struct crtc *crtc, struct fb_var_screeninfo *var); -static void aty_set_pll_gx(const struct fb_info_aty *info, - const struct pll_gx *pll); - -static int aty_set_dac_ATI68860_B(const struct fb_info_aty *info, u32 bpp, - u32 AccelMode); -static int aty_set_dac_ATT21C498(const struct fb_info_aty *info, - const struct pll_18818 *pll, u32 bpp); -void aty_dac_waste4(const struct fb_info_aty *info); - -static int aty_var_to_pll_18818(u32 period_in_ps, struct pll_18818 *pll); -static u32 aty_pll_18818_to_var(const struct pll_18818 *pll); -static void aty_set_pll18818(const struct fb_info_aty *info, - const struct pll_18818 *pll); - -static void aty_StrobeClock(const struct fb_info_aty *info); - -static void aty_ICS2595_put1bit(u8 data, const struct fb_info_aty *info); - -static int aty_var_to_pll_408(u32 period_in_ps, struct pll_18818 *pll); -static u32 aty_pll_408_to_var(const struct 
pll_18818 *pll); -static void aty_set_pll_408(const struct fb_info_aty *info, - const struct pll_18818 *pll); - -static int aty_var_to_pll_1703(u32 period_in_ps, struct pll_18818 *pll); -static u32 aty_pll_1703_to_var(const struct pll_18818 *pll); -static void aty_set_pll_1703(const struct fb_info_aty *info, - const struct pll_18818 *pll); - -static int aty_var_to_pll_8398(u32 period_in_ps, struct pll_18818 *pll); -static u32 aty_pll_8398_to_var(const struct pll_18818 *pll); -static void aty_set_pll_8398(const struct fb_info_aty *info, - const struct pll_18818 *pll); - -static int aty_var_to_pll_514(u32 vclk_per, struct pll_gx *pll); -static u32 aty_pll_gx_to_var(const struct pll_gx *pll, - const struct fb_info_aty *info); -static void aty_set_pll_ct(const struct fb_info_aty *info, - const struct pll_ct *pll); -static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per, - struct pll_ct *pll); -static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp, - struct pll_ct *pll); -static void aty_calc_pll_ct(const struct fb_info_aty *info, - struct pll_ct *pll); -static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per, - u8 bpp, struct pll_ct *pll); -static u32 aty_pll_ct_to_var(const struct pll_ct *pll, - const struct fb_info_aty *info); + static void atyfb_set_par(const struct atyfb_par *par, struct fb_info_aty *info); static int atyfb_decode_var(const struct fb_var_screeninfo *var, @@ -535,332 +270,123 @@ static unsigned long phys_size[FB_MAX] __initdata = { 0, }; static unsigned long phys_guiregbase[FB_MAX] __initdata = { 0, }; #endif - -static struct aty_features { - u16 pci_id; - u16 chip_type; +static const char m64n_gx[] __initdata = "mach64GX (ATI888GX00)"; +static const char m64n_cx[] __initdata = "mach64CX (ATI888CX00)"; +static const char m64n_ct[] __initdata = "mach64CT (ATI264CT)"; +static const char m64n_et[] __initdata = "mach64ET (ATI264ET)"; +static const char m64n_vta3[] __initdata = "mach64VTA3 (ATI264VT)"; +static 
const char m64n_vta4[] __initdata = "mach64VTA4 (ATI264VT)"; +static const char m64n_vtb[] __initdata = "mach64VTB (ATI264VTB)"; +static const char m64n_vt4[] __initdata = "mach64VT4 (ATI264VT4)"; +static const char m64n_gt[] __initdata = "3D RAGE (GT)"; +static const char m64n_gtb[] __initdata = "3D RAGE II+ (GTB)"; +static const char m64n_iic_p[] __initdata = "3D RAGE IIC (PCI)"; +static const char m64n_iic_a[] __initdata = "3D RAGE IIC (AGP)"; +static const char m64n_lt[] __initdata = "3D RAGE LT"; +static const char m64n_ltg[] __initdata = "3D RAGE LT-G"; +static const char m64n_gtc_ba[] __initdata = "3D RAGE PRO (BGA, AGP)"; +static const char m64n_gtc_ba1[] __initdata = "3D RAGE PRO (BGA, AGP, 1x only)"; +static const char m64n_gtc_bp[] __initdata = "3D RAGE PRO (BGA, PCI)"; +static const char m64n_gtc_pp[] __initdata = "3D RAGE PRO (PQFP, PCI)"; +static const char m64n_gtc_ppl[] __initdata = "3D RAGE PRO (PQFP, PCI, limited 3D)"; +static const char m64n_xl[] __initdata = "3D RAGE (XL)"; +static const char m64n_ltp_a[] __initdata = "3D RAGE LT PRO (AGP)"; +static const char m64n_ltp_p[] __initdata = "3D RAGE LT PRO (PCI)"; +static const char m64n_mob_p[] __initdata = "3D RAGE Mobility (PCI)"; +static const char m64n_mob_a[] __initdata = "3D RAGE Mobility (AGP)"; + + +static const struct { + u16 pci_id, chip_type; + u8 rev_mask, rev_val; const char *name; -} aty_features[] __initdata = { - /* mach64GX family */ - { 0x4758, 0x00d7, "mach64GX (ATI888GX00)" }, - { 0x4358, 0x0057, "mach64CX (ATI888CX00)" }, - - /* mach64CT family */ - { 0x4354, 0x4354, "mach64CT (ATI264CT)" }, - { 0x4554, 0x4554, "mach64ET (ATI264ET)" }, - - /* mach64CT family / mach64VT class */ - { 0x5654, 0x5654, "mach64VT (ATI264VT)" }, - { 0x5655, 0x5655, "mach64VTB (ATI264VTB)" }, - { 0x5656, 0x5656, "mach64VT4 (ATI264VT4)" }, - - /* mach64CT family / mach64GT (3D RAGE) class */ - { 0x4c42, 0x4c42, "3D RAGE LT PRO (AGP)" }, - { 0x4c44, 0x4c44, "3D RAGE LT PRO" }, - { 0x4c47, 0x4c47, "3D RAGE 
LT-G" }, - { 0x4c49, 0x4c49, "3D RAGE LT PRO" }, - { 0x4c50, 0x4c50, "3D RAGE LT PRO" }, - { 0x4c54, 0x4c54, "3D RAGE LT" }, - { 0x4752, 0x4752, "3D RAGE (XL)" }, - { 0x4754, 0x4754, "3D RAGE (GT)" }, - { 0x4755, 0x4755, "3D RAGE II+ (GTB)" }, - { 0x4756, 0x4756, "3D RAGE IIC (PCI)" }, - { 0x4757, 0x4757, "3D RAGE IIC (AGP)" }, - { 0x475a, 0x475a, "3D RAGE IIC (AGP)" }, - { 0x4742, 0x4742, "3D RAGE PRO (BGA, AGP)" }, - { 0x4744, 0x4744, "3D RAGE PRO (BGA, AGP, 1x only)" }, - { 0x4749, 0x4749, "3D RAGE PRO (BGA, PCI)" }, - { 0x4750, 0x4750, "3D RAGE PRO (PQFP, PCI)" }, - { 0x4751, 0x4751, "3D RAGE PRO (PQFP, PCI, limited 3D)" }, - { 0x4c4d, 0x4c4d, "3D RAGE Mobility (PCI)" }, - { 0x4c4e, 0x4c4e, "3D RAGE Mobility (AGP)" }, + int pll, mclk; + u32 features; +} aty_chips[] __initdata = { +#ifdef CONFIG_FB_ATY_GX + /* Mach64 GX */ + { 0x4758, 0x00d7, 0x00, 0x00, m64n_gx, 135, 50, M64F_GX }, + { 0x4358, 0x0057, 0x00, 0x00, m64n_cx, 135, 50, M64F_GX }, +#endif /* CONFIG_FB_ATY_GX */ + +#ifdef CONFIG_FB_ATY_CT + /* Mach64 CT */ + { 0x4354, 0x4354, 0x00, 0x00, m64n_ct, 135, 60, M64F_CT | M64F_INTEGRATED | M64F_CT_BUS | M64F_MAGIC_FIFO }, + { 0x4554, 0x4554, 0x00, 0x00, m64n_et, 135, 60, M64F_CT | M64F_INTEGRATED | M64F_CT_BUS | M64F_MAGIC_FIFO }, + + /* Mach64 VT */ + { 0x5654, 0x5654, 0xc7, 0x00, m64n_vta3, 170, 67, M64F_VT | M64F_INTEGRATED | M64F_VT_BUS | M64F_MAGIC_FIFO | M64F_FIFO_24 }, + { 0x5654, 0x5654, 0xc7, 0x40, m64n_vta4, 200, 67, M64F_VT | M64F_INTEGRATED | M64F_VT_BUS | M64F_MAGIC_FIFO | M64F_FIFO_24 | M64F_MAGIC_POSTDIV }, + { 0x5654, 0x5654, 0x00, 0x00, m64n_vtb, 200, 67, M64F_VT | M64F_INTEGRATED | M64F_VT_BUS | M64F_GTB_DSP | M64F_FIFO_24 }, + { 0x5655, 0x5655, 0x00, 0x00, m64n_vtb, 200, 67, M64F_VT | M64F_INTEGRATED | M64F_VT_BUS | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL }, + { 0x5656, 0x5656, 0x00, 0x00, m64n_vt4, 230, 83, M64F_VT | M64F_INTEGRATED | M64F_GTB_DSP }, + + /* Mach64 GT (3D RAGE) */ + { 0x4754, 0x4754, 0x07, 0x00, m64n_gt, 135, 
63, M64F_GT | M64F_INTEGRATED | M64F_MAGIC_FIFO | M64F_FIFO_24 | M64F_EXTRA_BRIGHT }, + { 0x4754, 0x4754, 0x07, 0x01, m64n_gt, 170, 67, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x4754, 0x4754, 0x07, 0x02, m64n_gt, 200, 67, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x4755, 0x4755, 0x00, 0x00, m64n_gtb, 200, 67, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x4756, 0x4756, 0x00, 0x00, m64n_iic_p, 230, 83, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x4757, 0x4757, 0x00, 0x00, m64n_iic_a, 230, 83, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x475a, 0x475a, 0x00, 0x00, m64n_iic_a, 230, 83, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_FIFO_24 | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + + /* Mach64 LT */ + { 0x4c54, 0x4c54, 0x00, 0x00, m64n_lt, 135, 63, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP }, + { 0x4c47, 0x4c47, 0x00, 0x00, m64n_ltg, 230, 63, M64F_GT | M64F_INTEGRATED | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT | M64F_LT_SLEEP | M64F_G3_PB_1024x768 }, + + /* Mach64 GTC (3D RAGE PRO) */ + { 0x4742, 0x4742, 0x00, 0x00, m64n_gtc_ba, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x4744, 0x4744, 0x00, 0x00, m64n_gtc_ba1, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 0x4749, 0x4749, 0x00, 0x00, m64n_gtc_bp, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT | M64F_MAGIC_VRAM_SIZE }, + { 0x4750, 0x4750, 0x00, 0x00, m64n_gtc_pp, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + { 
0x4751, 0x4751, 0x00, 0x00, m64n_gtc_ppl, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT }, + + /* 3D RAGE XL */ + { 0x4752, 0x4752, 0x00, 0x00, m64n_xl, 230, 120, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_SDRAM_MAGIC_PLL | M64F_EXTRA_BRIGHT | M64F_XL_DLL }, + + /* Mach64 LT PRO */ + { 0x4c42, 0x4c42, 0x00, 0x00, m64n_ltp_a, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP }, + { 0x4c44, 0x4c44, 0x00, 0x00, m64n_ltp_p, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP }, + { 0x4c49, 0x4c49, 0x00, 0x00, m64n_ltp_p, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_EXTRA_BRIGHT | M64F_G3_PB_1_1 | M64F_G3_PB_1024x768 }, + { 0x4c50, 0x4c50, 0x00, 0x00, m64n_ltp_p, 230, 100, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP }, + + /* 3D RAGE Mobility */ + { 0x4c4d, 0x4c4d, 0x00, 0x00, m64n_mob_p, 230, 50, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_MOBIL_BUS }, + { 0x4c4e, 0x4c4e, 0x00, 0x00, m64n_mob_a, 230, 50, M64F_GT | M64F_INTEGRATED | M64F_RESET_3D | M64F_GTB_DSP | M64F_MOBIL_BUS }, +#endif /* CONFIG_FB_ATY_CT */ }; +static const char ram_dram[] __initdata = "DRAM"; +static const char ram_vram[] __initdata = "VRAM"; +static const char ram_edo[] __initdata = "EDO"; +static const char ram_sdram[] __initdata = "SDRAM"; +static const char ram_sgram[] __initdata = "SGRAM"; +static const char ram_wram[] __initdata = "WRAM"; +static const char ram_off[] __initdata = "OFF"; +static const char ram_resv[] __initdata = "RESV"; + +#ifdef CONFIG_FB_ATY_GX static const char *aty_gx_ram[8] __initdata = { - "DRAM", "VRAM", "VRAM", "DRAM", "DRAM", "VRAM", "VRAM", "RESV" + ram_dram, ram_vram, ram_vram, ram_dram, + ram_dram, ram_vram, ram_vram, ram_resv }; +#endif /* CONFIG_FB_ATY_GX */ +#ifdef CONFIG_FB_ATY_CT static const char *aty_ct_ram[8] __initdata = { - "OFF", "DRAM", "EDO", "EDO", "SDRAM", "SGRAM", "WRAM", 
"RESV" + ram_off, ram_dram, ram_edo, ram_edo, + ram_sdram, ram_sgram, ram_wram, ram_resv }; +#endif /* CONFIG_FB_ATY_CT */ -static inline u32 aty_ld_le32(int regindex, - const struct fb_info_aty *info) -{ - /* Hack for bloc 1, should be cleanly optimized by compiler */ - if (regindex >= 0x400) - regindex -= 0x800; - -#if defined(__mc68000__) - return le32_to_cpu(*((volatile u32 *)(info->ati_regbase+regindex))); -#else - return readl (info->ati_regbase + regindex); -#endif -} - -static inline void aty_st_le32(int regindex, u32 val, - const struct fb_info_aty *info) -{ - /* Hack for bloc 1, should be cleanly optimized by compiler */ - if (regindex >= 0x400) - regindex -= 0x800; - -#if defined(__mc68000__) - *((volatile u32 *)(info->ati_regbase+regindex)) = cpu_to_le32(val); -#else - writel (val, info->ati_regbase + regindex); -#endif -} - -static inline u8 aty_ld_8(int regindex, - const struct fb_info_aty *info) -{ - /* Hack for bloc 1, should be cleanly optimized by compiler */ - if (regindex >= 0x400) - regindex -= 0x800; - - return readb (info->ati_regbase + regindex); -} - -static inline void aty_st_8(int regindex, u8 val, - const struct fb_info_aty *info) -{ - /* Hack for bloc 1, should be cleanly optimized by compiler */ - if (regindex >= 0x400) - regindex -= 0x800; - - writeb (val, info->ati_regbase + regindex); -} - -#if defined(CONFIG_PPC) || defined(CONFIG_PMAC_PBOOK) -static void aty_st_lcd(int index, u32 val, const struct fb_info_aty *info) -{ - unsigned long temp; - - /* write addr byte */ - temp = aty_ld_le32(LCD_INDEX, info); - aty_st_le32(LCD_INDEX, (temp & ~LCD_INDEX_MASK) | index, info); - /* write the register value */ - aty_st_le32(LCD_DATA, val, info); -} - -static u32 aty_ld_lcd(int index, const struct fb_info_aty *info) -{ - unsigned long temp; - - /* write addr byte */ - temp = aty_ld_le32(LCD_INDEX, info); - aty_st_le32(LCD_INDEX, (temp & ~LCD_INDEX_MASK) | index, info); - /* read the register value */ - return aty_ld_le32(LCD_DATA, info); -} 
-#endif - - /* - * Generic Mach64 routines - */ - - /* - * All writes to draw engine registers are automatically routed through a - * 32-bit-wide, 16-entry-deep command FIFO ... - * Register writes to registers with DWORD offsets less than 40h are not - * FIFOed. - * (from Chapter 5 of the Mach64 Programmer's Guide) - */ - -static inline void wait_for_fifo(u16 entries, const struct fb_info_aty *info) -{ - while ((aty_ld_le32(FIFO_STAT, info) & 0xffff) > - ((u32)(0x8000 >> entries))); -} - -static inline void wait_for_idle(struct fb_info_aty *info) -{ - wait_for_fifo(16, info); - while ((aty_ld_le32(GUI_STAT, info) & 1)!= 0); - info->blitter_may_be_busy = 0; -} - -static void reset_engine(const struct fb_info_aty *info) -{ - /* reset engine */ - aty_st_le32(GEN_TEST_CNTL, - aty_ld_le32(GEN_TEST_CNTL, info) & ~GUI_ENGINE_ENABLE, info); - /* enable engine */ - aty_st_le32(GEN_TEST_CNTL, - aty_ld_le32(GEN_TEST_CNTL, info) | GUI_ENGINE_ENABLE, info); - /* ensure engine is not locked up by clearing any FIFO or */ - /* HOST errors */ - aty_st_le32(BUS_CNTL, aty_ld_le32(BUS_CNTL, info) | BUS_HOST_ERR_ACK | - BUS_FIFO_ERR_ACK, info); -} - -static void reset_GTC_3D_engine(const struct fb_info_aty *info) -{ - aty_st_le32(SCALE_3D_CNTL, 0xc0, info); - mdelay(GTC_3D_RESET_DELAY); - aty_st_le32(SETUP_CNTL, 0x00, info); - mdelay(GTC_3D_RESET_DELAY); - aty_st_le32(SCALE_3D_CNTL, 0x00, info); - mdelay(GTC_3D_RESET_DELAY); -} - -static void init_engine(const struct atyfb_par *par, struct fb_info_aty *info) -{ - u32 pitch_value; - - /* determine modal information from global mode structure */ - pitch_value = par->crtc.vxres; - - if (par->crtc.bpp == 24) { - /* In 24 bpp, the engine is in 8 bpp - this requires that all */ - /* horizontal coordinates and widths must be adjusted */ - pitch_value = pitch_value * 3; - } - - /* On GTC (RagePro), we need to reset the 3D engine before */ - if (Gx == LB_CHIP_ID || Gx == LD_CHIP_ID || Gx == LI_CHIP_ID || - Gx == LP_CHIP_ID || Gx == GB_CHIP_ID 
|| Gx == GD_CHIP_ID || - Gx == GI_CHIP_ID || Gx == GP_CHIP_ID || Gx == GQ_CHIP_ID || - Gx == LM_CHIP_ID || Gx == LN_CHIP_ID || Gx == XL_CHIP_ID) - reset_GTC_3D_engine(info); - - /* Reset engine, enable, and clear any engine errors */ - reset_engine(info); - /* Ensure that vga page pointers are set to zero - the upper */ - /* page pointers are set to 1 to handle overflows in the */ - /* lower page */ - aty_st_le32(MEM_VGA_WP_SEL, 0x00010000, info); - aty_st_le32(MEM_VGA_RP_SEL, 0x00010000, info); - - /* ---- Setup standard engine context ---- */ - - /* All GUI registers here are FIFOed - therefore, wait for */ - /* the appropriate number of empty FIFO entries */ - wait_for_fifo(14, info); - - /* enable all registers to be loaded for context loads */ - aty_st_le32(CONTEXT_MASK, 0xFFFFFFFF, info); - - /* set destination pitch to modal pitch, set offset to zero */ - aty_st_le32(DST_OFF_PITCH, (pitch_value / 8) << 22, info); - - /* zero these registers (set them to a known state) */ - aty_st_le32(DST_Y_X, 0, info); - aty_st_le32(DST_HEIGHT, 0, info); - aty_st_le32(DST_BRES_ERR, 0, info); - aty_st_le32(DST_BRES_INC, 0, info); - aty_st_le32(DST_BRES_DEC, 0, info); - - /* set destination drawing attributes */ - aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | - DST_X_LEFT_TO_RIGHT, info); - - /* set source pitch to modal pitch, set offset to zero */ - aty_st_le32(SRC_OFF_PITCH, (pitch_value / 8) << 22, info); - - /* set these registers to a known state */ - aty_st_le32(SRC_Y_X, 0, info); - aty_st_le32(SRC_HEIGHT1_WIDTH1, 1, info); - aty_st_le32(SRC_Y_X_START, 0, info); - aty_st_le32(SRC_HEIGHT2_WIDTH2, 1, info); - - /* set source pixel retrieving attributes */ - aty_st_le32(SRC_CNTL, SRC_LINE_X_LEFT_TO_RIGHT, info); - - /* set host attributes */ - wait_for_fifo(13, info); - aty_st_le32(HOST_CNTL, 0, info); - - /* set pattern attributes */ - aty_st_le32(PAT_REG0, 0, info); - aty_st_le32(PAT_REG1, 0, info); - aty_st_le32(PAT_CNTL, 0, info); - - /* set scissors to 
modal size */ - aty_st_le32(SC_LEFT, 0, info); - aty_st_le32(SC_TOP, 0, info); - aty_st_le32(SC_BOTTOM, par->crtc.vyres-1, info); - aty_st_le32(SC_RIGHT, pitch_value-1, info); - - /* set background color to minimum value (usually BLACK) */ - aty_st_le32(DP_BKGD_CLR, 0, info); - - /* set foreground color to maximum value (usually WHITE) */ - aty_st_le32(DP_FRGD_CLR, 0xFFFFFFFF, info); - - /* set write mask to effect all pixel bits */ - aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, info); - - /* set foreground mix to overpaint and background mix to */ - /* no-effect */ - aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, info); - - /* set primary source pixel channel to foreground color */ - /* register */ - aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, info); - - /* set compare functionality to false (no-effect on */ - /* destination) */ - wait_for_fifo(3, info); - aty_st_le32(CLR_CMP_CLR, 0, info); - aty_st_le32(CLR_CMP_MASK, 0xFFFFFFFF, info); - aty_st_le32(CLR_CMP_CNTL, 0, info); - - /* set pixel depth */ - wait_for_fifo(2, info); - aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, info); - aty_st_le32(DP_CHAIN_MASK, par->crtc.dp_chain_mask, info); - - wait_for_fifo(5, info); - aty_st_le32(SCALE_3D_CNTL, 0, info); - aty_st_le32(Z_CNTL, 0, info); - aty_st_le32(CRTC_INT_CNTL, aty_ld_le32(CRTC_INT_CNTL, info) & ~0x20, info); - aty_st_le32(GUI_TRAJ_CNTL, 0x100023, info); - - /* insure engine is idle before leaving */ - wait_for_idle(info); -} - -static void aty_st_514(int offset, u8 val, const struct fb_info_aty *info) -{ - aty_st_8(DAC_CNTL, 1, info); - /* right addr byte */ - aty_st_8(DAC_W_INDEX, offset & 0xff, info); - /* left addr byte */ - aty_st_8(DAC_DATA, (offset >> 8) & 0xff, info); - aty_st_8(DAC_MASK, val, info); - aty_st_8(DAC_CNTL, 0, info); -} - -static void aty_st_pll(int offset, u8 val, const struct fb_info_aty *info) -{ - /* write addr byte */ - aty_st_8(CLOCK_CNTL + 1, (offset << 2) | PLL_WR_EN, info); - /* write the register value */ - aty_st_8(CLOCK_CNTL + 2, val, 
info); - aty_st_8(CLOCK_CNTL + 1, (offset << 2) & ~PLL_WR_EN, info); -} - -static u8 aty_ld_pll(int offset, const struct fb_info_aty *info) -{ - u8 res; - - /* write addr byte */ - aty_st_8(CLOCK_CNTL + 1, (offset << 2), info); - /* read the register value */ - res = aty_ld_8(CLOCK_CNTL + 2, info); - return res; -} - #if defined(CONFIG_PPC) /* * Apple monitor sense */ -static int read_aty_sense(const struct fb_info_aty *info) +static int __init read_aty_sense(const struct fb_info_aty *info) { int sense, i; @@ -896,294 +422,29 @@ static int read_aty_sense(const struct fb_info_aty *info) #endif /* defined(CONFIG_PPC) */ -/* ------------------------------------------------------------------------- */ - - /* - * Hardware Cursor support. - */ - -static u8 cursor_pixel_map[2] = { 0, 15 }; -static u8 cursor_color_map[2] = { 0, 0xff }; - -static u8 cursor_bits_lookup[16] = -{ - 0x00, 0x40, 0x10, 0x50, 0x04, 0x44, 0x14, 0x54, - 0x01, 0x41, 0x11, 0x51, 0x05, 0x45, 0x15, 0x55 -}; - -static u8 cursor_mask_lookup[16] = -{ - 0xaa, 0x2a, 0x8a, 0x0a, 0xa2, 0x22, 0x82, 0x02, - 0xa8, 0x28, 0x88, 0x08, 0xa0, 0x20, 0x80, 0x00 -}; - -static void -aty_set_cursor_color(struct fb_info_aty *fb, u8 *pixel, - u8 *red, u8 *green, u8 *blue) -{ - struct aty_cursor *c = fb->cursor; - int i; - - if (!c) - return; - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - for (i = 0; i < 2; i++) { - c->color[i] = (u32)red[i] << 24; - c->color[i] |= (u32)green[i] << 16; - c->color[i] |= (u32)blue[i] << 8; - c->color[i] |= (u32)pixel[i]; - } - - wait_for_fifo(2, fb); - aty_st_le32(CUR_CLR0, c->color[0], fb); - aty_st_le32(CUR_CLR1, c->color[1], fb); -} - -static void -aty_set_cursor_shape(struct fb_info_aty *fb) -{ - struct aty_cursor *c = fb->cursor; - u8 *ram, m, b; - int x, y; - - if (!c) - return; - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == 
fb->vtconsole)) - return; -#endif - - ram = c->ram; - for (y = 0; y < c->size.y; y++) { - for (x = 0; x < c->size.x >> 2; x++) { - m = c->mask[x][y]; - b = c->bits[x][y]; - fb_writeb (cursor_mask_lookup[m >> 4] | - cursor_bits_lookup[(b & m) >> 4], - ram++); - fb_writeb (cursor_mask_lookup[m & 0x0f] | - cursor_bits_lookup[(b & m) & 0x0f], - ram++); - } - for ( ; x < 8; x++) { - fb_writeb (0xaa, ram++); - fb_writeb (0xaa, ram++); - } - } - fb_memset (ram, 0xaa, (64 - c->size.y) * 16); -} - -static void -aty_set_cursor(struct fb_info_aty *fb, int on) -{ - struct atyfb_par *par = &fb->current_par; - struct aty_cursor *c = fb->cursor; - u16 xoff, yoff; - int x, y; - - if (!c) - return; - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (on) { - x = c->pos.x - c->hot.x - par->crtc.xoffset; - if (x < 0) { - xoff = -x; - x = 0; - } else { - xoff = 0; - } - - y = c->pos.y - c->hot.y - par->crtc.yoffset; - if (y < 0) { - yoff = -y; - y = 0; - } else { - yoff = 0; - } - - wait_for_fifo(4, fb); - aty_st_le32(CUR_OFFSET, (c->offset >> 3) + (yoff << 1), fb); - aty_st_le32(CUR_HORZ_VERT_OFF, - ((u32)(64 - c->size.y + yoff) << 16) | xoff, fb); - aty_st_le32(CUR_HORZ_VERT_POSN, ((u32)y << 16) | x, fb); - aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, fb) - | HWCURSOR_ENABLE, fb); - } else { - wait_for_fifo(1, fb); - aty_st_le32(GEN_TEST_CNTL, - aty_ld_le32(GEN_TEST_CNTL, fb) & ~HWCURSOR_ENABLE, - fb); - } - if (fb->blitter_may_be_busy) - wait_for_idle(fb); -} - -static void -aty_cursor_timer_handler(unsigned long dev_addr) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)dev_addr; - - if (!fb->cursor) - return; - - if (!fb->cursor->enable) - goto out; - - if (fb->cursor->vbl_cnt && --fb->cursor->vbl_cnt == 0) { - fb->cursor->on ^= 1; - aty_set_cursor(fb, fb->cursor->on); - fb->cursor->vbl_cnt = fb->cursor->blink_rate; - } - -out: - fb->cursor->timer->expires = jiffies + (HZ / 
50); - add_timer(fb->cursor->timer); -} - -static void -atyfb_cursor(struct display *p, int mode, int x, int y) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)p->fb_info; - struct aty_cursor *c = fb->cursor; - - if (!c) - return; - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - x *= fontwidth(p); - y *= fontheight(p); - if (c->pos.x == x && c->pos.y == y && (mode == CM_ERASE) == !c->enable) - return; - - c->enable = 0; - if (c->on) - aty_set_cursor(fb, 0); - c->pos.x = x; - c->pos.y = y; - - switch (mode) { - case CM_ERASE: - c->on = 0; - break; - - case CM_DRAW: - case CM_MOVE: - if (c->on) - aty_set_cursor(fb, 1); - else - c->vbl_cnt = CURSOR_DRAW_DELAY; - c->enable = 1; - break; - } -} - -static struct fb_info_aty *fb_list = NULL; - -static struct aty_cursor * __init -aty_init_cursor(struct fb_info_aty *fb) +#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_PMAC_BACKLIGHT) +static void aty_st_lcd(int index, u32 val, const struct fb_info_aty *info) { - struct aty_cursor *cursor; - unsigned long addr; - - cursor = kmalloc(sizeof(struct aty_cursor), GFP_ATOMIC); - if (!cursor) - return 0; - memset(cursor, 0, sizeof(*cursor)); - - cursor->timer = kmalloc(sizeof(*cursor->timer), GFP_KERNEL); - if (!cursor->timer) { - kfree(cursor); - return 0; - } - memset(cursor->timer, 0, sizeof(*cursor->timer)); - - cursor->blink_rate = DEFAULT_CURSOR_BLINK_RATE; - fb->total_vram -= PAGE_SIZE; - cursor->offset = fb->total_vram; - -#ifdef __sparc__ - addr = fb->frame_buffer - 0x800000 + cursor->offset; - cursor->ram = (u8 *)addr; -#else -#ifdef __BIG_ENDIAN - addr = fb->frame_buffer_phys - 0x800000 + cursor->offset; - cursor->ram = (u8 *)ioremap(addr, 1024); -#else - addr = fb->frame_buffer + cursor->offset; - cursor->ram = (u8 *)addr; -#endif -#endif - - if (! 
cursor->ram) { - kfree(cursor); - return NULL; - } - - if (curblink) { - init_timer(cursor->timer); - cursor->timer->expires = jiffies + (HZ / 50); - cursor->timer->data = (unsigned long)fb; - cursor->timer->function = aty_cursor_timer_handler; - add_timer(cursor->timer); - } + unsigned long temp; - return cursor; + /* write addr byte */ + temp = aty_ld_le32(LCD_INDEX, info); + aty_st_le32(LCD_INDEX, (temp & ~LCD_INDEX_MASK) | index, info); + /* write the register value */ + aty_st_le32(LCD_DATA, val, info); } -static int -atyfb_set_font(struct display *d, int width, int height) +static u32 aty_ld_lcd(int index, const struct fb_info_aty *info) { - struct fb_info_aty *fb = (struct fb_info_aty *)d->fb_info; - struct aty_cursor *c = fb->cursor; - int i, j; - - if (c) { - if (!width || !height) { - width = 8; - height = 16; - } - - c->hot.x = 0; - c->hot.y = 0; - c->size.x = width; - c->size.y = height; - - memset(c->bits, 0xff, sizeof(c->bits)); - memset(c->mask, 0, sizeof(c->mask)); - - for (i = 0, j = width; j >= 0; j -= 8, i++) { - c->mask[i][height-2] = (j >= 8) ? 0xff : (0xff << (8 - j)); - c->mask[i][height-1] = (j >= 8) ? 
0xff : (0xff << (8 - j)); - } + unsigned long temp; - aty_set_cursor_color(fb, cursor_pixel_map, cursor_color_map, - cursor_color_map, cursor_color_map); - aty_set_cursor_shape(fb); - } - return 1; + /* write addr byte */ + temp = aty_ld_le32(LCD_INDEX, info); + aty_st_le32(LCD_INDEX, (temp & ~LCD_INDEX_MASK) | index, info); + /* read the register value */ + return aty_ld_le32(LCD_DATA, info); } - - - +#endif /* CONFIG_PMAC_PBOOK || CONFIG_PMAC_BACKLIGHT */ /* ------------------------------------------------------------------------- */ @@ -1280,7 +541,7 @@ static int aty_var_to_crtc(const struct fb_info_aty *info, dp_pix_width = HOST_15BPP | SRC_15BPP | DST_15BPP | BYTE_ORDER_LSB_TO_MSB; dp_chain_mask = 0x4210; - } else if ((bpp <= 24) && (Gx != GX_CHIP_ID) && (Gx != CX_CHIP_ID)) { + } else if (bpp <= 24 && M64_HAS(INTEGRATED)) { bpp = 24; pix_width = CRTC_PIX_WIDTH_24BPP; dp_pix_width = HOST_8BPP | SRC_8BPP | DST_8BPP | BYTE_ORDER_LSB_TO_MSB; @@ -1314,8 +575,7 @@ static int aty_var_to_crtc(const struct fb_info_aty *info, crtc->v_sync_strt_wid = v_sync_strt | (v_sync_wid<<16) | (v_sync_pol<<21); crtc->off_pitch = ((yoffset*vxres+xoffset)*bpp/64) | (vxres<<19); crtc->gen_cntl = pix_width | c_sync | CRTC_EXT_DISP_EN | CRTC_ENABLE; - if ((Gx == CT_CHIP_ID) || (Gx == ET_CHIP_ID) || - ((Gx == VT_CHIP_ID || Gx == GT_CHIP_ID) && !(Rev & 0x07))) { + if (M64_HAS(MAGIC_FIFO)) { /* Not VTB/GTB */ /* FIXME: magic FIFO values */ crtc->gen_cntl |= aty_ld_le32(CRTC_GEN_CNTL, info) & 0x000e0000; @@ -1327,160 +587,6 @@ static int aty_var_to_crtc(const struct fb_info_aty *info, } -static int aty_set_dac_ATI68860_B(const struct fb_info_aty *info, u32 bpp, - u32 AccelMode) -{ - u32 gModeReg, devSetupRegA, temp, mask; - - gModeReg = 0; - devSetupRegA = 0; - - switch (bpp) { - case 8: - gModeReg = 0x83; - devSetupRegA = 0x60 | 0x00 /*(info->mach64DAC8Bit ? 
0x00 : 0x01) */; - break; - case 15: - gModeReg = 0xA0; - devSetupRegA = 0x60; - break; - case 16: - gModeReg = 0xA1; - devSetupRegA = 0x60; - break; - case 24: - gModeReg = 0xC0; - devSetupRegA = 0x60; - break; - case 32: - gModeReg = 0xE3; - devSetupRegA = 0x60; - break; - } - - if (!AccelMode) { - gModeReg = 0x80; - devSetupRegA = 0x61; - } - - temp = aty_ld_8(DAC_CNTL, info); - aty_st_8(DAC_CNTL, (temp & ~DAC_EXT_SEL_RS2) | DAC_EXT_SEL_RS3, info); - - aty_st_8(DAC_REGS + 2, 0x1D, info); - aty_st_8(DAC_REGS + 3, gModeReg, info); - aty_st_8(DAC_REGS, 0x02, info); - - temp = aty_ld_8(DAC_CNTL, info); - aty_st_8(DAC_CNTL, temp | DAC_EXT_SEL_RS2 | DAC_EXT_SEL_RS3, info); - - if (info->total_vram < MEM_SIZE_1M) - mask = 0x04; - else if (info->total_vram == MEM_SIZE_1M) - mask = 0x08; - else - mask = 0x0C; - - /* The following assumes that the BIOS has correctly set R7 of the - * Device Setup Register A at boot time. - */ -#define A860_DELAY_L 0x80 - - temp = aty_ld_8(DAC_REGS, info); - aty_st_8(DAC_REGS, (devSetupRegA | mask) | (temp & A860_DELAY_L), info); - temp = aty_ld_8(DAC_CNTL, info); - aty_st_8(DAC_CNTL, (temp & ~(DAC_EXT_SEL_RS2 | DAC_EXT_SEL_RS3)), info); - - return 0; -} - -static int aty_set_dac_ATT21C498(const struct fb_info_aty *info, - const struct pll_18818 *pll, u32 bpp) -{ - u32 dotClock; - int muxmode = 0; - int DACMask = 0; - - dotClock = 100000000 / pll->period_in_ps; - - switch (bpp) { - case 8: - if (dotClock > 8000) { - DACMask = 0x24; - muxmode = 1; - } else - DACMask = 0x04; - break; - case 15: - DACMask = 0x16; - break; - case 16: - DACMask = 0x36; - break; - case 24: - DACMask = 0xE6; - break; - case 32: - DACMask = 0xE6; - break; - } - - if (1 /* info->mach64DAC8Bit */) - DACMask |= 0x02; - - aty_dac_waste4(info); - aty_st_8(DAC_REGS + 2, DACMask, info); - - return muxmode; -} - -void aty_dac_waste4(const struct fb_info_aty *info) -{ - (void)aty_ld_8(DAC_REGS, info); - - (void)aty_ld_8(DAC_REGS + 2, info); - (void)aty_ld_8(DAC_REGS + 2, 
info); - (void)aty_ld_8(DAC_REGS + 2, info); - (void)aty_ld_8(DAC_REGS + 2, info); -} - - -static void aty_set_dac_514(const struct fb_info_aty *info, u32 bpp) -{ - static struct { - u8 pixel_dly; - u8 misc2_cntl; - u8 pixel_rep; - u8 pixel_cntl_index; - u8 pixel_cntl_v1; - } tab[3] = { - { 0, 0x41, 0x03, 0x71, 0x45 }, /* 8 bpp */ - { 0, 0x45, 0x04, 0x0c, 0x01 }, /* 555 */ - { 0, 0x45, 0x06, 0x0e, 0x00 }, /* XRGB */ - }; - int i; - - switch (bpp) { - case 8: - default: - i = 0; - break; - case 16: - i = 1; - break; - case 32: - i = 2; - break; - } - aty_st_514(0x90, 0x00, info); /* VRAM Mask Low */ - aty_st_514(0x04, tab[i].pixel_dly, info); /* Horizontal Sync Control */ - aty_st_514(0x05, 0x00, info); /* Power Management */ - aty_st_514(0x02, 0x01, info); /* Misc Clock Control */ - aty_st_514(0x71, tab[i].misc2_cntl, info); /* Misc Control 2 */ - aty_st_514(0x0a, tab[i].pixel_rep, info); /* Pixel Format */ - aty_st_514(tab[i].pixel_cntl_index, tab[i].pixel_cntl_v1, info); - /* Misc Control 2 / 16 BPP Control / 32 BPP Control */ -} - static int aty_crtc_to_var(const struct crtc *crtc, struct fb_var_screeninfo *var) { @@ -1615,821 +721,11 @@ static int aty_crtc_to_var(const struct crtc *crtc, /* ------------------------------------------------------------------------- */ - /* - * PLL programming (Mach64 GX family) - * - * FIXME: use function pointer tables instead of switch statements - */ - -static void aty_set_pll_gx(const struct fb_info_aty *info, - const struct pll_gx *pll) -{ - switch (info->clk_type) { - case CLK_ATI18818_1: - aty_st_8(CLOCK_CNTL, pll->m, info); - break; - case CLK_IBMRGB514: - aty_st_514(0x06, 0x02, info); /* DAC Operation */ - aty_st_514(0x10, 0x01, info); /* PLL Control 1 */ - aty_st_514(0x70, 0x01, info); /* Misc Control 1 */ - aty_st_514(0x8f, 0x1f, info); /* PLL Ref. 
Divider Input */ - aty_st_514(0x03, 0x00, info); /* Sync Control */ - aty_st_514(0x05, 0x00, info); /* Power Management */ - aty_st_514(0x20, pll->m, info); /* F0 / M0 */ - aty_st_514(0x21, pll->n, info); /* F1 / N0 */ - break; - } -} - - -static int aty_var_to_pll_18818(u32 period_in_ps, struct pll_18818 *pll) -{ - u32 MHz100; /* in 0.01 MHz */ - u32 program_bits; - u32 post_divider; - - /* Calculate the programming word */ - MHz100 = 100000000 / period_in_ps; - - program_bits = -1; - post_divider = 1; - - if (MHz100 > MAX_FREQ_2595) { - MHz100 = MAX_FREQ_2595; - return -EINVAL; - } else if (MHz100 < ABS_MIN_FREQ_2595) { - program_bits = 0; /* MHz100 = 257 */ - return -EINVAL; - } else { - while (MHz100 < MIN_FREQ_2595) { - MHz100 *= 2; - post_divider *= 2; - } - } - MHz100 *= 1000; - MHz100 = (REF_DIV_2595 * MHz100) / REF_FREQ_2595; - - MHz100 += 500; /* + 0.5 round */ - MHz100 /= 1000; - - if (program_bits == -1) { - program_bits = MHz100 - N_ADJ_2595; - switch (post_divider) { - case 1: - program_bits |= 0x0600; - break; - case 2: - program_bits |= 0x0400; - break; - case 4: - program_bits |= 0x0200; - break; - case 8: - default: - break; - } - } - - program_bits |= STOP_BITS_2595; - - pll->program_bits = program_bits; - pll->locationAddr = 0; - pll->post_divider = post_divider; - pll->period_in_ps = period_in_ps; - - return 0; -} - -static u32 aty_pll_18818_to_var(const struct pll_18818 *pll) -{ - return(pll->period_in_ps); /* default for now */ -} - -static void aty_set_pll18818(const struct fb_info_aty *info, - const struct pll_18818 *pll) -{ - u32 program_bits; - u32 locationAddr; - - u32 i; - - u8 old_clock_cntl; - u8 old_crtc_ext_disp; - - old_clock_cntl = aty_ld_8(CLOCK_CNTL, info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, 0, info); - - old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), - info); - - mdelay(15); /* delay for 50 (15) ms */ - - program_bits = 
pll->program_bits; - locationAddr = pll->locationAddr; - - /* Program the clock chip */ - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, 0, info); /* Strobe = 0 */ - aty_StrobeClock(info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, 1, info); /* Strobe = 0 */ - aty_StrobeClock(info); - - aty_ICS2595_put1bit(1, info); /* Send start bits */ - aty_ICS2595_put1bit(0, info); /* Start bit */ - aty_ICS2595_put1bit(0, info); /* Read / ~Write */ - - for (i = 0; i < 5; i++) { /* Location 0..4 */ - aty_ICS2595_put1bit(locationAddr & 1, info); - locationAddr >>= 1; - } - - for (i = 0; i < 8 + 1 + 2 + 2; i++) { - aty_ICS2595_put1bit(program_bits & 1, info); - program_bits >>= 1; - } - - udelay(1000); /* delay for 1 ms */ - - (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, old_clock_cntl | CLOCK_STROBE, - info); - - mdelay(50); /* delay for 50 (15) ms */ - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, - ((pll->locationAddr & 0x0F) | CLOCK_STROBE), info); - - return; -} - - -static int aty_var_to_pll_408(u32 period_in_ps, struct pll_18818 *pll) -{ - u32 mhz100; /* in 0.01 MHz */ - u32 program_bits; - /* u32 post_divider; */ - u32 mach64MinFreq, mach64MaxFreq, mach64RefFreq; - u32 temp, tempB; - u16 remainder, preRemainder; - short divider = 0, tempA; - - /* Calculate the programming word */ - mhz100 = 100000000 / period_in_ps; - mach64MinFreq = MIN_FREQ_2595; - mach64MaxFreq = MAX_FREQ_2595; - mach64RefFreq = REF_FREQ_2595; /* 14.32 MHz */ - - /* Calculate program word */ - if (mhz100 == 0) - program_bits = 0xFF; - else { - if (mhz100 < mach64MinFreq) - mhz100 = mach64MinFreq; - if (mhz100 > mach64MaxFreq) - mhz100 = mach64MaxFreq; - - while (mhz100 < (mach64MinFreq << 3)) { - mhz100 <<= 1; - divider += 0x40; - } - - temp = (unsigned int)mhz100; - temp = (unsigned int)(temp * (MIN_N_408 + 2)); - temp -= ((short)(mach64RefFreq << 1)); - - tempA = MIN_N_408; - preRemainder = 
0xFFFF; - - do { - tempB = temp; - remainder = tempB % mach64RefFreq; - tempB = tempB / mach64RefFreq; - if (((tempB & 0xFFFF) <= 255) && (remainder <= preRemainder)) { - preRemainder = remainder; - divider &= ~0x3f; - divider |= tempA; - divider = (divider & 0x00FF) + ((tempB & 0xFF) << 8); - } - temp += mhz100; - tempA++; - } while(tempA <= 32); - - program_bits = divider; - } - - pll->program_bits = program_bits; - pll->locationAddr = 0; - pll->post_divider = divider; /* fuer nix */ - pll->period_in_ps = period_in_ps; - - return 0; -} - -static u32 aty_pll_408_to_var(const struct pll_18818 *pll) -{ - return(pll->period_in_ps); /* default for now */ -} - -static void aty_set_pll_408(const struct fb_info_aty *info, - const struct pll_18818 *pll) -{ - u32 program_bits; - u32 locationAddr; - - u8 tmpA, tmpB, tmpC; - char old_crtc_ext_disp; - - old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), - info); - - program_bits = pll->program_bits; - locationAddr = pll->locationAddr; - - /* Program clock */ - aty_dac_waste4(info); - tmpB = aty_ld_8(DAC_REGS + 2, info) | 1; - aty_dac_waste4(info); - aty_st_8(DAC_REGS + 2, tmpB, info); - - tmpA = tmpB; - tmpC = tmpA; - tmpA |= 8; - tmpB = 1; - - aty_st_8(DAC_REGS, tmpB, info); - aty_st_8(DAC_REGS + 2, tmpA, info); - - udelay(400); /* delay for 400 us */ - - locationAddr = (locationAddr << 2) + 0x40; - tmpB = locationAddr; - tmpA = program_bits >> 8; - - aty_st_8(DAC_REGS, tmpB, info); - aty_st_8(DAC_REGS + 2, tmpA, info); - - tmpB = locationAddr + 1; - tmpA = (u8)program_bits; - - aty_st_8(DAC_REGS, tmpB, info); - aty_st_8(DAC_REGS + 2, tmpA, info); - - tmpB = locationAddr + 2; - tmpA = 0x77; - - aty_st_8(DAC_REGS, tmpB, info); - aty_st_8(DAC_REGS + 2, tmpA, info); - - udelay(400); /* delay for 400 us */ - tmpA = tmpC & (~(1 | 8)); - tmpB = 1; - - aty_st_8(DAC_REGS, tmpB, info); - aty_st_8(DAC_REGS + 2, tmpA, info); - - (void)aty_ld_8(DAC_REGS, 
info); /* Clear DAC Counter */ - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); - - return; -} - - -static int aty_var_to_pll_1703(u32 period_in_ps, struct pll_18818 *pll) -{ - u32 mhz100; /* in 0.01 MHz */ - u32 program_bits; - /* u32 post_divider; */ - u32 mach64MinFreq, mach64MaxFreq, mach64RefFreq; - u32 temp, tempB; - u16 remainder, preRemainder; - short divider = 0, tempA; - - /* Calculate the programming word */ - mhz100 = 100000000 / period_in_ps; - mach64MinFreq = MIN_FREQ_2595; - mach64MaxFreq = MAX_FREQ_2595; - mach64RefFreq = REF_FREQ_2595; /* 14.32 MHz */ - - /* Calculate program word */ - if (mhz100 == 0) - program_bits = 0xE0; - else { - if (mhz100 < mach64MinFreq) - mhz100 = mach64MinFreq; - if (mhz100 > mach64MaxFreq) - mhz100 = mach64MaxFreq; - - divider = 0; - while (mhz100 < (mach64MinFreq << 3)) { - mhz100 <<= 1; - divider += 0x20; - } - - temp = (unsigned int)(mhz100); - temp = (unsigned int)(temp * (MIN_N_1703 + 2)); - temp -= (short)(mach64RefFreq << 1); - - tempA = MIN_N_1703; - preRemainder = 0xffff; - - do { - tempB = temp; - remainder = tempB % mach64RefFreq; - tempB = tempB / mach64RefFreq; - - if ((tempB & 0xffff) <= 127 && (remainder <= preRemainder)) { - preRemainder = remainder; - divider &= ~0x1f; - divider |= tempA; - divider = (divider & 0x00ff) + ((tempB & 0xff) << 8); - } - - temp += mhz100; - tempA++; - } while (tempA <= (MIN_N_1703 << 1)); - - program_bits = divider; - } - - pll->program_bits = program_bits; - pll->locationAddr = 0; - pll->post_divider = divider; /* fuer nix */ - pll->period_in_ps = period_in_ps; - - return 0; -} - -static u32 aty_pll_1703_to_var(const struct pll_18818 *pll) -{ - return(pll->period_in_ps); /* default for now */ -} - -static void aty_set_pll_1703(const struct fb_info_aty *info, - const struct pll_18818 *pll) -{ - u32 program_bits; - u32 locationAddr; - - char old_crtc_ext_disp; - - old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | 
(CRTC_EXT_DISP_EN >> 24), - info); - - program_bits = pll->program_bits; - locationAddr = pll->locationAddr; - - /* Program clock */ - aty_dac_waste4(info); - - (void)aty_ld_8(DAC_REGS + 2, info); - aty_st_8(DAC_REGS+2, (locationAddr << 1) + 0x20, info); - aty_st_8(DAC_REGS+2, 0, info); - aty_st_8(DAC_REGS+2, (program_bits & 0xFF00) >> 8, info); - aty_st_8(DAC_REGS+2, (program_bits & 0xFF), info); - - (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); - - return; -} - - -static int aty_var_to_pll_8398(u32 period_in_ps, struct pll_18818 *pll) -{ - - u32 tempA, tempB, fOut, longMHz100, diff, preDiff; - - u32 mhz100; /* in 0.01 MHz */ - u32 program_bits; - /* u32 post_divider; */ - u32 mach64MinFreq, mach64MaxFreq, mach64RefFreq; - u16 m, n, k=0, save_m, save_n, twoToKth; - - /* Calculate the programming word */ - mhz100 = 100000000 / period_in_ps; - mach64MinFreq = MIN_FREQ_2595; - mach64MaxFreq = MAX_FREQ_2595; - mach64RefFreq = REF_FREQ_2595; /* 14.32 MHz */ - - save_m = 0; - save_n = 0; - - /* Calculate program word */ - if (mhz100 == 0) - program_bits = 0xE0; - else - { - if (mhz100 < mach64MinFreq) - mhz100 = mach64MinFreq; - if (mhz100 > mach64MaxFreq) - mhz100 = mach64MaxFreq; - - longMHz100 = mhz100 * 256 / 100; /* 8 bit scale this */ - - while (mhz100 < (mach64MinFreq << 3)) - { - mhz100 <<= 1; - k++; - } - - twoToKth = 1 << k; - diff = 0; - preDiff = 0xFFFFFFFF; - - for (m = MIN_M; m <= MAX_M; m++) - { - for (n = MIN_N; n <= MAX_N; n++) - { - tempA = (14.31818 * 65536); - tempA *= (n + 8); /* 43..256 */ - tempB = twoToKth * 256; - tempB *= (m + 2); /* 4..32 */ - fOut = tempA / tempB; /* 8 bit scale */ - - if (longMHz100 > fOut) - diff = longMHz100 - fOut; - else - diff = fOut - longMHz100; - - if (diff < preDiff) - { - save_m = m; - save_n = n; - preDiff = diff; - } - } - } - - program_bits = (k << 6) + (save_m) + (save_n << 8); - } - - pll->program_bits = program_bits; - pll->locationAddr = 0; - 
pll->post_divider = 0; - pll->period_in_ps = period_in_ps; - - return 0; -} - -static u32 aty_pll_8398_to_var(const struct pll_18818 *pll) -{ - return(pll->period_in_ps); /* default for now */ -} - -static void aty_set_pll_8398(const struct fb_info_aty *info, - const struct pll_18818 *pll) -{ - u32 program_bits; - u32 locationAddr; - - char old_crtc_ext_disp; - char tmp; - - old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), - info); - - program_bits = pll->program_bits; - locationAddr = pll->locationAddr; - - /* Program clock */ - tmp = aty_ld_8(DAC_CNTL, info); - aty_st_8(DAC_CNTL, tmp | DAC_EXT_SEL_RS2 | DAC_EXT_SEL_RS3, info); - - aty_st_8(DAC_REGS, locationAddr, info); - aty_st_8(DAC_REGS+1, (program_bits & 0xff00) >> 8, info); - aty_st_8(DAC_REGS+1, (program_bits & 0xff), info); - - tmp = aty_ld_8(DAC_CNTL, info); - aty_st_8(DAC_CNTL, (tmp & ~DAC_EXT_SEL_RS2) | DAC_EXT_SEL_RS3, info); - - (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ - aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); - - return; -} - - -static int aty_var_to_pll_514(u32 vclk_per, struct pll_gx *pll) -{ - /* - * FIXME: use real calculations instead of using fixed values from the old - * driver - */ - static struct { - u32 limit; /* pixlock rounding limit (arbitrary) */ - u8 m; /* (df<<6) | vco_div_count */ - u8 n; /* ref_div_count */ - } RGB514_clocks[7] = { - { 8000, (3<<6) | 20, 9 }, /* 7395 ps / 135.2273 MHz */ - { 10000, (1<<6) | 19, 3 }, /* 9977 ps / 100.2273 MHz */ - { 13000, (1<<6) | 2, 3 }, /* 12509 ps / 79.9432 MHz */ - { 14000, (2<<6) | 8, 7 }, /* 13394 ps / 74.6591 MHz */ - { 16000, (1<<6) | 44, 6 }, /* 15378 ps / 65.0284 MHz */ - { 25000, (1<<6) | 15, 5 }, /* 17460 ps / 57.2727 MHz */ - { 50000, (0<<6) | 53, 7 }, /* 33145 ps / 30.1705 MHz */ - }; - int i; - - for (i = 0; i < sizeof(RGB514_clocks)/sizeof(*RGB514_clocks); i++) - if (vclk_per <= RGB514_clocks[i].limit) { - pll->m = 
RGB514_clocks[i].m; - pll->n = RGB514_clocks[i].n; - return 0; - } - return -EINVAL; -} - - -static void aty_StrobeClock(const struct fb_info_aty *info) -{ - u8 tmp; - - udelay(26); - - tmp = aty_ld_8(CLOCK_CNTL, info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, tmp | CLOCK_STROBE, info); - - return; -} - - -static void aty_ICS2595_put1bit(u8 data, const struct fb_info_aty *info) -{ - u8 tmp; - - data &= 0x01; - tmp = aty_ld_8(CLOCK_CNTL, info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, (tmp & ~0x04) | (data << 2), - info); - - tmp = aty_ld_8(CLOCK_CNTL, info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, (tmp & ~0x08) | (0 << 3), info); - - aty_StrobeClock(info); - - tmp = aty_ld_8(CLOCK_CNTL, info); - aty_st_8(CLOCK_CNTL + info->clk_wr_offset, (tmp & ~0x08) | (1 << 3), info); - - aty_StrobeClock(info); - - return; -} - - -static u32 aty_pll_gx_to_var(const struct pll_gx *pll, - const struct fb_info_aty *info) -{ - u8 df, vco_div_count, ref_div_count; - - df = pll->m >> 6; - vco_div_count = pll->m & 0x3f; - ref_div_count = pll->n; - - return ((info->ref_clk_per*ref_div_count)<<(3-df))/(vco_div_count+65); -} - - - /* - * PLL programming (Mach64 CT family) - */ - -static void aty_set_pll_ct(const struct fb_info_aty *info, - const struct pll_ct *pll) -{ - aty_st_pll(PLL_REF_DIV, pll->pll_ref_div, info); - aty_st_pll(PLL_GEN_CNTL, pll->pll_gen_cntl, info); - aty_st_pll(MCLK_FB_DIV, pll->mclk_fb_div, info); - aty_st_pll(PLL_VCLK_CNTL, pll->pll_vclk_cntl, info); - aty_st_pll(VCLK_POST_DIV, pll->vclk_post_div, info); - aty_st_pll(VCLK0_FB_DIV, pll->vclk_fb_div, info); - aty_st_pll(PLL_EXT_CNTL, pll->pll_ext_cntl, info); - - if (!(Gx == GX_CHIP_ID || Gx == CX_CHIP_ID || Gx == CT_CHIP_ID || - Gx == ET_CHIP_ID || - ((Gx == VT_CHIP_ID || Gx == GT_CHIP_ID) && !(Rev & 0x07)))) { - if (Gx == XL_CHIP_ID) { - aty_st_pll(DLL_CNTL, 0x80, info); - } else { - if (info->ram_type >= SDRAM) - aty_st_pll(DLL_CNTL, 0xa6, info); - else - aty_st_pll(DLL_CNTL, 0xa0, info); - } - 
aty_st_pll(VFC_CNTL, 0x1b, info); - aty_st_le32(DSP_CONFIG, pll->dsp_config, info); - aty_st_le32(DSP_ON_OFF, pll->dsp_on_off, info); - } -} - -static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp, - struct pll_ct *pll) -{ - u32 dsp_xclks_per_row, dsp_loop_latency, dsp_precision, dsp_off, dsp_on; - u32 xclks_per_row, fifo_off, fifo_on, y, fifo_size, page_size; - - /* xclocks_per_row<<11 */ - xclks_per_row = (pll->mclk_fb_div*pll->vclk_post_div_real*64<<11)/ - (pll->vclk_fb_div*pll->mclk_post_div_real*bpp); - if (xclks_per_row < (1<<11)) - FAIL("Dotclock to high"); - if (Gx == GT_CHIP_ID || Gx == GU_CHIP_ID || Gx == VT_CHIP_ID || - Gx == VU_CHIP_ID || Gx == GV_CHIP_ID || Gx == GW_CHIP_ID || - Gx == GZ_CHIP_ID) { - fifo_size = 24; - dsp_loop_latency = 0; - } else { - fifo_size = 32; - dsp_loop_latency = 2; - } - dsp_precision = 0; - y = (xclks_per_row*fifo_size)>>11; - while (y) { - y >>= 1; - dsp_precision++; - } - dsp_precision -= 5; - /* fifo_off<<6 */ - fifo_off = ((xclks_per_row*(fifo_size-1))>>5)+(3<<6); - - if (info->total_vram > 1*1024*1024) { - if (info->ram_type >= SDRAM) { - /* >1 MB SDRAM */ - dsp_loop_latency += 8; - page_size = 8; - } else { - /* >1 MB DRAM */ - dsp_loop_latency += 6; - page_size = 9; - } - } else { - if (info->ram_type >= SDRAM) { - /* <2 MB SDRAM */ - dsp_loop_latency += 9; - page_size = 10; - } else { - /* <2 MB DRAM */ - dsp_loop_latency += 8; - page_size = 10; - } - } - /* fifo_on<<6 */ - if (xclks_per_row >= (page_size<<11)) - fifo_on = ((2*page_size+1)<<6)+(xclks_per_row>>5); - else - fifo_on = (3*page_size+2)<<6; - - dsp_xclks_per_row = xclks_per_row>>dsp_precision; - dsp_on = fifo_on>>dsp_precision; - dsp_off = fifo_off>>dsp_precision; - - pll->dsp_config = (dsp_xclks_per_row & 0x3fff) | - ((dsp_loop_latency & 0xf)<<16) | - ((dsp_precision & 7)<<20); - pll->dsp_on_off = (dsp_on & 0x7ff) | ((dsp_off & 0x7ff)<<16); - return 0; -} - -static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per, - struct pll_ct 
*pll) -{ - u32 q, x; /* x is a workaround for sparc64-linux-gcc */ - x = x; /* x is a workaround for sparc64-linux-gcc */ - - pll->pll_ref_div = info->pll_per*2*255/info->ref_clk_per; - - /* FIXME: use the VTB/GTB /3 post divider if it's better suited */ - q = info->ref_clk_per*pll->pll_ref_div*4/info->mclk_per; /* actually 8*q */ - if (q < 16*8 || q > 255*8) - FAIL("mclk out of range"); - else if (q < 32*8) - pll->mclk_post_div_real = 8; - else if (q < 64*8) - pll->mclk_post_div_real = 4; - else if (q < 128*8) - pll->mclk_post_div_real = 2; - else - pll->mclk_post_div_real = 1; - pll->mclk_fb_div = q*pll->mclk_post_div_real/8; - - /* FIXME: use the VTB/GTB /{3,6,12} post dividers if they're better suited */ - q = info->ref_clk_per*pll->pll_ref_div*4/vclk_per; /* actually 8*q */ - if (q < 16*8 || q > 255*8) - FAIL("vclk out of range"); - else if (q < 32*8) - pll->vclk_post_div_real = 8; - else if (q < 64*8) - pll->vclk_post_div_real = 4; - else if (q < 128*8) - pll->vclk_post_div_real = 2; - else - pll->vclk_post_div_real = 1; - pll->vclk_fb_div = q*pll->vclk_post_div_real/8; - return 0; -} - -static void aty_calc_pll_ct(const struct fb_info_aty *info, struct pll_ct *pll) -{ - u8 mpostdiv = 0; - u8 vpostdiv = 0; - - if ((((Gx == GT_CHIP_ID) && (Rev & 0x03)) || (Gx == GU_CHIP_ID) || - (Gx == GV_CHIP_ID) || (Gx == GW_CHIP_ID) || (Gx == GZ_CHIP_ID) || - (Gx == LG_CHIP_ID) || (Gx == GB_CHIP_ID) || (Gx == GD_CHIP_ID) || - (Gx == GI_CHIP_ID) || (Gx == GP_CHIP_ID) || (Gx == GQ_CHIP_ID) || - (Gx == XL_CHIP_ID) || - (Gx == VU_CHIP_ID)) && (info->ram_type >= SDRAM)) - pll->pll_gen_cntl = 0x04; - else - pll->pll_gen_cntl = 0x84; - - switch (pll->mclk_post_div_real) { - case 1: - mpostdiv = 0; - break; - case 2: - mpostdiv = 1; - break; - case 3: - mpostdiv = 4; - break; - case 4: - mpostdiv = 2; - break; - case 8: - mpostdiv = 3; - break; - } - pll->pll_gen_cntl |= mpostdiv<<4; /* mclk */ - - if (Gx == VT_CHIP_ID && (Rev == 0x40 || Rev == 0x48)) - pll->pll_ext_cntl = 0; - 
else - pll->pll_ext_cntl = mpostdiv; /* xclk == mclk */ - - switch (pll->vclk_post_div_real) { - case 2: - vpostdiv = 1; - break; - case 3: - pll->pll_ext_cntl |= 0x10; - case 1: - vpostdiv = 0; - break; - case 6: - pll->pll_ext_cntl |= 0x10; - case 4: - vpostdiv = 2; - break; - case 12: - pll->pll_ext_cntl |= 0x10; - case 8: - vpostdiv = 3; - break; - } - - pll->pll_vclk_cntl = 0x03; /* VCLK = PLL_VCLK/VCLKx_POST */ - pll->vclk_post_div = vpostdiv; -} - -static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per, - u8 bpp, struct pll_ct *pll) -{ - int err; - - if ((err = aty_valid_pll_ct(info, vclk_per, pll))) - return err; - if (!(Gx == GX_CHIP_ID || Gx == CX_CHIP_ID || Gx == CT_CHIP_ID || - Gx == ET_CHIP_ID || - ((Gx == VT_CHIP_ID || Gx == GT_CHIP_ID) && !(Rev & 0x07)))) { - if ((err = aty_dsp_gt(info, bpp, pll))) - return err; - } - aty_calc_pll_ct(info, pll); - return 0; -} - -static u32 aty_pll_ct_to_var(const struct pll_ct *pll, - const struct fb_info_aty *info) -{ - u32 ref_clk_per = info->ref_clk_per; - u8 pll_ref_div = pll->pll_ref_div; - u8 vclk_fb_div = pll->vclk_fb_div; - u8 vclk_post_div = pll->vclk_post_div_real; - - return ref_clk_per*pll_ref_div*vclk_post_div/vclk_fb_div/2; -} - -/* ------------------------------------------------------------------------- */ - static void atyfb_set_par(const struct atyfb_par *par, struct fb_info_aty *info) { u32 i; int accelmode; - int muxmode; u8 tmp; accelmode = par->accel_flags; /* hack */ @@ -2444,61 +740,10 @@ static void atyfb_set_par(const struct atyfb_par *par, /* better call aty_StrobeClock ?? 
*/ aty_st_8(CLOCK_CNTL + info->clk_wr_offset, CLOCK_STROBE, info); - if ((Gx == GX_CHIP_ID) || (Gx == CX_CHIP_ID)) { - switch (info->dac_subtype) { - case DAC_IBMRGB514: - aty_set_dac_514(info, par->crtc.bpp); - break; - case DAC_ATI68860_B: - case DAC_ATI68860_C: - muxmode = aty_set_dac_ATI68860_B(info, par->crtc.bpp, - accelmode); - aty_st_le32(BUS_CNTL, 0x890e20f1, info); - aty_st_le32(DAC_CNTL, 0x47052100, info); - break; - case DAC_ATT20C408: - muxmode = aty_set_dac_ATT21C498(info, &par->pll.ics2595, - par->crtc.bpp); - aty_st_le32(BUS_CNTL, 0x890e20f1, info); - aty_st_le32(DAC_CNTL, 0x00072000, info); - break; - case DAC_ATT21C498: - muxmode = aty_set_dac_ATT21C498(info, &par->pll.ics2595, - par->crtc.bpp); - aty_st_le32(BUS_CNTL, 0x890e20f1, info); - aty_st_le32(DAC_CNTL, 0x00072000, info); - break; - default: - printk(" atyfb_set_par: DAC type not implemented yet!\n"); - aty_st_le32(BUS_CNTL, 0x890e20f1, info); - aty_st_le32(DAC_CNTL, 0x47052100, info); - /* new in 2.2.3p1 from Geert. ???????? 
*/ - aty_st_le32(BUS_CNTL, 0x590e10ff, info); - aty_st_le32(DAC_CNTL, 0x47012100, info); - break; - } - - switch (info->clk_type) { - case CLK_ATI18818_1: - aty_set_pll18818(info, &par->pll.ics2595); - break; - case CLK_STG1703: - aty_set_pll_1703(info, &par->pll.ics2595); - break; - case CLK_CH8398: - aty_set_pll_8398(info, &par->pll.ics2595); - break; - case CLK_ATT20C408: - aty_set_pll_408(info, &par->pll.ics2595); - break; - case CLK_IBMRGB514: - aty_set_pll_gx(info, &par->pll.gx); - break; - default: - printk(" atyfb_set_par: CLK type not implemented yet!"); - break; - } + info->dac_ops->set_dac(info, &par->pll, par->crtc.bpp, accelmode); + info->pll_ops->set_pll(info, &par->pll); + if (!M64_HAS(INTEGRATED)) { /* Don't forget MEM_CNTL */ i = aty_ld_le32(MEM_CNTL, info) & 0xf0ffffff; switch (par->crtc.bpp) { @@ -2513,11 +758,9 @@ static void atyfb_set_par(const struct atyfb_par *par, break; } aty_st_le32(MEM_CNTL, i, info); - } else { - aty_set_pll_ct(info, &par->pll.ct); i = aty_ld_le32(MEM_CNTL, info) & 0xf00fffff; - if (!(Gx == VT_CHIP_ID && (Rev == 0x40 || Rev == 0x48))) + if (!M64_HAS(MAGIC_POSTDIV)) i |= info->mem_refresh_rate << 20; switch (par->crtc.bpp) { case 8: @@ -2531,13 +774,13 @@ static void atyfb_set_par(const struct atyfb_par *par, i |= 0x08000000; break; } - if ((Gx == CT_CHIP_ID) || (Gx == ET_CHIP_ID)) { + if (M64_HAS(CT_BUS)) { aty_st_le32(DAC_CNTL, 0x87010184, info); aty_st_le32(BUS_CNTL, 0x680000f9, info); - } else if ((Gx == VT_CHIP_ID) || (Gx == VU_CHIP_ID)) { + } else if (M64_HAS(VT_BUS)) { aty_st_le32(DAC_CNTL, 0x87010184, info); aty_st_le32(BUS_CNTL, 0x680000f9, info); - } else if ((Gx == LN_CHIP_ID) || (Gx == LM_CHIP_ID)) { + } else if (M64_HAS(MOBIL_BUS)) { aty_st_le32(DAC_CNTL, 0x80010102, info); aty_st_le32(BUS_CNTL, 0x7b33a040, info); } else { @@ -2553,7 +796,7 @@ static void atyfb_set_par(const struct atyfb_par *par, /* Initialize the graphics engine */ if (par->accel_flags & FB_ACCELF_TEXT) - init_engine(par, info); + 
aty_init_engine(par, info); #ifdef CONFIG_FB_COMPAT_XPMAC if (!console_fb_info || console_fb_info == &info->fb_info) { @@ -2583,30 +826,9 @@ static int atyfb_decode_var(const struct fb_var_screeninfo *var, { int err; - if ((err = aty_var_to_crtc(info, var, &par->crtc))) - return err; - if ((Gx == GX_CHIP_ID) || (Gx == CX_CHIP_ID)) - switch (info->clk_type) { - case CLK_ATI18818_1: - err = aty_var_to_pll_18818(var->pixclock, &par->pll.ics2595); - break; - case CLK_STG1703: - err = aty_var_to_pll_1703(var->pixclock, &par->pll.ics2595); - break; - case CLK_CH8398: - err = aty_var_to_pll_8398(var->pixclock, &par->pll.ics2595); - break; - case CLK_ATT20C408: - err = aty_var_to_pll_408(var->pixclock, &par->pll.ics2595); - break; - case CLK_IBMRGB514: - err = aty_var_to_pll_514(var->pixclock, &par->pll.gx); - break; - } - else - err = aty_var_to_pll_ct(info, var->pixclock, par->crtc.bpp, - &par->pll.ct); - if (err) + if ((err = aty_var_to_crtc(info, var, &par->crtc)) || + (err = info->pll_ops->var_to_pll(info, var->pixclock, par->crtc.bpp, + &par->pll))) return err; if (var->accel_flags & FB_ACCELF_TEXT) @@ -2632,26 +854,7 @@ static int atyfb_encode_var(struct fb_var_screeninfo *var, if ((err = aty_crtc_to_var(&par->crtc, var))) return err; - if ((Gx == GX_CHIP_ID) || (Gx == CX_CHIP_ID)) - switch (info->clk_type) { - case CLK_ATI18818_1: - var->pixclock = aty_pll_18818_to_var(&par->pll.ics2595); - break; - case CLK_STG1703: - var->pixclock = aty_pll_1703_to_var(&par->pll.ics2595); - break; - case CLK_CH8398: - var->pixclock = aty_pll_8398_to_var(&par->pll.ics2595); - break; - case CLK_ATT20C408: - var->pixclock = aty_pll_408_to_var(&par->pll.ics2595); - break; - case CLK_IBMRGB514: - var->pixclock = aty_pll_gx_to_var(&par->pll.gx, info); - break; - } - else - var->pixclock = aty_pll_ct_to_var(&par->pll.ct, info); + var->pixclock = info->pll_ops->pll_to_var(info, &par->pll); var->height = -1; var->width = -1; @@ -2711,7 +914,7 @@ static int atyfb_release(struct fb_info 
*info, int user) if (user) { fb->open--; - udelay(1000); + mdelay(1); wait_for_idle(fb); if (!fb->open) { int was_mmaped = fb->mmaped; @@ -2766,19 +969,19 @@ static int encode_fix(struct fb_fix_screeninfo *fix, * Reg Block 0 (CT-compatible block) is at ati_regbase_phys * Reg Block 1 (multimedia extensions) is at ati_regbase_phys-0x400 */ - if (Gx == GX_CHIP_ID || Gx == CX_CHIP_ID) { + if (M64_HAS(GX)) { fix->mmio_start = info->ati_regbase_phys; fix->mmio_len = 0x400; fix->accel = FB_ACCEL_ATI_MACH64GX; - } else if (Gx == CT_CHIP_ID || Gx == ET_CHIP_ID) { + } else if (M64_HAS(CT)) { fix->mmio_start = info->ati_regbase_phys; fix->mmio_len = 0x400; fix->accel = FB_ACCEL_ATI_MACH64CT; - } else if (Gx == VT_CHIP_ID || Gx == VU_CHIP_ID || Gx == VV_CHIP_ID) { + } else if (M64_HAS(VT)) { fix->mmio_start = info->ati_regbase_phys-0x400; fix->mmio_len = 0x800; fix->accel = FB_ACCEL_ATI_MACH64VT; - } else { + } else /* if (M64_HAS(GT)) */ { fix->mmio_start = info->ati_regbase_phys-0x400; fix->mmio_len = 0x800; fix->accel = FB_ACCEL_ATI_MACH64GT; @@ -2866,10 +1069,12 @@ static void atyfb_set_dispsw(struct display *disp, struct fb_info_aty *info, default: disp->dispsw = &fbcon_dummy; } +#ifdef CONFIG_FB_ATY_CT if (info->cursor) { info->dispsw.cursor = atyfb_cursor; info->dispsw.set_font = atyfb_set_font; } +#endif /* CONFIG_FB_ATY_CT */ } @@ -3030,12 +1235,15 @@ struct atyclk { u32 dsp_on; /* 0-2047 */ u32 dsp_off; /* 0-2047 */ }; + +#define ATYIO_FEATR 0x41545902 /* ATY\02 */ +#define ATYIO_FEATW 0x41545903 /* ATY\03 */ #endif static int atyfb_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg, int con, struct fb_info *info2) { -#if defined(__sparc__) || defined(DEBUG) +#if defined(__sparc__) || (defined(DEBUG) && defined(CONFIG_FB_ATY_CT)) struct fb_info_aty *info = (struct fb_info_aty *)info2; #endif /* __sparc__ || DEBUG */ #ifdef __sparc__ @@ -3061,19 +1269,19 @@ static int atyfb_ioctl(struct inode *inode, struct file *file, u_int cmd, return -EFAULT; 
break; #endif /* __sparc__ */ -#ifdef DEBUG +#if defined(DEBUG) && defined(CONFIG_FB_ATY_CT) case ATYIO_CLKR: - if ((Gx != GX_CHIP_ID) && (Gx != CX_CHIP_ID)) { + if (M64_HAS(INTEGRATED)) { struct atyclk clk; - struct pll_ct *pll = &info->current_par.pll.ct; - u32 dsp_config = pll->dsp_config; - u32 dsp_on_off = pll->dsp_on_off; + union aty_pll *pll = &info->current_par.pll; + u32 dsp_config = pll->ct.dsp_config; + u32 dsp_on_off = pll->ct.dsp_on_off; clk.ref_clk_per = info->ref_clk_per; - clk.pll_ref_div = pll->pll_ref_div; - clk.mclk_fb_div = pll->mclk_fb_div; - clk.mclk_post_div = pll->mclk_post_div_real; - clk.vclk_fb_div = pll->vclk_fb_div; - clk.vclk_post_div = pll->vclk_post_div_real; + clk.pll_ref_div = pll->ct.pll_ref_div; + clk.mclk_fb_div = pll->ct.mclk_fb_div; + clk.mclk_post_div = pll->ct.mclk_post_div_real; + clk.vclk_fb_div = pll->ct.vclk_fb_div; + clk.vclk_post_div = pll->ct.vclk_post_div_real; clk.dsp_xclks_per_row = dsp_config & 0x3fff; clk.dsp_loop_latency = (dsp_config>>16) & 0xf; clk.dsp_precision = (dsp_config>>20) & 7; @@ -3085,28 +1293,36 @@ static int atyfb_ioctl(struct inode *inode, struct file *file, u_int cmd, return -EINVAL; break; case ATYIO_CLKW: - if ((Gx != GX_CHIP_ID) && (Gx != CX_CHIP_ID)) { + if (M64_HAS(INTEGRATED)) { struct atyclk clk; - struct pll_ct *pll = &info->current_par.pll.ct; + union aty_pll *pll = &info->current_par.pll; if (copy_from_user(&clk, (struct atyclk *)arg, sizeof(clk))) return -EFAULT; info->ref_clk_per = clk.ref_clk_per; - pll->pll_ref_div = clk.pll_ref_div; - pll->mclk_fb_div = clk.mclk_fb_div; - pll->mclk_post_div_real = clk.mclk_post_div; - pll->vclk_fb_div = clk.vclk_fb_div; - pll->vclk_post_div_real = clk.vclk_post_div; - pll->dsp_config = (clk.dsp_xclks_per_row & 0x3fff) | - ((clk.dsp_loop_latency & 0xf)<<16) | - ((clk.dsp_precision & 7)<<20); - pll->dsp_on_off = (clk.dsp_on & 0x7ff) | - ((clk.dsp_off & 0x7ff)<<16); - aty_calc_pll_ct(info, pll); + pll->ct.pll_ref_div = clk.pll_ref_div; + 
pll->ct.mclk_fb_div = clk.mclk_fb_div; + pll->ct.mclk_post_div_real = clk.mclk_post_div; + pll->ct.vclk_fb_div = clk.vclk_fb_div; + pll->ct.vclk_post_div_real = clk.vclk_post_div; + pll->ct.dsp_config = (clk.dsp_xclks_per_row & 0x3fff) | + ((clk.dsp_loop_latency & 0xf)<<16) | + ((clk.dsp_precision & 7)<<20); + pll->ct.dsp_on_off = (clk.dsp_on & 0x7ff) | + ((clk.dsp_off & 0x7ff)<<16); + aty_calc_pll_ct(info, &pll->ct); aty_set_pll_ct(info, pll); } else return -EINVAL; break; -#endif /* DEBUG */ + case ATYIO_FEATR: + if (get_user(info->features, (u32 *)arg)) + return -EFAULT; + break; + case ATYIO_FEATW: + if (put_user(info->features, (u32 *)arg)) + return -EFAULT; + break; +#endif /* DEBUG && CONFIG_FB_ATY_CT */ default: return -EINVAL; } @@ -3217,10 +1433,7 @@ static void atyfb_save_palette(struct fb_info *fb, int enter) for (i = 0; i < 256; i++) { tmp = aty_ld_8(DAC_CNTL, info) & 0xfc; - if (Gx == GT_CHIP_ID || Gx == GU_CHIP_ID || Gx == GV_CHIP_ID || - Gx == GW_CHIP_ID || Gx == GZ_CHIP_ID || Gx == LG_CHIP_ID || - Gx == GB_CHIP_ID || Gx == GD_CHIP_ID || Gx == GI_CHIP_ID || - Gx == GP_CHIP_ID || Gx == GQ_CHIP_ID || Gx == XL_CHIP_ID) + if (M64_HAS(EXTRA_BRIGHT)) tmp |= 0x2; aty_st_8(DAC_CNTL, tmp, info); aty_st_8(DAC_MASK, 0xff, info); @@ -3266,10 +1479,256 @@ static void atyfb_palette(int enter) } #endif /* __sparc__ */ + + +#ifdef CONFIG_PMAC_PBOOK + +static struct fb_info_aty* first_display = NULL; + +/* Power management routines. Those are used for PowerBook sleep. + * + * It appears that Rage LT and Rage LT Pro have different power + * management registers. 
There's is some confusion about which + * chipID is a Rage LT or LT pro :( + */ +static int aty_power_mgmt_LT(int sleep, struct fb_info_aty *info) +{ + unsigned int pm; + int timeout; + + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + pm = (pm & ~PWR_MGT_MODE_MASK) | PWR_MGT_MODE_REG; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + + timeout = 200000; + if (sleep) { + /* Sleep */ + pm &= ~PWR_MGT_ON; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + udelay(10); + pm &= ~(PWR_BLON | AUTO_PWR_UP); + pm |= SUSPEND_NOW; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + udelay(10); + pm |= PWR_MGT_ON; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + do { + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + udelay(10); + if ((--timeout) == 0) + break; + } while ((pm & PWR_MGT_STATUS_MASK) != PWR_MGT_STATUS_SUSPEND); + } else { + /* Wakeup */ + pm &= ~PWR_MGT_ON; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + udelay(10); + pm |= (PWR_BLON | AUTO_PWR_UP); + pm &= ~SUSPEND_NOW; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + udelay(10); + pm |= PWR_MGT_ON; + aty_st_le32(POWER_MANAGEMENT_LG, pm, info); + do { + pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); + udelay(10); + if ((--timeout) == 0) + break; + } while ((pm & PWR_MGT_STATUS_MASK) != 0); + } + mdelay(500); + + return timeout ? 
PBOOK_SLEEP_OK : PBOOK_SLEEP_REFUSE; +} + +static int aty_power_mgmt_LTPro(int sleep, struct fb_info_aty *info) +{ + unsigned int pm; + int timeout; + + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + pm = (pm & ~PWR_MGT_MODE_MASK) | PWR_MGT_MODE_REG; + aty_st_lcd(POWER_MANAGEMENT, pm, info); + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + + timeout = 200; + if (sleep) { + /* Sleep */ + pm &= ~PWR_MGT_ON; + aty_st_lcd(POWER_MANAGEMENT, pm, info); + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + udelay(10); + pm &= ~(PWR_BLON | AUTO_PWR_UP); + pm |= SUSPEND_NOW; + aty_st_lcd(POWER_MANAGEMENT, pm, info); + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + udelay(10); + pm |= PWR_MGT_ON; + aty_st_lcd(POWER_MANAGEMENT, pm, info); + do { + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + mdelay(1); + if ((--timeout) == 0) + break; + } while ((pm & PWR_MGT_STATUS_MASK) != PWR_MGT_STATUS_SUSPEND); + } else { + /* Wakeup */ + pm &= ~PWR_MGT_ON; + aty_st_lcd(POWER_MANAGEMENT, pm, info); + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + udelay(10); + pm &= ~SUSPEND_NOW; + pm |= (PWR_BLON | AUTO_PWR_UP); + aty_st_lcd(POWER_MANAGEMENT, pm, info); + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + udelay(10); + pm |= PWR_MGT_ON; + aty_st_lcd(POWER_MANAGEMENT, pm, info); + do { + pm = aty_ld_lcd(POWER_MANAGEMENT, info); + mdelay(1); + if ((--timeout) == 0) + break; + } while ((pm & PWR_MGT_STATUS_MASK) != 0); + } + + return timeout ? PBOOK_SLEEP_OK : PBOOK_SLEEP_REFUSE; +} + +static int aty_power_mgmt(int sleep, struct fb_info_aty *info) +{ + return M64_HAS(LT_SLEEP) ? aty_power_mgmt_LT(sleep, info) + : aty_power_mgmt_LTPro(sleep, info); +} + +/* + * Save the contents of the frame buffer when we go to sleep, + * and restore it when we wake up again. 
+ */ +static int aty_sleep_notify(struct pmu_sleep_notifier *self, int when) +{ + struct fb_info_aty *info; + int result; + + result = PBOOK_SLEEP_OK; + + for (info = first_display; info != NULL; info = info->next) { + struct fb_fix_screeninfo fix; + int nb; + + atyfb_get_fix(&fix, fg_console, (struct fb_info *)info); + nb = fb_display[fg_console].var.yres * fix.line_length; + + switch (when) { + case PBOOK_SLEEP_REQUEST: + info->save_framebuffer = vmalloc(nb); + if (info->save_framebuffer == NULL) + return PBOOK_SLEEP_REFUSE; + break; + case PBOOK_SLEEP_REJECT: + if (info->save_framebuffer) { + vfree(info->save_framebuffer); + info->save_framebuffer = 0; + } + break; + case PBOOK_SLEEP_NOW: + if (info->blitter_may_be_busy) + wait_for_idle(info); + /* Stop accel engine (stop bus mastering) */ + if (info->current_par.accel_flags & FB_ACCELF_TEXT) + aty_reset_engine(info); + + /* Backup fb content */ + if (info->save_framebuffer) + memcpy_fromio(info->save_framebuffer, + (void *)info->frame_buffer, nb); + + /* Blank display and LCD */ + atyfbcon_blank(VESA_POWERDOWN+1, (struct fb_info *)info); + + /* Set chip to "suspend" mode */ + result = aty_power_mgmt(1, info); + break; + case PBOOK_WAKE: + /* Wakeup chip */ + result = aty_power_mgmt(0, info); + + /* Restore fb content */ + if (info->save_framebuffer) { + memcpy_toio((void *)info->frame_buffer, + info->save_framebuffer, nb); + vfree(info->save_framebuffer); + info->save_framebuffer = 0; + } + /* Restore display */ + atyfb_set_par(&info->current_par, info); + atyfbcon_blank(0, (struct fb_info *)info); + break; + } + } + return result; +} + +static struct pmu_sleep_notifier aty_sleep_notifier = { + aty_sleep_notify, SLEEP_LEVEL_VIDEO, +}; +#endif /* CONFIG_PMAC_PBOOK */ + +#ifdef CONFIG_PMAC_BACKLIGHT + + /* + * LCD backlight control + */ + +static int backlight_conv[] = { + 0x00, 0x3f, 0x4c, 0x59, 0x66, 0x73, 0x80, 0x8d, + 0x9a, 0xa7, 0xb4, 0xc1, 0xcf, 0xdc, 0xe9, 0xff +}; + +static int 
+aty_set_backlight_enable(int on, int level, void* data) +{ + struct fb_info_aty *info = (struct fb_info_aty *)data; + unsigned int reg = aty_ld_lcd(LCD_MISC_CNTL, info); + + reg |= (BLMOD_EN | BIASMOD_EN); + if (on && level > BACKLIGHT_OFF) { + reg &= ~BIAS_MOD_LEVEL_MASK; + reg |= (backlight_conv[level] << BIAS_MOD_LEVEL_SHIFT); + } else { + reg &= ~BIAS_MOD_LEVEL_MASK; + reg |= (backlight_conv[0] << BIAS_MOD_LEVEL_SHIFT); + } + aty_st_lcd(LCD_MISC_CNTL, reg, info); + + return 0; +} + +static int +aty_set_backlight_level(int level, void* data) +{ + return aty_set_backlight_enable(1, level, data); +} + +static struct backlight_controller aty_backlight_controller = { + aty_set_backlight_enable, + aty_set_backlight_level +}; +#endif /* CONFIG_PMAC_BACKLIGHT */ + + + /* * Initialisation */ +static struct fb_info_aty *fb_list = NULL; + static int __init aty_init(struct fb_info_aty *info, const char *name) { u32 chip_id; @@ -3277,6 +1736,8 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) int j, k; struct fb_var_screeninfo var; struct display *disp; + u16 type; + u8 rev; const char *chipname = NULL, *ramname = NULL, *xtal; int pll, mclk, gtb_memsize; #if defined(CONFIG_PPC) @@ -3286,115 +1747,100 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) info->aty_cmap_regs = (struct aty_cmap_regs *)(info->ati_regbase+0xc0); chip_id = aty_ld_le32(CONFIG_CHIP_ID, info); - Gx = chip_id & CFG_CHIP_TYPE; - Rev = (chip_id & CFG_CHIP_REV)>>24; - for (j = 0; j < (sizeof(aty_features)/sizeof(*aty_features)); j++) - if (aty_features[j].chip_type == Gx) { - chipname = aty_features[j].name; - info->dac_type = (aty_ld_le32(DAC_CNTL, info) >> 16) & 0x07; - break; + type = chip_id & CFG_CHIP_TYPE; + rev = (chip_id & CFG_CHIP_REV)>>24; + for (j = 0; j < (sizeof(aty_chips)/sizeof(*aty_chips)); j++) + if (type == aty_chips[j].chip_type && + (rev & aty_chips[j].rev_mask) == aty_chips[j].rev_val) { + chipname = aty_chips[j].name; + pll = 
aty_chips[j].pll; + mclk = aty_chips[j].mclk; + info->features = aty_chips[j].features; + goto found; } - if (!chipname) { - printk("atyfb: Unknown mach64 0x%04x\n", Gx); - return 0; - } else - printk("atyfb: %s [0x%04x rev 0x%02x] ", chipname, Gx, Rev); - if ((Gx == GX_CHIP_ID) || (Gx == CX_CHIP_ID)) { - info->bus_type = (aty_ld_le32(CONFIG_STAT0, info) >> 0) & 0x07; - info->ram_type = (aty_ld_le32(CONFIG_STAT0, info) >> 3) & 0x07; + printk("atyfb: Unknown mach64 0x%04x rev 0x%04x\n", type, rev); + return 0; + +found: + printk("atyfb: %s [0x%04x rev 0x%02x] ", chipname, type, rev); +#ifdef CONFIG_FB_ATY_GX + if (!M64_HAS(INTEGRATED)) { + u32 stat0; + u8 dac_type, dac_subtype, clk_type; + stat0 = aty_ld_le32(CONFIG_STAT0, info); + info->bus_type = (stat0 >> 0) & 0x07; + info->ram_type = (stat0 >> 3) & 0x07; ramname = aty_gx_ram[info->ram_type]; /* FIXME: clockchip/RAMDAC probing? */ + dac_type = (aty_ld_le32(DAC_CNTL, info) >> 16) & 0x07; #ifdef CONFIG_ATARI - info->clk_type = CLK_ATI18818_1; - info->dac_type = (aty_ld_le32(CONFIG_STAT0, info) >> 9) & 0x07; - if (info->dac_type == 0x07) - info->dac_subtype = DAC_ATT20C408; + clk_type = CLK_ATI18818_1; + dac_type = (stat0 >> 9) & 0x07; + if (dac_type == 0x07) + dac_subtype = DAC_ATT20C408; else - info->dac_subtype = (aty_ld_8(SCRATCH_REG1 + 1, info) & 0xF0) | - info->dac_type; + dac_subtype = (aty_ld_8(SCRATCH_REG1 + 1, info) & 0xF0) | dac_type; #else - info->dac_type = DAC_IBMRGB514; - info->dac_subtype = DAC_IBMRGB514; - info->clk_type = CLK_IBMRGB514; + dac_type = DAC_IBMRGB514; + dac_subtype = DAC_IBMRGB514; + clk_type = CLK_IBMRGB514; #endif - /* FIXME */ - pll = 135; - mclk = 50; - } else { + switch (dac_subtype) { + case DAC_IBMRGB514: + info->dac_ops = &aty_dac_ibm514; + break; + case DAC_ATI68860_B: + case DAC_ATI68860_C: + info->dac_ops = &aty_dac_ati68860b; + break; + case DAC_ATT20C408: + case DAC_ATT21C498: + info->dac_ops = &aty_dac_att21c498; + break; + default: + printk(" atyfb_set_par: DAC type not 
implemented yet!\n"); + info->dac_ops = &aty_dac_unsupported; + break; + } + switch (clk_type) { + case CLK_ATI18818_1: + info->pll_ops = &aty_pll_ati18818_1; + break; + case CLK_STG1703: + info->pll_ops = &aty_pll_stg1703; + break; + case CLK_CH8398: + info->pll_ops = &aty_pll_ch8398; + break; + case CLK_ATT20C408: + info->pll_ops = &aty_pll_att20c408; + break; + case CLK_IBMRGB514: + info->pll_ops = &aty_pll_ibm514; + break; + default: + printk(" atyfb_set_par: CLK type not implemented yet!"); + info->pll_ops = &aty_pll_unsupported; + break; + } + } +#endif /* CONFIG_FB_ATY_GX */ +#ifdef CONFIG_FB_ATY_CT + if (M64_HAS(INTEGRATED)) { info->bus_type = PCI; info->ram_type = (aty_ld_le32(CONFIG_STAT0, info) & 0x07); ramname = aty_ct_ram[info->ram_type]; - info->dac_type = DAC_INTERNAL; - info->dac_subtype = DAC_INTERNAL; - info->clk_type = CLK_INTERNAL; - if ((Gx == CT_CHIP_ID) || (Gx == ET_CHIP_ID)) { - pll = 135; - mclk = 60; - } else { - mclk = info->ram_type >= SDRAM ? 67 : 63; - if ((Gx == VT_CHIP_ID) && (Rev == 0x08)) { - /* VTA3 */ - pll = 170; - } else if (((Gx == VT_CHIP_ID) && ((Rev == 0x40) || - (Rev == 0x48))) || - ((Gx == VT_CHIP_ID) && ((Rev == 0x01) || - (Rev == 0x9a))) || - Gx == VU_CHIP_ID) { - /* VTA4 or VTB */ - pll = 200; - } else if (Gx == VV_CHIP_ID) { - /* VT4 */ - pll = 230; - mclk = 83; - } else if (Gx == VT_CHIP_ID) { - /* other VT */ - pll = 135; - mclk = 63; - } else if ((Gx == GT_CHIP_ID) && (Rev & 0x01)) { - /* RAGE II */ - pll = 170; - } else if (((Gx == GT_CHIP_ID) && (Rev & 0x02)) || - (Gx == GU_CHIP_ID)) { - /* RAGE II+ */ - pll = 200; - } else if (Gx == GV_CHIP_ID || Gx == GW_CHIP_ID || - Gx == GZ_CHIP_ID) { - /* RAGE IIC */ - pll = 230; - mclk = 83; - } else if (Gx == GB_CHIP_ID || Gx == GD_CHIP_ID || - Gx == GI_CHIP_ID || Gx == GP_CHIP_ID || - Gx == GQ_CHIP_ID || Gx == LB_CHIP_ID || - Gx == LD_CHIP_ID || - Gx == LI_CHIP_ID || Gx == LP_CHIP_ID) { - /* RAGE PRO or LT PRO */ - pll = 230; - mclk = 100; - } else if (Gx == XL_CHIP_ID) { 
- pll = 230; - mclk = 120; - } else if (Gx == LG_CHIP_ID) { - /* Rage LT */ - pll = 230; - mclk = 63; - } else if ((Gx == LN_CHIP_ID) || (Gx == LM_CHIP_ID)) { - /* Rage mobility M1 */ - pll = 230; - mclk = 50; - } else { - /* other RAGE */ - pll = 135; - mclk = 63; - } - } + info->dac_ops = &aty_dac_ct; + info->pll_ops = &aty_pll_ct; + /* for many chips, the mclk is 67 MHz for SDRAM, 63 MHz otherwise */ + if (mclk == 67 && info->ram_type < SDRAM) + mclk = 63; } +#endif /* CONFIG_FB_ATY_CT */ info->ref_clk_per = 1000000000000ULL/14318180; xtal = "14.31818"; - if (!(Gx == GX_CHIP_ID || Gx == CX_CHIP_ID || Gx == CT_CHIP_ID || - Gx == ET_CHIP_ID || - ((Gx == VT_CHIP_ID || Gx == GT_CHIP_ID) && !(Rev & 0x07))) && - (pll_ref_div = aty_ld_pll(PLL_REF_DIV, info))) { + if (M64_HAS(GTB_DSP) && (pll_ref_div = aty_ld_pll(PLL_REF_DIV, info))) { int diff1, diff2; diff1 = 510*14/pll_ref_div-pll; diff2 = 510*29/pll_ref_div-pll; @@ -3409,9 +1855,7 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) } i = aty_ld_le32(MEM_CNTL, info); - gtb_memsize = !(Gx == GX_CHIP_ID || Gx == CX_CHIP_ID || Gx == CT_CHIP_ID || - Gx == ET_CHIP_ID || - ((Gx == VT_CHIP_ID || Gx == GT_CHIP_ID) && !(Rev & 0x07))); + gtb_memsize = M64_HAS(GTB_DSP); if (gtb_memsize) switch (i & 0xF) { /* 0xF used instead of MEM_SIZE_ALIAS */ case MEM_SIZE_512K: @@ -3459,7 +1903,7 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) info->total_vram = 0x80000; } - if (Gx == GI_CHIP_ID) { + if (M64_HAS(MAGIC_VRAM_SIZE)) { if (aty_ld_le32(CONFIG_STAT1, info) & 0x40000000) info->total_vram += 0x400000; } @@ -3510,7 +1954,7 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) info->mclk_per = 1000000/mclk; #ifdef DEBUG - if ((Gx != GX_CHIP_ID) && (Gx != CX_CHIP_ID)) { + if (M64_HAS(INTEGRATED)) { int i; printk("BUS_CNTL DAC_CNTL MEM_CNTL EXT_MEM_CNTL CRTC_GEN_CNTL " "DSP_CONFIG DSP_ON_OFF\n" @@ -3528,8 +1972,8 @@ static int __init aty_init(struct fb_info_aty *info, 
const char *name) /* * Last page of 8 MB (4 MB on ISA) aperture is MMIO - * FIXME: we should use the auxiliary aperture instead so we can acces the - * full 8 MB of video RAM on 8 MB boards + * FIXME: we should use the auxiliary aperture instead so we can access + * the full 8 MB of video RAM on 8 MB boards */ if (info->total_vram == 0x800000 || (info->bus_type == ISA && info->total_vram == 0x400000)) @@ -3552,12 +1996,12 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) info->fb_info.flags = FBINFO_FLAG_DEFAULT; #ifdef CONFIG_PMAC_BACKLIGHT - if (Gx == LI_CHIP_ID && machine_is_compatible("PowerBook1,1")) { + if (M64_HAS(G3_PB_1_1) && machine_is_compatible("PowerBook1,1")) { /* these bits let the 101 powerbook wake up from sleep -- paulus */ - aty_st_lcd(LCD_POWER_MANAGEMENT, aty_ld_lcd(LCD_POWER_MANAGEMENT, info) + aty_st_lcd(POWER_MANAGEMENT, aty_ld_lcd(POWER_MANAGEMENT, info) | (USE_F32KHZ | TRISTATE_MEM_EN), info); } - if ((Gx == LN_CHIP_ID) || (Gx == LM_CHIP_ID)) + if (M64_HAS(MOBIL_BUS)) register_backlight_controller(&aty_backlight_controller, info, "ati"); #endif /* CONFIG_PMAC_BACKLIGHT */ @@ -3583,7 +2027,7 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) } #endif if (default_vmode == VMODE_CHOOSE) { - if (Gx == LG_CHIP_ID || Gx == LI_CHIP_ID) + if (M64_HAS(G3_PB_1024x768)) /* G3 PowerBook with 1024x768 LCD */ default_vmode = VMODE_1024_768_60; else if (machine_is_compatible("iMac")) @@ -3651,13 +2095,15 @@ static int __init aty_init(struct fb_info_aty *info, const char *name) info->palette[j].blue = default_blu[k]; } - if (Gx != GX_CHIP_ID && Gx != CX_CHIP_ID) { +#ifdef CONFIG_FB_ATY_CT + if (curblink && M64_HAS(INTEGRATED)) { info->cursor = aty_init_cursor(info); if (info->cursor) { info->dispsw.cursor = atyfb_cursor; info->dispsw.set_font = atyfb_set_font; } } +#endif /* CONFIG_FB_ATY_CT */ atyfb_set_var(&var, -1, &info->fb_info); @@ -3698,8 +2144,8 @@ int __init atyfb_init(void) if ((pdev->class >> 16) 
== PCI_BASE_CLASS_DISPLAY) { struct resource *rp; - for (i = sizeof(aty_features)/sizeof(*aty_features)-1; i >= 0; i--) - if (pdev->device == aty_features[i].pci_id) + for (i = sizeof(aty_chips)/sizeof(*aty_chips)-1; i >= 0; i--) + if (pdev->device == aty_chips[i].pci_id) break; if (i < 0) continue; @@ -3815,24 +2261,24 @@ int __init atyfb_init(void) chip_id = aty_ld_le32(CONFIG_CHIP_ID, info); if (((chip_id & CFG_CHIP_TYPE) == VT_CHIP_ID) && !((chip_id >> 24) & 1)) { - switch (mem & 0x0f) { + switch (mem & 0x0f) { case 3: - mem = (mem & ~(0x0f)) | 2; - break; + mem = (mem & ~(0x0f)) | 2; + break; case 7: - mem = (mem & ~(0x0f)) | 3; - break; + mem = (mem & ~(0x0f)) | 3; + break; case 9: - mem = (mem & ~(0x0f)) | 4; - break; + mem = (mem & ~(0x0f)) | 4; + break; case 11: - mem = (mem & ~(0x0f)) | 5; - break; + mem = (mem & ~(0x0f)) | 5; + break; default: - break; - } - if ((aty_ld_le32(CONFIG_STAT0, info) & 7) >= SDRAM) - mem &= ~(0x00700000); + break; + } + if ((aty_ld_le32(CONFIG_STAT0, info) & 7) >= SDRAM) + mem &= ~(0x00700000); } mem &= ~(0xcf80e000); /* Turn off all undocumented bits. 
*/ aty_st_le32(MEM_CNTL, mem, info); @@ -3889,12 +2335,12 @@ int __init atyfb_init(void) pll_regs[i] = aty_ld_pll(i, info); /* - * PLL Reference Devider M: + * PLL Reference Divider M: */ M = pll_regs[2]; /* - * PLL Feedback Devider N (Dependant on CLOCK_CNTL): + * PLL Feedback Divider N (Dependant on CLOCK_CNTL): */ N = pll_regs[7 + (clock_cntl & 3)]; @@ -3904,7 +2350,7 @@ int __init atyfb_init(void) P = 1 << (pll_regs[6] >> ((clock_cntl & 3) << 1)); /* - * PLL Devider Q: + * PLL Divider Q: */ Q = N / P; @@ -3997,7 +2443,7 @@ int __init atyfb_init(void) #ifdef CONFIG_PMAC_PBOOK if (first_display == NULL) - pmu_register_sleep_notifier(&aty_sleep_notifier); + pmu_register_sleep_notifier(&aty_sleep_notifier); info->next = first_display; first_display = info; #endif @@ -4010,7 +2456,7 @@ int __init atyfb_init(void) } #elif defined(CONFIG_ATARI) - u32 clock_r; + u32 clock_r; int m64_num; struct fb_info_aty *info; @@ -4200,10 +2646,12 @@ static int atyfbcon_switch(int con, struct fb_info *fb) if (fb_display[currcon].cmap.len) fb_get_cmap(&fb_display[currcon].cmap, 1, atyfb_getcolreg, fb); +#ifdef CONFIG_FB_ATY_CT /* Erase HW Cursor */ if (info->cursor) atyfb_cursor(&fb_display[currcon], CM_ERASE, info->cursor->pos.x, info->cursor->pos.y); +#endif /* CONFIG_FB_ATY_CT */ currcon = con; @@ -4215,12 +2663,13 @@ static int atyfbcon_switch(int con, struct fb_info *fb) /* Install new colormap */ do_install_cmap(con, fb); +#ifdef CONFIG_FB_ATY_CT /* Install hw cursor */ if (info->cursor) { - aty_set_cursor_color(info, cursor_pixel_map, cursor_color_map, - cursor_color_map, cursor_color_map); + aty_set_cursor_color(info); aty_set_cursor_shape(info); } +#endif /* CONFIG_FB_ATY_CT */ return 1; } @@ -4306,16 +2755,11 @@ static int atyfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, info->palette[regno].green = green; info->palette[regno].blue = blue; i = aty_ld_8(DAC_CNTL, info) & 0xfc; - if (Gx == GT_CHIP_ID || Gx == GU_CHIP_ID || Gx == GV_CHIP_ID || - Gx == 
GW_CHIP_ID || Gx == GZ_CHIP_ID || Gx == LG_CHIP_ID || - Gx == GB_CHIP_ID || Gx == GD_CHIP_ID || Gx == GI_CHIP_ID || - Gx == GP_CHIP_ID || Gx == GQ_CHIP_ID || Gx == LI_CHIP_ID || - Gx == XL_CHIP_ID) + if (M64_HAS(EXTRA_BRIGHT)) i |= 0x2; /*DAC_CNTL|0x2 turns off the extra brightness for gt*/ aty_st_8(DAC_CNTL, i, info); aty_st_8(DAC_MASK, 0xff, info); - scale = ((Gx != GX_CHIP_ID) && (Gx != CX_CHIP_ID) && - (info->current_par.crtc.bpp == 16)) ? 3 : 0; + scale = (M64_HAS(INTEGRATED) && info->current_par.crtc.bpp == 16) ? 3 : 0; writeb(regno << scale, &info->aty_cmap_regs->windex); writeb(red, &info->aty_cmap_regs->lut); writeb(green, &info->aty_cmap_regs->lut); @@ -4359,82 +2803,6 @@ static void do_install_cmap(int con, struct fb_info *info) /* - * Accelerated functions - */ - -static inline void draw_rect(s16 x, s16 y, u16 width, u16 height, - struct fb_info_aty *info) -{ - /* perform rectangle fill */ - wait_for_fifo(2, info); - aty_st_le32(DST_Y_X, (x << 16) | y, info); - aty_st_le32(DST_HEIGHT_WIDTH, (width << 16) | height, info); - info->blitter_may_be_busy = 1; -} - -static inline void aty_rectcopy(int srcx, int srcy, int dstx, int dsty, - u_int width, u_int height, - struct fb_info_aty *info) -{ - u32 direction = DST_LAST_PEL; - u32 pitch_value; - - if (!width || !height) - return; - - pitch_value = info->current_par.crtc.vxres; - if (info->current_par.crtc.bpp == 24) { - /* In 24 bpp, the engine is in 8 bpp - this requires that all */ - /* horizontal coordinates and widths must be adjusted */ - pitch_value *= 3; - srcx *= 3; - dstx *= 3; - width *= 3; - } - - if (srcy < dsty) { - dsty += height - 1; - srcy += height - 1; - } else - direction |= DST_Y_TOP_TO_BOTTOM; - - if (srcx < dstx) { - dstx += width - 1; - srcx += width - 1; - } else - direction |= DST_X_LEFT_TO_RIGHT; - - wait_for_fifo(4, info); - aty_st_le32(DP_SRC, FRGD_SRC_BLIT, info); - aty_st_le32(SRC_Y_X, (srcx << 16) | srcy, info); - aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | height, info); - 
aty_st_le32(DST_CNTL, direction, info); - draw_rect(dstx, dsty, width, height, info); -} - -static inline void aty_rectfill(int dstx, int dsty, u_int width, u_int height, - u_int color, struct fb_info_aty *info) -{ - if (!width || !height) - return; - - if (info->current_par.crtc.bpp == 24) { - /* In 24 bpp, the engine is in 8 bpp - this requires that all */ - /* horizontal coordinates and widths must be adjusted */ - dstx *= 3; - width *= 3; - } - - wait_for_fifo(3, info); - aty_st_le32(DP_FRGD_CLR, color, info); - aty_st_le32(DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, - info); - aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | - DST_X_LEFT_TO_RIGHT, info); - draw_rect(dstx, dsty, width, height, info); -} - - /* * Update the `var' structure (called by fbcon.c) */ @@ -4471,540 +2839,16 @@ static int atyfbcon_updatevar(int con, struct fb_info *fb) aty_rectfill(xoffset, sy, xres, height, bgx, info); } +#ifdef CONFIG_FB_ATY_CT if (info->cursor && (yoffset + yres <= sy)) atyfb_cursor(p, CM_ERASE, info->cursor->pos.x, info->cursor->pos.y); +#endif /* CONFIG_FB_ATY_CT */ info->current_par.crtc.yoffset = yoffset; set_off_pitch(&info->current_par, info); return 0; } - /* - * Text console acceleration - */ - -static void fbcon_aty_bmove(struct display *p, int sy, int sx, int dy, int dx, - int height, int width) -{ -#ifdef __sparc__ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - sx *= fontwidth(p); - sy *= fontheight(p); - dx *= fontwidth(p); - dy *= fontheight(p); - width *= fontwidth(p); - height *= fontheight(p); - - aty_rectcopy(sx, sy, dx, dy, width, height, - (struct fb_info_aty *)p->fb_info); -} - -static void fbcon_aty_clear(struct vc_data *conp, struct display *p, int sy, - int sx, int height, int width) -{ - u32 bgx; -#ifdef __sparc__ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - 
- if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - bgx = attr_bgcol_ec(p, conp); - bgx |= (bgx << 8); - bgx |= (bgx << 16); - - sx *= fontwidth(p); - sy *= fontheight(p); - width *= fontwidth(p); - height *= fontheight(p); - - aty_rectfill(sx, sy, width, height, bgx, - (struct fb_info_aty *)p->fb_info); -} - -#ifdef FBCON_HAS_CFB8 -static void fbcon_aty8_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb8_putc(conp, p, c, yy, xx); -} - -static void fbcon_aty8_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb8_putcs(conp, p, s, count, yy, xx); -} - -static void fbcon_aty8_clear_margins(struct vc_data *conp, struct display *p, - int bottom_only) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb8_clear_margins(conp, p, bottom_only); -} - -static struct display_switch fbcon_aty8 = { - setup: fbcon_cfb8_setup, - bmove: fbcon_aty_bmove, - clear: fbcon_aty_clear, - putc: fbcon_aty8_putc, - putcs: fbcon_aty8_putcs, - revc: fbcon_cfb8_revc, - clear_margins: fbcon_aty8_clear_margins, - fontwidthmask: 
FONTWIDTH(4)|FONTWIDTH(8)|FONTWIDTH(12)|FONTWIDTH(16) -}; -#endif - -#ifdef FBCON_HAS_CFB16 -static void fbcon_aty16_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb16_putc(conp, p, c, yy, xx); -} - -static void fbcon_aty16_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb16_putcs(conp, p, s, count, yy, xx); -} - -static void fbcon_aty16_clear_margins(struct vc_data *conp, struct display *p, - int bottom_only) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb16_clear_margins(conp, p, bottom_only); -} - -static struct display_switch fbcon_aty16 = { - setup: fbcon_cfb16_setup, - bmove: fbcon_aty_bmove, - clear: fbcon_aty_clear, - putc: fbcon_aty16_putc, - putcs: fbcon_aty16_putcs, - revc: fbcon_cfb16_revc, - clear_margins: fbcon_aty16_clear_margins, - fontwidthmask: FONTWIDTH(4)|FONTWIDTH(8)|FONTWIDTH(12)|FONTWIDTH(16) -}; -#endif - -#ifdef FBCON_HAS_CFB24 -static void fbcon_aty24_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && 
(!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb24_putc(conp, p, c, yy, xx); -} - -static void fbcon_aty24_putcs(struct vc_data *conp, struct display *p, - const unsigned short *s, int count, int yy, - int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb24_putcs(conp, p, s, count, yy, xx); -} - -static void fbcon_aty24_clear_margins(struct vc_data *conp, struct display *p, - int bottom_only) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb24_clear_margins(conp, p, bottom_only); -} - -static struct display_switch fbcon_aty24 = { - setup: fbcon_cfb24_setup, - bmove: fbcon_aty_bmove, - clear: fbcon_aty_clear, - putc: fbcon_aty24_putc, - putcs: fbcon_aty24_putcs, - revc: fbcon_cfb24_revc, - clear_margins: fbcon_aty24_clear_margins, - fontwidthmask: FONTWIDTH(4)|FONTWIDTH(8)|FONTWIDTH(12)|FONTWIDTH(16) -}; -#endif - -#ifdef FBCON_HAS_CFB32 -static void fbcon_aty32_putc(struct vc_data *conp, struct display *p, int c, - int yy, int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb32_putc(conp, p, c, yy, xx); -} - -static void fbcon_aty32_putcs(struct vc_data *conp, struct display *p, 
- const unsigned short *s, int count, int yy, - int xx) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb32_putcs(conp, p, s, count, yy, xx); -} - -static void fbcon_aty32_clear_margins(struct vc_data *conp, struct display *p, - int bottom_only) -{ - struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); - -#ifdef __sparc__ - if (fb->mmaped && (!fb->fb_info.display_fg - || fb->fb_info.display_fg->vc_num == fb->vtconsole)) - return; -#endif - - if (fb->blitter_may_be_busy) - wait_for_idle((struct fb_info_aty *)p->fb_info); - fbcon_cfb32_clear_margins(conp, p, bottom_only); -} - -static struct display_switch fbcon_aty32 = { - setup: fbcon_cfb32_setup, - bmove: fbcon_aty_bmove, - clear: fbcon_aty_clear, - putc: fbcon_aty32_putc, - putcs: fbcon_aty32_putcs, - revc: fbcon_cfb32_revc, - clear_margins: fbcon_aty32_clear_margins, - fontwidthmask: FONTWIDTH(4)|FONTWIDTH(8)|FONTWIDTH(12)|FONTWIDTH(16) -}; -#endif - -#ifdef CONFIG_PMAC_PBOOK - -/* Power management routines. Those are used for PowerBook sleep. - * - * It appears that Rage LT and Rage LT Pro have different power - * management registers. 
There's is some confusion about which - * chipID is a Rage LT or LT pro :( - */ -static int -aty_power_mgmt_LT(int sleep, struct fb_info_aty *info) -{ - unsigned int pm; - int timeout; - - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - pm = (pm & ~PWR_MGT_MODE_MASK) | PWR_MGT_MODE_REG; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - - timeout = 200000; - if (sleep) { - /* Sleep */ - pm &= ~PWR_MGT_ON; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - udelay(10); - pm &= ~(PWR_BLON | AUTO_PWR_UP); - pm |= SUSPEND_NOW; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - udelay(10); - pm |= PWR_MGT_ON; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - do { - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - udelay(10); - if ((--timeout) == 0) - break; - } while ((pm & PWR_MGT_STATUS_MASK) != PWR_MGT_STATUS_SUSPEND); - } else { - /* Wakeup */ - pm &= ~PWR_MGT_ON; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - udelay(10); - pm |= (PWR_BLON | AUTO_PWR_UP); - pm &= ~SUSPEND_NOW; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - udelay(10); - pm |= PWR_MGT_ON; - aty_st_le32(POWER_MANAGEMENT_LG, pm, info); - do { - pm = aty_ld_le32(POWER_MANAGEMENT_LG, info); - udelay(10); - if ((--timeout) == 0) - break; - } while ((pm & PWR_MGT_STATUS_MASK) != 0); - } - mdelay(500); - - return timeout ? 
PBOOK_SLEEP_OK : PBOOK_SLEEP_REFUSE; -} - -static int -aty_power_mgmt_LTPro(int sleep, struct fb_info_aty *info) -{ - unsigned int pm; - int timeout; - - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - pm = (pm & ~PWR_MGT_MODE_MASK) | PWR_MGT_MODE_REG; - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - - timeout = 200; - if (sleep) { - /* Sleep */ - pm &= ~PWR_MGT_ON; - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - udelay(10); - pm &= ~(PWR_BLON | AUTO_PWR_UP); - pm |= SUSPEND_NOW; - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - udelay(10); - pm |= PWR_MGT_ON; - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - do { - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - udelay(1000); - if ((--timeout) == 0) - break; - } while ((pm & PWR_MGT_STATUS_MASK) != PWR_MGT_STATUS_SUSPEND); - } else { - /* Wakeup */ - pm &= ~PWR_MGT_ON; - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - udelay(10); - pm &= ~SUSPEND_NOW; - pm |= (PWR_BLON | AUTO_PWR_UP); - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - udelay(10); - pm |= PWR_MGT_ON; - aty_st_lcd(LCD_POWER_MANAGEMENT, pm, info); - do { - pm = aty_ld_lcd(LCD_POWER_MANAGEMENT, info); - udelay(1000); - if ((--timeout) == 0) - break; - } while ((pm & PWR_MGT_STATUS_MASK) != 0); - } - - return timeout ? PBOOK_SLEEP_OK : PBOOK_SLEEP_REFUSE; -} - -/* - * Save the contents of the frame buffer when we go to sleep, - * and restore it when we wake up again. 
- */ -int -aty_sleep_notify(struct pmu_sleep_notifier *self, int when) -{ - struct fb_info_aty *info; - int result; - - result = PBOOK_SLEEP_OK; - - for (info = first_display; info != NULL; info = info->next) { - struct fb_fix_screeninfo fix; - int nb; - - atyfb_get_fix(&fix, fg_console, (struct fb_info *)info); - nb = fb_display[fg_console].var.yres * fix.line_length; - - switch (when) { - case PBOOK_SLEEP_REQUEST: - info->save_framebuffer = vmalloc(nb); - if (info->save_framebuffer == NULL) - return PBOOK_SLEEP_REFUSE; - break; - case PBOOK_SLEEP_REJECT: - if (info->save_framebuffer) { - vfree(info->save_framebuffer); - info->save_framebuffer = 0; - } - break; - case PBOOK_SLEEP_NOW: - if (info->blitter_may_be_busy) - wait_for_idle(info); - /* Stop accel engine (stop bus mastering) */ - if (info->current_par.accel_flags & FB_ACCELF_TEXT) - reset_engine(info); - - /* Backup fb content */ - if (info->save_framebuffer) - memcpy_fromio(info->save_framebuffer, - (void *)info->frame_buffer, nb); - - /* Blank display and LCD */ - atyfbcon_blank(VESA_POWERDOWN+1, (struct fb_info *)info); - - /* Set chip to "suspend" mode */ - if (Gx == LG_CHIP_ID) - result = aty_power_mgmt_LT(1, info); - else - result = aty_power_mgmt_LTPro(1, info); - break; - case PBOOK_WAKE: - /* Wakeup chip */ - if (Gx == LG_CHIP_ID) - result = aty_power_mgmt_LT(0, info); - else - result = aty_power_mgmt_LTPro(0, info); - - /* Restore fb content */ - if (info->save_framebuffer) { - memcpy_toio((void *)info->frame_buffer, - info->save_framebuffer, nb); - vfree(info->save_framebuffer); - info->save_framebuffer = 0; - } - /* Restore display */ - atyfb_set_par(&info->current_par, info); - atyfbcon_blank(0, (struct fb_info *)info); - break; - } - } - return result; -} -#endif /* CONFIG_PMAC_PBOOK */ - -#ifdef CONFIG_PMAC_BACKLIGHT -static int backlight_conv[] = { - 0x00, 0x3f, 0x4c, 0x59, 0x66, 0x73, 0x80, 0x8d, - 0x9a, 0xa7, 0xb4, 0xc1, 0xcf, 0xdc, 0xe9, 0xff -}; - -static int 
-aty_set_backlight_enable(int on, int level, void* data) -{ - struct fb_info_aty *info = (struct fb_info_aty *)data; - unsigned int reg = aty_ld_lcd(LCD_MISC_CNTL, info); - - reg |= (BLMOD_EN | BIASMOD_EN); - if (on && level > BACKLIGHT_OFF) { - reg &= ~BIAS_MOD_LEVEL_MASK; - reg |= (backlight_conv[level] << BIAS_MOD_LEVEL_SHIFT); - } else { - reg &= ~BIAS_MOD_LEVEL_MASK; - reg |= (backlight_conv[0] << BIAS_MOD_LEVEL_SHIFT); - } - aty_st_lcd(LCD_MISC_CNTL, reg, info); - - return 0; -} - -static int -aty_set_backlight_level(int level, void* data) -{ - return aty_set_backlight_enable(1, level, data); -} - -#endif /* CONFIG_PMAC_BACKLIGHT */ #ifdef MODULE diff --git a/drivers/video/aty.h b/drivers/video/aty/mach64.h index d9c673d26394..8ca47b74ac7e 100644 --- a/drivers/video/aty.h +++ b/drivers/video/aty/mach64.h @@ -1,10 +1,10 @@ /* - * Exported procedures for the ATI/mach64 display driver on PowerMacs. + * ATI Mach64 Register Definitions * * Copyright (C) 1997 Michael AK Tesch * written with much help from Jon Howell * - * Updated for 3D RAGE PRO by Geert Uytterhoeven + * Updated for 3D RAGE PRO and 3D RAGE Mobility by Geert Uytterhoeven * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,81 +20,124 @@ /* NON-GUI MEMORY MAPPED Registers - expressed in BYTE offsets */ +/* Accelerator CRTC */ #define CRTC_H_TOTAL_DISP 0x0000 /* Dword offset 0_00 */ +#define CRTC2_H_TOTAL_DISP 0x0000 /* Dword offset 0_00 */ #define CRTC_H_SYNC_STRT_WID 0x0004 /* Dword offset 0_01 */ +#define CRTC2_H_SYNC_STRT_WID 0x0004 /* Dword offset 0_01 */ #define CRTC_H_SYNC_STRT 0x0004 +#define CRTC2_H_SYNC_STRT 0x0004 #define CRTC_H_SYNC_DLY 0x0005 +#define CRTC2_H_SYNC_DLY 0x0005 #define CRTC_H_SYNC_WID 0x0006 - +#define CRTC2_H_SYNC_WID 0x0006 #define CRTC_V_TOTAL_DISP 0x0008 /* Dword offset 0_02 */ +#define CRTC2_V_TOTAL_DISP 0x0008 /* Dword offset 0_02 */ #define CRTC_V_TOTAL 0x0008 +#define CRTC2_V_TOTAL 
0x0008 #define CRTC_V_DISP 0x000A +#define CRTC2_V_DISP 0x000A #define CRTC_V_SYNC_STRT_WID 0x000C /* Dword offset 0_03 */ +#define CRTC2_V_SYNC_STRT_WID 0x000C /* Dword offset 0_03 */ #define CRTC_V_SYNC_STRT 0x000C +#define CRTC2_V_SYNC_STRT 0x000C #define CRTC_V_SYNC_WID 0x000E - +#define CRTC2_V_SYNC_WID 0x000E #define CRTC_VLINE_CRNT_VLINE 0x0010 /* Dword offset 0_04 */ +#define CRTC2_VLINE_CRNT_VLINE 0x0010 /* Dword offset 0_04 */ #define CRTC_OFF_PITCH 0x0014 /* Dword offset 0_05 */ #define CRTC_OFFSET 0x0014 #define CRTC_PITCH 0x0016 - #define CRTC_INT_CNTL 0x0018 /* Dword offset 0_06 */ #define CRTC_GEN_CNTL 0x001C /* Dword offset 0_07 */ #define CRTC_PIX_WIDTH 0x001D #define CRTC_FIFO 0x001E #define CRTC_EXT_DISP 0x001F +/* Memory Buffer Control */ #define DSP_CONFIG 0x0020 /* Dword offset 0_08 */ +#define PM_DSP_CONFIG 0x0020 /* Dword offset 0_08 (Mobility Only) */ #define DSP_ON_OFF 0x0024 /* Dword offset 0_09 */ +#define PM_DSP_ON_OFF 0x0024 /* Dword offset 0_09 (Mobility Only) */ #define TIMER_CONFIG 0x0028 /* Dword offset 0_0A */ #define MEM_BUF_CNTL 0x002C /* Dword offset 0_0B */ #define MEM_ADDR_CONFIG 0x0034 /* Dword offset 0_0D */ +/* Accelerator CRTC */ #define CRT_TRAP 0x0038 /* Dword offset 0_0E */ #define I2C_CNTL_0 0x003C /* Dword offset 0_0F */ +/* Overscan */ #define OVR_CLR 0x0040 /* Dword offset 0_10 */ +#define OVR2_CLR 0x0040 /* Dword offset 0_10 */ #define OVR_WID_LEFT_RIGHT 0x0044 /* Dword offset 0_11 */ +#define OVR2_WID_LEFT_RIGHT 0x0044 /* Dword offset 0_11 */ #define OVR_WID_TOP_BOTTOM 0x0048 /* Dword offset 0_12 */ +#define OVR2_WID_TOP_BOTTOM 0x0048 /* Dword offset 0_12 */ +/* Memory Buffer Control */ #define VGA_DSP_CONFIG 0x004C /* Dword offset 0_13 */ +#define PM_VGA_DSP_CONFIG 0x004C /* Dword offset 0_13 (Mobility Only) */ #define VGA_DSP_ON_OFF 0x0050 /* Dword offset 0_14 */ +#define PM_VGA_DSP_ON_OFF 0x0050 /* Dword offset 0_14 (Mobility Only) */ +#define DSP2_CONFIG 0x0054 /* Dword offset 0_15 */ +#define PM_DSP2_CONFIG 
0x0054 /* Dword offset 0_15 (Mobility Only) */ +#define DSP2_ON_OFF 0x0058 /* Dword offset 0_16 */ +#define PM_DSP2_ON_OFF 0x0058 /* Dword offset 0_16 (Mobility Only) */ +/* Accelerator CRTC */ +#define CRTC2_OFF_PITCH 0x005C /* Dword offset 0_17 */ + +/* Hardware Cursor */ #define CUR_CLR0 0x0060 /* Dword offset 0_18 */ +#define CUR2_CLR0 0x0060 /* Dword offset 0_18 */ #define CUR_CLR1 0x0064 /* Dword offset 0_19 */ +#define CUR2_CLR1 0x0064 /* Dword offset 0_19 */ #define CUR_OFFSET 0x0068 /* Dword offset 0_1A */ +#define CUR2_OFFSET 0x0068 /* Dword offset 0_1A */ #define CUR_HORZ_VERT_POSN 0x006C /* Dword offset 0_1B */ +#define CUR2_HORZ_VERT_POSN 0x006C /* Dword offset 0_1B */ #define CUR_HORZ_VERT_OFF 0x0070 /* Dword offset 0_1C */ +#define CUR2_HORZ_VERT_OFF 0x0070 /* Dword offset 0_1C */ #define CONFIG_PANEL_LG 0x0074 /* Dword offset 0_1D */ +/* General I/O Control */ #define GP_IO 0x0078 /* Dword offset 0_1E */ +/* Test and Debug */ #define HW_DEBUG 0x007C /* Dword offset 0_1F */ +/* Scratch Pad and Test */ #define SCRATCH_REG0 0x0080 /* Dword offset 0_20 */ #define SCRATCH_REG1 0x0084 /* Dword offset 0_21 */ +#define SCRATCH_REG2 0x0088 /* Dword offset 0_22 */ +#define SCRATCH_REG3 0x008C /* Dword offset 0_23 */ +/* Clock Control */ #define CLOCK_CNTL 0x0090 /* Dword offset 0_24 */ #define CLOCK_SEL_CNTL 0x0090 /* Dword offset 0_24 */ +/* Configuration */ #define CONFIG_STAT1 0x0094 /* Dword offset 0_25 */ #define CONFIG_STAT2 0x0098 /* Dword offset 0_26 */ +/* Bus Control */ #define BUS_CNTL 0x00A0 /* Dword offset 0_28 */ #define LCD_INDEX 0x00A4 /* Dword offset 0_29 */ #define LCD_DATA 0x00A8 /* Dword offset 0_2A */ +/* Memory Control */ #define EXT_MEM_CNTL 0x00AC /* Dword offset 0_2B */ #define MEM_CNTL 0x00B0 /* Dword offset 0_2C */ - #define MEM_VGA_WP_SEL 0x00B4 /* Dword offset 0_2D */ #define MEM_VGA_RP_SEL 0x00B8 /* Dword offset 0_2E */ #define I2C_CNTL_1 0x00BC /* Dword offset 0_2F */ +/* DAC Control */ #define DAC_REGS 0x00C0 /* Dword offset 
0_30 */ #define DAC_W_INDEX 0x00C0 /* Dword offset 0_30 */ #define DAC_DATA 0x00C1 /* Dword offset 0_30 */ @@ -104,21 +147,29 @@ #define EXT_DAC_REGS 0x00C8 /* Dword offset 0_32 */ +/* Test and Debug */ #define GEN_TEST_CNTL 0x00D0 /* Dword offset 0_34 */ +/* Custom Macros */ #define CUSTOM_MACRO_CNTL 0x00D4 /* Dword offset 0_35 */ + #define LCD_GEN_CNTL_LG 0x00D4 /* Dword offset 0_35 */ #define POWER_MANAGEMENT_LG 0x00D8 /* Dword offset 0_36 (LG) */ +/* Configuration */ #define CONFIG_CNTL 0x00DC /* Dword offset 0_37 (CT, ET, VT) */ #define CONFIG_CHIP_ID 0x00E0 /* Dword offset 0_38 */ #define CONFIG_STAT0 0x00E4 /* Dword offset 0_39 */ + +/* Test and Debug */ #define CRC_SIG 0x00E8 /* Dword offset 0_3A */ +#define CRC2_SIG 0x00E8 /* Dword offset 0_3A */ /* GUI MEMORY MAPPED Registers */ +/* Draw Engine Destination Trajectory */ #define DST_OFF_PITCH 0x0100 /* Dword offset 0_40 */ #define DST_X 0x0104 /* Dword offset 0_41 */ #define DST_Y 0x0108 /* Dword offset 0_42 */ @@ -151,6 +202,7 @@ #define SECONDARY_T_Y_INC 0x0178 /* Dword offset 0_5E */ #define SECONDARY_T_START 0x017C /* Dword offset 0_5F */ +/* Draw Engine Source Trajectory */ #define SRC_OFF_PITCH 0x0180 /* Dword offset 0_60 */ #define SRC_X 0x0184 /* Dword offset 0_61 */ #define SRC_Y 0x0188 /* Dword offset 0_62 */ @@ -196,6 +248,7 @@ #define SCALE_VACC 0x01F8 /* Dword offset 0_7E */ #define SCALE_3D_CNTL 0x01FC /* Dword offset 0_7F */ +/* Host Data */ #define HOST_DATA0 0x0200 /* Dword offset 0_80 */ #define HOST_DATA1 0x0204 /* Dword offset 0_81 */ #define HOST_DATA2 0x0208 /* Dword offset 0_82 */ @@ -214,15 +267,18 @@ #define HOST_DATAF 0x023C /* Dword offset 0_8F */ #define HOST_CNTL 0x0240 /* Dword offset 0_90 */ +/* GUI Bus Mastering */ #define BM_HOSTDATA 0x0244 /* Dword offset 0_91 */ #define BM_ADDR 0x0248 /* Dword offset 0_92 */ #define BM_DATA 0x0248 /* Dword offset 0_92 */ #define BM_GUI_TABLE_CMD 0x024C /* Dword offset 0_93 */ +/* Pattern */ #define PAT_REG0 0x0280 /* Dword offset 0_A0 */ 
#define PAT_REG1 0x0284 /* Dword offset 0_A1 */ #define PAT_CNTL 0x0288 /* Dword offset 0_A2 */ +/* Scissors */ #define SC_LEFT 0x02A0 /* Dword offset 0_A8 */ #define SC_RIGHT 0x02A4 /* Dword offset 0_A9 */ #define SC_LEFT_RIGHT 0x02A8 /* Dword offset 0_AA */ @@ -230,6 +286,9 @@ #define SC_BOTTOM 0x02B0 /* Dword offset 0_AC */ #define SC_TOP_BOTTOM 0x02B4 /* Dword offset 0_AD */ +/* Data Path */ +#define USR1_DST_OFF_PITCH 0x02B8 /* Dword offset 0_AE */ +#define USR2_DST_OFF_PITCH 0x02BC /* Dword offset 0_AF */ #define DP_BKGD_CLR 0x02C0 /* Dword offset 0_B0 */ #define DP_FOG_CLR 0x02C4 /* Dword offset 0_B1 */ #define DP_FRGD_CLR 0x02C4 /* Dword offset 0_B1 */ @@ -239,24 +298,32 @@ #define DP_MIX 0x02D4 /* Dword offset 0_B5 */ #define DP_SRC 0x02D8 /* Dword offset 0_B6 */ #define DP_FRGD_CLR_MIX 0x02DC /* Dword offset 0_B7 */ -#define DP_FRGD_BLGD_CLR 0x02E0 /* Dword offset 0_B8 */ +#define DP_FRGD_BKGD_CLR 0x02E0 /* Dword offset 0_B8 */ +/* Draw Engine Destination Trajectory */ #define DST_X_Y 0x02E8 /* Dword offset 0_BA */ #define DST_WIDTH_HEIGHT 0x02EC /* Dword offset 0_BB */ + +/* Data Path */ #define USR_DST_PICTH 0x02F0 /* Dword offset 0_BC */ #define DP_SET_GUI_ENGINE2 0x02F8 /* Dword offset 0_BE */ #define DP_SET_GUI_ENGINE 0x02FC /* Dword offset 0_BF */ +/* Color Compare */ #define CLR_CMP_CLR 0x0300 /* Dword offset 0_C0 */ #define CLR_CMP_MASK 0x0304 /* Dword offset 0_C1 */ #define CLR_CMP_CNTL 0x0308 /* Dword offset 0_C2 */ +/* Command FIFO */ #define FIFO_STAT 0x0310 /* Dword offset 0_C4 */ #define CONTEXT_MASK 0x0320 /* Dword offset 0_C8 */ #define CONTEXT_LOAD_CNTL 0x032C /* Dword offset 0_CB */ +/* Engine Control */ #define GUI_TRAJ_CNTL 0x0330 /* Dword offset 0_CC */ + +/* Engine Status/FIFO */ #define GUI_STAT 0x0338 /* Dword offset 0_CE */ #define TEX_PALETTE_INDEX 0x0340 /* Dword offset 0_D0 */ @@ -287,6 +354,7 @@ #define SCALE_OFF_ACC 0x0388 /* Dword offset 0_E2 */ #define SCALE_DST_Y_X 0x038C /* Dword offset 0_E3 */ +/* Draw Engine Destination 
Trajectory */ #define COMPOSITE_SHADOW_ID 0x0398 /* Dword offset 0_E6 */ #define SECONDARY_SCALE_X_INC 0x039C /* Dword offset 0_E7 */ @@ -363,6 +431,7 @@ #define CAPTURE_DEBUG 0x0464 /* Dword offset 1_19 */ #define VIDEO_SYNC_TEST 0x0468 /* Dword offset 1_1A */ +/* GenLocking */ #define SNAPSHOT_VH_COUNTS 0x0470 /* Dword offset 1_1C */ #define SNAPSHOT_F_COUNT 0x0474 /* Dword offset 1_1D */ #define N_VIF_COUNT 0x0478 /* Dword offset 1_1E */ @@ -372,14 +441,22 @@ #define CAPTURE_BUF1_OFFSET 0x0484 /* Dword offset 1_21 */ #define CAPTURE_BUF_PITCH 0x0488 /* Dword offset 1_22 */ +/* GenLocking */ +#define SNAPSHOT2_VH_COUNTS 0x04B0 /* Dword offset 1_2C */ +#define SNAPSHOT2_F_COUNT 0x04B4 /* Dword offset 1_2D */ +#define N_VIF2_COUNT 0x04B8 /* Dword offset 1_2E */ +#define SNAPSHOT2_VIF_COUNT 0x04BC /* Dword offset 1_2F */ + #define MPP_CONFIG 0x04C0 /* Dword offset 1_30 */ #define MPP_STROBE_SEQ 0x04C4 /* Dword offset 1_31 */ #define MPP_ADDR 0x04C8 /* Dword offset 1_32 */ #define MPP_DATA 0x04CC /* Dword offset 1_33 */ #define TVO_CNTL 0x0500 /* Dword offset 1_40 */ +/* Test and Debug */ #define CRT_HORZ_VERT_LOAD 0x0544 /* Dword offset 1_51 */ +/* AGP */ #define AGP_BASE 0x0548 /* Dword offset 1_52 */ #define AGP_CNTL 0x054C /* Dword offset 1_53 */ @@ -390,8 +467,12 @@ #define SCALER_H_COEFF3 0x0560 /* Dword offset 1_58 */ #define SCALER_H_COEFF4 0x0564 /* Dword offset 1_59 */ +/* Command FIFO */ +#define GUI_CMDFIFO_DEBUG 0x0570 /* Dword offset 1_5C */ +#define GUI_CMDFIFO_DATA 0x0574 /* Dword offset 1_5D */ #define GUI_CNTL 0x0578 /* Dword offset 1_5E */ +/* Bus Mastering */ #define BM_FRAME_BUF_OFFSET 0x0580 /* Dword offset 1_60 */ #define BM_SYSTEM_MEM_ADDR 0x0584 /* Dword offset 1_61 */ #define BM_COMMAND 0x0588 /* Dword offset 1_62 */ @@ -404,6 +485,7 @@ #define SCALER_BUF1_OFFSET_U 0x05DC /* Dword offset 1_77 */ #define SCALER_BUF1_OFFSET_V 0x05E0 /* Dword offset 1_78 */ +/* Setup Engine */ #define VERTEX_1_S 0x0640 /* Dword offset 1_90 */ #define VERTEX_1_T 
0x0644 /* Dword offset 1_91 */ #define VERTEX_1_W 0x0648 /* Dword offset 1_92 */ @@ -461,6 +543,7 @@ #define VERTEX_2_SECONDARY_T 0x0738 /* Dword offset 1_CE */ #define VERTEX_2_SECONDARY_W 0x073C /* Dword offset 1_CF */ + #define GTC_3D_RESET_DELAY 3 /* 3D engine reset delay in ms */ /* CRTC control values (mostly CRTC_GEN_CNTL) */ @@ -579,7 +662,7 @@ #define CLOCK_STROBE 0x40 #define PLL_WR_EN 0x02 -/* PLL registers */ +/* PLL register indices */ #define MPLL_CNTL 0x00 #define VPLL_CNTL 0x01 #define PLL_REF_DIV 0x02 @@ -593,9 +676,37 @@ #define VCLK3_FB_DIV 0x0A #define PLL_EXT_CNTL 0x0B #define DLL_CNTL 0x0C +#define DLL1_CNTL 0x0C #define VFC_CNTL 0x0D -#define PLL_TEST_CTRL 0x0E +#define PLL_TEST_CNTL 0x0E #define PLL_TEST_COUNT 0x0F +#define LVDS_CNTL0 0x10 +#define LVDS_CNTL1 0x11 +#define AGP1_CNTL 0x12 +#define AGP2_CNTL 0x13 +#define DLL2_CNTL 0x14 +#define SCLK_FB_DIV 0x15 +#define SPLL_CNTL1 0x16 +#define SPLL_CNTL2 0x17 +#define APLL_STRAPS 0x18 +#define EXT_VPLL_CNTL 0x19 +#define EXT_VPLL_REF_DIV 0x1A +#define EXT_VPLL_FB_DIV 0x1B +#define EXT_VPLL_MSB 0x1C +#define HTOTAL_CNTL 0x1D +#define BYTE_CLK_CNTL 0x1E +#define TV_PLL_CNTL1 0x1F +#define TV_PLL_CNTL2 0x20 +#define TV_PLL_CNTL 0x21 +#define EXT_TV_PLL 0x22 +#define V2PLL_CNTL 0x23 +#define PLL_V2CLK_CNTL 0x24 +#define EXT_V2PLL_REF_DIV 0x25 +#define EXT_V2PLL_FB_DIV 0x26 +#define EXT_V2PLL_MSB 0x27 +#define HTOTAL2_CNTL 0x28 +#define PLL_YCLK_CNTL 0x29 +#define PM_DYN_CLK_CNTL 0x2A /* Fields in PLL registers */ #define PLL_PC_GAIN 0x07 @@ -991,17 +1102,52 @@ #define CRTC2_DISPLAY_DIS 0x00000400 /* LCD register indices */ -#define LCD_CONFIG_PANEL 0x00 +#define CONFIG_PANEL 0x00 #define LCD_GEN_CTRL 0x01 -#define LCD_DSTN_CONTROL 0x02 -#define LCD_HFB_PITCH_ADDR 0x03 -#define LCD_HORZ_STRETCHING 0x04 -#define LCD_VERT_STRETCHING 0x05 -#define LCD_EXT_VERT_STRETCH 0x06 -#define LCD_LT_GIO 0x07 -#define LCD_POWER_MANAGEMENT 0x08 -#define LCD_ZVGPIO 0x09 +#define DSTN_CONTROL 0x02 +#define 
HFB_PITCH_ADDR 0x03 +#define HORZ_STRETCHING 0x04 +#define VERT_STRETCHING 0x05 +#define EXT_VERT_STRETCH 0x06 +#define LT_GIO 0x07 +#define POWER_MANAGEMENT 0x08 +#define ZVGPIO 0x09 +#define ICON_CLR0 0x0A +#define ICON_CLR1 0x0B +#define ICON_OFFSET 0x0C +#define ICON_HORZ_VERT_POSN 0x0D +#define ICON_HORZ_VERT_OFF 0x0E +#define ICON2_CLR0 0x0F +#define ICON2_CLR1 0x10 +#define ICON2_OFFSET 0x11 +#define ICON2_HORZ_VERT_POSN 0x12 +#define ICON2_HORZ_VERT_OFF 0x13 #define LCD_MISC_CNTL 0x14 +#define APC_CNTL 0x1C +#define POWER_MANAGEMENT_2 0x1D +#define ALPHA_BLENDING 0x25 +#define PORTRAIT_GEN_CNTL 0x26 +#define APC_CTRL_IO 0x27 +#define TEST_IO 0x28 +#define TEST_OUTPUTS 0x29 +#define DP1_MEM_ACCESS 0x2A +#define DP0_MEM_ACCESS 0x2B +#define DP0_DEBUG_A 0x2C +#define DP0_DEBUG_B 0x2D +#define DP1_DEBUG_A 0x2E +#define DP1_DEBUG_B 0x2F +#define DPCTRL_DEBUG_A 0x30 +#define DPCTRL_DEBUG_B 0x31 +#define MEMBLK_DEBUG 0x32 +#define APC_LUT_AB 0x33 +#define APC_LUT_CD 0x34 +#define APC_LUT_EF 0x35 +#define APC_LUT_GH 0x36 +#define APC_LUT_IJ 0x37 +#define APC_LUT_KL 0x38 +#define APC_LUT_MN 0x39 +#define APC_LUT_OP 0x3A + /* Values in LCD_MISC_CNTL */ #define BIAS_MOD_LEVEL_MASK 0x0000ff00 diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c new file mode 100644 index 000000000000..37baf66f2135 --- /dev/null +++ b/drivers/video/aty/mach64_accel.c @@ -0,0 +1,352 @@ + +/* + * ATI Mach64 Hardware Acceleration + */ + +#include <linux/delay.h> +#include <linux/fb.h> + +#include <video/fbcon.h> +#include <video/fbcon-cfb8.h> +#include <video/fbcon-cfb16.h> +#include <video/fbcon-cfb24.h> +#include <video/fbcon-cfb32.h> + +#include "mach64.h" +#include "atyfb.h" + + /* + * Text console acceleration + */ + +static void fbcon_aty_bmove(struct display *p, int sy, int sx, int dy, int dx, + int height, int width); +static void fbcon_aty_clear(struct vc_data *conp, struct display *p, int sy, + int sx, int height, int width); + + + /* + * Generic 
Mach64 routines + */ + +void aty_reset_engine(const struct fb_info_aty *info) +{ + /* reset engine */ + aty_st_le32(GEN_TEST_CNTL, + aty_ld_le32(GEN_TEST_CNTL, info) & ~GUI_ENGINE_ENABLE, info); + /* enable engine */ + aty_st_le32(GEN_TEST_CNTL, + aty_ld_le32(GEN_TEST_CNTL, info) | GUI_ENGINE_ENABLE, info); + /* ensure engine is not locked up by clearing any FIFO or */ + /* HOST errors */ + aty_st_le32(BUS_CNTL, aty_ld_le32(BUS_CNTL, info) | BUS_HOST_ERR_ACK | + BUS_FIFO_ERR_ACK, info); +} + +static void reset_GTC_3D_engine(const struct fb_info_aty *info) +{ + aty_st_le32(SCALE_3D_CNTL, 0xc0, info); + mdelay(GTC_3D_RESET_DELAY); + aty_st_le32(SETUP_CNTL, 0x00, info); + mdelay(GTC_3D_RESET_DELAY); + aty_st_le32(SCALE_3D_CNTL, 0x00, info); + mdelay(GTC_3D_RESET_DELAY); +} + +void aty_init_engine(const struct atyfb_par *par, struct fb_info_aty *info) +{ + u32 pitch_value; + + /* determine modal information from global mode structure */ + pitch_value = par->crtc.vxres; + + if (par->crtc.bpp == 24) { + /* In 24 bpp, the engine is in 8 bpp - this requires that all */ + /* horizontal coordinates and widths must be adjusted */ + pitch_value = pitch_value * 3; + } + + /* On GTC (RagePro), we need to reset the 3D engine before */ + if (M64_HAS(RESET_3D)) + reset_GTC_3D_engine(info); + + /* Reset engine, enable, and clear any engine errors */ + aty_reset_engine(info); + /* Ensure that vga page pointers are set to zero - the upper */ + /* page pointers are set to 1 to handle overflows in the */ + /* lower page */ + aty_st_le32(MEM_VGA_WP_SEL, 0x00010000, info); + aty_st_le32(MEM_VGA_RP_SEL, 0x00010000, info); + + /* ---- Setup standard engine context ---- */ + + /* All GUI registers here are FIFOed - therefore, wait for */ + /* the appropriate number of empty FIFO entries */ + wait_for_fifo(14, info); + + /* enable all registers to be loaded for context loads */ + aty_st_le32(CONTEXT_MASK, 0xFFFFFFFF, info); + + /* set destination pitch to modal pitch, set offset to zero */ + 
aty_st_le32(DST_OFF_PITCH, (pitch_value / 8) << 22, info); + + /* zero these registers (set them to a known state) */ + aty_st_le32(DST_Y_X, 0, info); + aty_st_le32(DST_HEIGHT, 0, info); + aty_st_le32(DST_BRES_ERR, 0, info); + aty_st_le32(DST_BRES_INC, 0, info); + aty_st_le32(DST_BRES_DEC, 0, info); + + /* set destination drawing attributes */ + aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | + DST_X_LEFT_TO_RIGHT, info); + + /* set source pitch to modal pitch, set offset to zero */ + aty_st_le32(SRC_OFF_PITCH, (pitch_value / 8) << 22, info); + + /* set these registers to a known state */ + aty_st_le32(SRC_Y_X, 0, info); + aty_st_le32(SRC_HEIGHT1_WIDTH1, 1, info); + aty_st_le32(SRC_Y_X_START, 0, info); + aty_st_le32(SRC_HEIGHT2_WIDTH2, 1, info); + + /* set source pixel retrieving attributes */ + aty_st_le32(SRC_CNTL, SRC_LINE_X_LEFT_TO_RIGHT, info); + + /* set host attributes */ + wait_for_fifo(13, info); + aty_st_le32(HOST_CNTL, 0, info); + + /* set pattern attributes */ + aty_st_le32(PAT_REG0, 0, info); + aty_st_le32(PAT_REG1, 0, info); + aty_st_le32(PAT_CNTL, 0, info); + + /* set scissors to modal size */ + aty_st_le32(SC_LEFT, 0, info); + aty_st_le32(SC_TOP, 0, info); + aty_st_le32(SC_BOTTOM, par->crtc.vyres-1, info); + aty_st_le32(SC_RIGHT, pitch_value-1, info); + + /* set background color to minimum value (usually BLACK) */ + aty_st_le32(DP_BKGD_CLR, 0, info); + + /* set foreground color to maximum value (usually WHITE) */ + aty_st_le32(DP_FRGD_CLR, 0xFFFFFFFF, info); + + /* set write mask to effect all pixel bits */ + aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, info); + + /* set foreground mix to overpaint and background mix to */ + /* no-effect */ + aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, info); + + /* set primary source pixel channel to foreground color */ + /* register */ + aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, info); + + /* set compare functionality to false (no-effect on */ + /* destination) */ + wait_for_fifo(3, info); + 
aty_st_le32(CLR_CMP_CLR, 0, info); + aty_st_le32(CLR_CMP_MASK, 0xFFFFFFFF, info); + aty_st_le32(CLR_CMP_CNTL, 0, info); + + /* set pixel depth */ + wait_for_fifo(2, info); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, info); + aty_st_le32(DP_CHAIN_MASK, par->crtc.dp_chain_mask, info); + + wait_for_fifo(5, info); + aty_st_le32(SCALE_3D_CNTL, 0, info); + aty_st_le32(Z_CNTL, 0, info); + aty_st_le32(CRTC_INT_CNTL, aty_ld_le32(CRTC_INT_CNTL, info) & ~0x20, info); + aty_st_le32(GUI_TRAJ_CNTL, 0x100023, info); + + /* insure engine is idle before leaving */ + wait_for_idle(info); +} + + + /* + * Accelerated functions + */ + +static inline void draw_rect(s16 x, s16 y, u16 width, u16 height, + struct fb_info_aty *info) +{ + /* perform rectangle fill */ + wait_for_fifo(2, info); + aty_st_le32(DST_Y_X, (x << 16) | y, info); + aty_st_le32(DST_HEIGHT_WIDTH, (width << 16) | height, info); + info->blitter_may_be_busy = 1; +} + +static inline void aty_rectcopy(int srcx, int srcy, int dstx, int dsty, + u_int width, u_int height, + struct fb_info_aty *info) +{ + u32 direction = DST_LAST_PEL; + u32 pitch_value; + + if (!width || !height) + return; + + pitch_value = info->current_par.crtc.vxres; + if (info->current_par.crtc.bpp == 24) { + /* In 24 bpp, the engine is in 8 bpp - this requires that all */ + /* horizontal coordinates and widths must be adjusted */ + pitch_value *= 3; + srcx *= 3; + dstx *= 3; + width *= 3; + } + + if (srcy < dsty) { + dsty += height - 1; + srcy += height - 1; + } else + direction |= DST_Y_TOP_TO_BOTTOM; + + if (srcx < dstx) { + dstx += width - 1; + srcx += width - 1; + } else + direction |= DST_X_LEFT_TO_RIGHT; + + wait_for_fifo(4, info); + aty_st_le32(DP_SRC, FRGD_SRC_BLIT, info); + aty_st_le32(SRC_Y_X, (srcx << 16) | srcy, info); + aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | height, info); + aty_st_le32(DST_CNTL, direction, info); + draw_rect(dstx, dsty, width, height, info); +} + +void aty_rectfill(int dstx, int dsty, u_int width, u_int 
height, u_int color, + struct fb_info_aty *info) +{ + if (!width || !height) + return; + + if (info->current_par.crtc.bpp == 24) { + /* In 24 bpp, the engine is in 8 bpp - this requires that all */ + /* horizontal coordinates and widths must be adjusted */ + dstx *= 3; + width *= 3; + } + + wait_for_fifo(3, info); + aty_st_le32(DP_FRGD_CLR, color, info); + aty_st_le32(DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, + info); + aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | + DST_X_LEFT_TO_RIGHT, info); + draw_rect(dstx, dsty, width, height, info); +} + + + /* + * Text console acceleration + */ + +static void fbcon_aty_bmove(struct display *p, int sy, int sx, int dy, int dx, + int height, int width) +{ +#ifdef __sparc__ + struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); + + if (fb->mmaped && (!fb->fb_info.display_fg + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) + return; +#endif + + sx *= fontwidth(p); + sy *= fontheight(p); + dx *= fontwidth(p); + dy *= fontheight(p); + width *= fontwidth(p); + height *= fontheight(p); + + aty_rectcopy(sx, sy, dx, dy, width, height, + (struct fb_info_aty *)p->fb_info); +} + +static void fbcon_aty_clear(struct vc_data *conp, struct display *p, int sy, + int sx, int height, int width) +{ + u32 bgx; +#ifdef __sparc__ + struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); + + if (fb->mmaped && (!fb->fb_info.display_fg + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) + return; +#endif + + bgx = attr_bgcol_ec(p, conp); + bgx |= (bgx << 8); + bgx |= (bgx << 16); + + sx *= fontwidth(p); + sy *= fontheight(p); + width *= fontwidth(p); + height *= fontheight(p); + + aty_rectfill(sx, sy, width, height, bgx, + (struct fb_info_aty *)p->fb_info); +} + +#ifdef __sparc__ +#define check_access \ + if (fb->mmaped && (!fb->fb_info.display_fg \ + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) \ + return; +#else +#define check_access do { } while (0) +#endif + +#define 
DEF_FBCON_ATY_OP(name, call, args...) \ +static void name(struct vc_data *conp, struct display *p, args) \ +{ \ + struct fb_info_aty *fb = (struct fb_info_aty *)(p->fb_info); \ + check_access; \ + if (fb->blitter_may_be_busy) \ + wait_for_idle((struct fb_info_aty *)p->fb_info); \ + call; \ +} + +#define DEF_FBCON_ATY(width) \ + DEF_FBCON_ATY_OP(fbcon_aty##width##_putc, \ + fbcon_cfb##width##_putc(conp, p, c, yy, xx), \ + int c, int yy, int xx) \ + DEF_FBCON_ATY_OP(fbcon_aty##width##_putcs, \ + fbcon_cfb##width##_putcs(conp, p, s, count, yy, xx), \ + const unsigned short *s, int count, int yy, int xx) \ + DEF_FBCON_ATY_OP(fbcon_aty##width##_clear_margins, \ + fbcon_cfb##width##_clear_margins(conp, p, bottom_only), \ + int bottom_only) \ + \ +const struct display_switch fbcon_aty##width## = { \ + setup: fbcon_cfb##width##_setup, \ + bmove: fbcon_aty_bmove, \ + clear: fbcon_aty_clear, \ + putc: fbcon_aty##width##_putc, \ + putcs: fbcon_aty##width##_putcs, \ + revc: fbcon_cfb##width##_revc, \ + clear_margins: fbcon_aty##width##_clear_margins, \ + fontwidthmask: FONTWIDTH(4)|FONTWIDTH(8)|FONTWIDTH(12)|FONTWIDTH(16) \ +}; + +#ifdef FBCON_HAS_CFB8 +DEF_FBCON_ATY(8) +#endif +#ifdef FBCON_HAS_CFB16 +DEF_FBCON_ATY(16) +#endif +#ifdef FBCON_HAS_CFB24 +DEF_FBCON_ATY(24) +#endif +#ifdef FBCON_HAS_CFB32 +DEF_FBCON_ATY(32) +#endif + diff --git a/drivers/video/aty/mach64_ct.c b/drivers/video/aty/mach64_ct.c new file mode 100644 index 000000000000..4af998965b27 --- /dev/null +++ b/drivers/video/aty/mach64_ct.c @@ -0,0 +1,272 @@ + +/* + * ATI Mach64 CT/VT/GT/LT Support + */ + +#include <linux/fb.h> + +#include <asm/io.h> + +#include <video/fbcon.h> + +#include "mach64.h" +#include "atyfb.h" + + +/* FIXME: remove the FAIL definition */ +#define FAIL(x) do { printk(x "\n"); return -EINVAL; } while (0) + +static void aty_st_pll(int offset, u8 val, const struct fb_info_aty *info); + +static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per, + struct pll_ct *pll); +static 
int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp, + struct pll_ct *pll); +static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll); +static u32 aty_pll_ct_to_var(const struct fb_info_aty *info, + const union aty_pll *pll); + + + +static void aty_st_pll(int offset, u8 val, const struct fb_info_aty *info) +{ + /* write addr byte: register index shifted left by 2, with PLL_WR_EN set */ + aty_st_8(CLOCK_CNTL + 1, (offset << 2) | PLL_WR_EN, info); + /* write the register value */ + aty_st_8(CLOCK_CNTL + 2, val, info); + aty_st_8(CLOCK_CNTL + 1, (offset << 2) & ~PLL_WR_EN, info); /* same addr byte again, PLL_WR_EN cleared */ +} + + +/* ------------------------------------------------------------------------- */ + + /* + * PLL programming (Mach64 CT family) + */ + +static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp, + struct pll_ct *pll) +{ + u32 dsp_xclks_per_row, dsp_loop_latency, dsp_precision, dsp_off, dsp_on; + u32 xclks_per_row, fifo_off, fifo_on, y, fifo_size, page_size; + + /* xclocks_per_row<<11 */ + xclks_per_row = (pll->mclk_fb_div*pll->vclk_post_div_real*64<<11)/ + (pll->vclk_fb_div*pll->mclk_post_div_real*bpp); + if (xclks_per_row < (1<<11)) + FAIL("Dotclock to high"); /* FAIL prints the message and returns -EINVAL */ + if (M64_HAS(FIFO_24)) { + fifo_size = 24; + dsp_loop_latency = 0; + } else { + fifo_size = 32; + dsp_loop_latency = 2; + } + dsp_precision = 0; + y = (xclks_per_row*fifo_size)>>11; + while (y) { /* count the significant bits of y */ + y >>= 1; + dsp_precision++; + } + dsp_precision -= 5; /* NOTE(review): u32, so this wraps if fewer than 5 bits were counted */ + /* fifo_off<<6 */ + fifo_off = ((xclks_per_row*(fifo_size-1))>>5)+(3<<6); + + if (info->total_vram > 1*1024*1024) { + if (info->ram_type >= SDRAM) { + /* >1 MB SDRAM */ + dsp_loop_latency += 8; + page_size = 8; + } else { + /* >1 MB DRAM */ + dsp_loop_latency += 6; + page_size = 9; + } + } else { + if (info->ram_type >= SDRAM) { + /* <=1 MB SDRAM */ + dsp_loop_latency += 9; + page_size = 10; + } else { + /* <=1 MB DRAM */ + dsp_loop_latency += 8; + page_size = 10; + } + } + /* fifo_on<<6 */ + if (xclks_per_row >= (page_size<<11)) + fifo_on = ((2*page_size+1)<<6)+(xclks_per_row>>5); 
+ else + fifo_on = (3*page_size+2)<<6; + + dsp_xclks_per_row = xclks_per_row>>dsp_precision; + dsp_on = fifo_on>>dsp_precision; + dsp_off = fifo_off>>dsp_precision; + + pll->dsp_config = (dsp_xclks_per_row & 0x3fff) | + ((dsp_loop_latency & 0xf)<<16) | + ((dsp_precision & 7)<<20); /* xclks per row in bits 0-13, loop latency in bits 16-19, precision in bits 20-22 */ + pll->dsp_on_off = (dsp_on & 0x7ff) | ((dsp_off & 0x7ff)<<16); /* dsp_on in bits 0-10, dsp_off in bits 16-26 */ + return 0; +} + +static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per, + struct pll_ct *pll) +{ + u32 q, x; /* x is a workaround for sparc64-linux-gcc */ + x = x; /* x is a workaround for sparc64-linux-gcc; NOTE(review): this reads x while it is still uninitialized */ + + pll->pll_ref_div = info->pll_per*2*255/info->ref_clk_per; + + /* FIXME: use the VTB/GTB /3 post divider if it's better suited */ + q = info->ref_clk_per*pll->pll_ref_div*4/info->mclk_per; /* actually 8*q */ + if (q < 16*8 || q > 255*8) + FAIL("mclk out of range"); /* FAIL returns -EINVAL */ + else if (q < 32*8) + pll->mclk_post_div_real = 8; + else if (q < 64*8) + pll->mclk_post_div_real = 4; + else if (q < 128*8) + pll->mclk_post_div_real = 2; + else + pll->mclk_post_div_real = 1; + pll->mclk_fb_div = q*pll->mclk_post_div_real/8; /* the /8 removes the scaling carried by q */ + + /* FIXME: use the VTB/GTB /{3,6,12} post dividers if they're better suited */ + q = info->ref_clk_per*pll->pll_ref_div*4/vclk_per; /* actually 8*q */ + if (q < 16*8 || q > 255*8) + FAIL("vclk out of range"); /* FAIL returns -EINVAL */ + else if (q < 32*8) + pll->vclk_post_div_real = 8; + else if (q < 64*8) + pll->vclk_post_div_real = 4; + else if (q < 128*8) + pll->vclk_post_div_real = 2; + else + pll->vclk_post_div_real = 1; + pll->vclk_fb_div = q*pll->vclk_post_div_real/8; /* the /8 removes the scaling carried by q */ + return 0; +} + +void aty_calc_pll_ct(const struct fb_info_aty *info, struct pll_ct *pll) +{ + u8 mpostdiv = 0; + u8 vpostdiv = 0; + + if (M64_HAS(SDRAM_MAGIC_PLL) && (info->ram_type >= SDRAM)) + pll->pll_gen_cntl = 0x04; + else + pll->pll_gen_cntl = 0x84; + + switch (pll->mclk_post_div_real) { /* translate the real divider into its register code; unlisted values keep code 0 */ + case 1: + mpostdiv = 0; + break; + case 2: + mpostdiv = 1; + break; + case 3: + mpostdiv = 4; + break; + case 4: + mpostdiv = 2; + break; + case 8: + 
mpostdiv = 3; + break; + } + pll->pll_gen_cntl |= mpostdiv<<4; /* mclk */ + + if (M64_HAS(MAGIC_POSTDIV)) + pll->pll_ext_cntl = 0; + else + pll->pll_ext_cntl = mpostdiv; /* xclk == mclk */ + + switch (pll->vclk_post_div_real) { + case 2: + vpostdiv = 1; + break; + case 3: + pll->pll_ext_cntl |= 0x10; /* fall through: 3 uses the same vpostdiv code as 1 */ + case 1: + vpostdiv = 0; + break; + case 6: + pll->pll_ext_cntl |= 0x10; /* fall through: 6 uses the same vpostdiv code as 4 */ + case 4: + vpostdiv = 2; + break; + case 12: + pll->pll_ext_cntl |= 0x10; /* fall through: 12 uses the same vpostdiv code as 8 */ + case 8: + vpostdiv = 3; + break; + } + + pll->pll_vclk_cntl = 0x03; /* VCLK = PLL_VCLK/VCLKx_POST */ + pll->vclk_post_div = vpostdiv; +} + +static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll) +{ + int err; + + if ((err = aty_valid_pll_ct(info, vclk_per, &pll->ct))) + return err; + if (M64_HAS(GTB_DSP) && (err = aty_dsp_gt(info, bpp, &pll->ct))) /* DSP values are only computed when the chip has GTB_DSP */ + return err; + aty_calc_pll_ct(info, &pll->ct); + return 0; +} + +static u32 aty_pll_ct_to_var(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + u32 ref_clk_per = info->ref_clk_per; + u8 pll_ref_div = pll->ct.pll_ref_div; + u8 vclk_fb_div = pll->ct.vclk_fb_div; + u8 vclk_post_div = pll->ct.vclk_post_div_real; + + return ref_clk_per*pll_ref_div*vclk_post_div/vclk_fb_div/2; /* NOTE(review): divides by vclk_fb_div without a zero check */ +} + +void aty_set_pll_ct(const struct fb_info_aty *info, const union aty_pll *pll) +{ + aty_st_pll(PLL_REF_DIV, pll->ct.pll_ref_div, info); + aty_st_pll(PLL_GEN_CNTL, pll->ct.pll_gen_cntl, info); + aty_st_pll(MCLK_FB_DIV, pll->ct.mclk_fb_div, info); + aty_st_pll(PLL_VCLK_CNTL, pll->ct.pll_vclk_cntl, info); + aty_st_pll(VCLK_POST_DIV, pll->ct.vclk_post_div, info); + aty_st_pll(VCLK0_FB_DIV, pll->ct.vclk_fb_div, info); + aty_st_pll(PLL_EXT_CNTL, pll->ct.pll_ext_cntl, info); + + if (M64_HAS(GTB_DSP)) { + if (M64_HAS(XL_DLL)) + aty_st_pll(DLL_CNTL, 0x80, info); + else if (info->ram_type >= SDRAM) + aty_st_pll(DLL_CNTL, 0xa6, info); + else + aty_st_pll(DLL_CNTL, 0xa0, info); + aty_st_pll(VFC_CNTL, 0x1b, info); + aty_st_le32(DSP_CONFIG, 
pll->ct.dsp_config, info); + aty_st_le32(DSP_ON_OFF, pll->ct.dsp_on_off, info); + } +} + +static int dummy(void) /* no-op stub; cast to void * below to fill the set_dac slot */ +{ + return 0; +} + +const struct aty_dac_ops aty_dac_ct = { + set_dac: (void *)dummy, +}; + +const struct aty_pll_ops aty_pll_ct = { + var_to_pll: aty_var_to_pll_ct, + pll_to_var: aty_pll_ct_to_var, + set_pll: aty_set_pll_ct, +}; + diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c new file mode 100644 index 000000000000..4268d1ecddc8 --- /dev/null +++ b/drivers/video/aty/mach64_cursor.c @@ -0,0 +1,305 @@ + +/* + * ATI Mach64 CT/VT/GT/LT Cursor Support + */ + +#include <linux/malloc.h> +#include <linux/console.h> +#include <linux/fb.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/uaccess.h> + +#include <video/fbcon.h> + +#ifdef __sparc__ +#include <asm/pbm.h> +#include <asm/fbio.h> +#endif + +#include "mach64.h" +#include "atyfb.h" + + +#define DEFAULT_CURSOR_BLINK_RATE (20) /* counted in cursor timer runs; the timer re-arms every HZ/50 jiffies */ +#define CURSOR_DRAW_DELAY (2) /* same unit; loaded into vbl_cnt when a hidden cursor is to be drawn */ + + + /* + * Hardware Cursor support. + */ + +static const u8 cursor_pixel_map[2] = { 0, 15 }; +static const u8 cursor_color_map[2] = { 0, 0xff }; + +static const u8 cursor_bits_lookup[16] = /* index bit k sets output bit 6 - 2k */ +{ + 0x00, 0x40, 0x10, 0x50, 0x04, 0x44, 0x14, 0x54, + 0x01, 0x41, 0x11, 0x51, 0x05, 0x45, 0x15, 0x55 +}; + +static const u8 cursor_mask_lookup[16] = /* starts from 0xaa; index bit k clears output bit 7 - 2k */ +{ + 0xaa, 0x2a, 0x8a, 0x0a, 0xa2, 0x22, 0x82, 0x02, + 0xa8, 0x28, 0x88, 0x08, 0xa0, 0x20, 0x80, 0x00 +}; + +void aty_set_cursor_color(struct fb_info_aty *fb) +{ + struct aty_cursor *c = fb->cursor; + const u8 *pixel = cursor_pixel_map; /* ++Geert: Why?? 
*/ + const u8 *red = cursor_color_map; + const u8 *green = cursor_color_map; + const u8 *blue = cursor_color_map; + int i; + + if (!c) + return; + +#ifdef __sparc__ + if (fb->mmaped && (!fb->fb_info.display_fg + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) + return; +#endif + + for (i = 0; i < 2; i++) { /* pack each entry as red<<24 | green<<16 | blue<<8 | pixel */ + c->color[i] = (u32)red[i] << 24; + c->color[i] |= (u32)green[i] << 16; + c->color[i] |= (u32)blue[i] << 8; + c->color[i] |= (u32)pixel[i]; + } + + wait_for_fifo(2, fb); + aty_st_le32(CUR_CLR0, c->color[0], fb); + aty_st_le32(CUR_CLR1, c->color[1], fb); +} + +void aty_set_cursor_shape(struct fb_info_aty *fb) +{ + struct aty_cursor *c = fb->cursor; + u8 *ram, m, b; + int x, y; + + if (!c) + return; + +#ifdef __sparc__ + if (fb->mmaped && (!fb->fb_info.display_fg + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) + return; +#endif + + ram = c->ram; + for (y = 0; y < c->size.y; y++) { + for (x = 0; x < c->size.x >> 2; x++) { /* each mask/bits byte becomes two output bytes, high nibble first */ + m = c->mask[x][y]; + b = c->bits[x][y]; + fb_writeb (cursor_mask_lookup[m >> 4] | + cursor_bits_lookup[(b & m) >> 4], + ram++); + fb_writeb (cursor_mask_lookup[m & 0x0f] | + cursor_bits_lookup[(b & m) & 0x0f], + ram++); + } + for ( ; x < 8; x++) { /* pad the row with 0xaa up to 16 bytes */ + fb_writeb (0xaa, ram++); + fb_writeb (0xaa, ram++); + } + } + fb_memset (ram, 0xaa, (64 - c->size.y) * 16); /* fill the remaining 64 - size.y rows of 16 bytes with 0xaa */ +} + +static void +aty_set_cursor(struct fb_info_aty *fb, int on) +{ + struct atyfb_par *par = &fb->current_par; + struct aty_cursor *c = fb->cursor; + u16 xoff, yoff; + int x, y; + + if (!c) + return; + +#ifdef __sparc__ + if (fb->mmaped && (!fb->fb_info.display_fg + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) + return; +#endif + + if (on) { + x = c->pos.x - c->hot.x - par->crtc.xoffset; + if (x < 0) { /* clamp to 0 and keep the clipped amount in xoff */ + xoff = -x; + x = 0; + } else { + xoff = 0; + } + + y = c->pos.y - c->hot.y - par->crtc.yoffset; + if (y < 0) { /* clamp to 0 and keep the clipped amount in yoff */ + yoff = -y; + y = 0; + } else { + yoff = 0; + } + + wait_for_fifo(4, fb); + aty_st_le32(CUR_OFFSET, (c->offset >> 3) + (yoff << 1), fb); + 
aty_st_le32(CUR_HORZ_VERT_OFF, + ((u32)(64 - c->size.y + yoff) << 16) | xoff, fb); + aty_st_le32(CUR_HORZ_VERT_POSN, ((u32)y << 16) | x, fb); + aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, fb) + | HWCURSOR_ENABLE, fb); + } else { + wait_for_fifo(1, fb); + aty_st_le32(GEN_TEST_CNTL, + aty_ld_le32(GEN_TEST_CNTL, fb) & ~HWCURSOR_ENABLE, + fb); + } + if (fb->blitter_may_be_busy) + wait_for_idle(fb); +} + +static void +aty_cursor_timer_handler(unsigned long dev_addr) +{ + struct fb_info_aty *fb = (struct fb_info_aty *)dev_addr; + + if (!fb->cursor) + return; + + if (!fb->cursor->enable) + goto out; + + if (fb->cursor->vbl_cnt && --fb->cursor->vbl_cnt == 0) { /* countdown reached zero: toggle the cursor and reload from blink_rate */ + fb->cursor->on ^= 1; + aty_set_cursor(fb, fb->cursor->on); + fb->cursor->vbl_cnt = fb->cursor->blink_rate; + } + +out: + fb->cursor->timer->expires = jiffies + (HZ / 50); /* re-arm: the handler reschedules itself on every run */ + add_timer(fb->cursor->timer); +} + +void atyfb_cursor(struct display *p, int mode, int x, int y) +{ + struct fb_info_aty *fb = (struct fb_info_aty *)p->fb_info; + struct aty_cursor *c = fb->cursor; + + if (!c) + return; + +#ifdef __sparc__ + if (fb->mmaped && (!fb->fb_info.display_fg + || fb->fb_info.display_fg->vc_num == fb->vtconsole)) + return; +#endif + + x *= fontwidth(p); /* character cell -> pixel coordinates */ + y *= fontheight(p); + if (c->pos.x == x && c->pos.y == y && (mode == CM_ERASE) == !c->enable) /* same position and same enable state: nothing to do */ + return; + + c->enable = 0; /* stays 0 while the position is updated; set again below for draw/move */ + if (c->on) + aty_set_cursor(fb, 0); + c->pos.x = x; + c->pos.y = y; + + switch (mode) { + case CM_ERASE: + c->on = 0; + break; + + case CM_DRAW: + case CM_MOVE: + if (c->on) + aty_set_cursor(fb, 1); + else + c->vbl_cnt = CURSOR_DRAW_DELAY; + c->enable = 1; + break; + } +} + +struct aty_cursor * __init aty_init_cursor(struct fb_info_aty *fb) +{ + struct aty_cursor *cursor; + unsigned long addr; + + cursor = kmalloc(sizeof(struct aty_cursor), GFP_ATOMIC); /* NOTE(review): GFP_ATOMIC here but GFP_KERNEL for the timer below - confirm which is intended */ + if (!cursor) + return 0; + memset(cursor, 0, sizeof(*cursor)); + + cursor->timer = kmalloc(sizeof(*cursor->timer), GFP_KERNEL); + if (!cursor->timer) { + kfree(cursor); + return 0; + } 
+ memset(cursor->timer, 0, sizeof(*cursor->timer)); + + cursor->blink_rate = DEFAULT_CURSOR_BLINK_RATE; + fb->total_vram -= PAGE_SIZE; /* take PAGE_SIZE bytes off total_vram; the cursor image lives at the new end */ + cursor->offset = fb->total_vram; + +#ifdef __sparc__ + addr = fb->frame_buffer - 0x800000 + cursor->offset; + cursor->ram = (u8 *)addr; +#else +#ifdef __BIG_ENDIAN + addr = fb->frame_buffer_phys - 0x800000 + cursor->offset; + cursor->ram = (u8 *)ioremap(addr, 1024); +#else + addr = fb->frame_buffer + cursor->offset; + cursor->ram = (u8 *)addr; +#endif +#endif + + if (!cursor->ram) { + kfree(cursor); /* NOTE(review): cursor->timer allocated earlier in this function is not freed on this path */ + return NULL; + } + + init_timer(cursor->timer); + cursor->timer->expires = jiffies + (HZ / 50); + cursor->timer->data = (unsigned long)fb; /* handed back to aty_cursor_timer_handler as dev_addr */ + cursor->timer->function = aty_cursor_timer_handler; + add_timer(cursor->timer); + + return cursor; +} + +int atyfb_set_font(struct display *d, int width, int height) +{ + struct fb_info_aty *fb = (struct fb_info_aty *)d->fb_info; + struct aty_cursor *c = fb->cursor; + int i, j; + + if (c) { + if (!width || !height) { /* fall back to an 8x16 cell */ + width = 8; + height = 16; + } + + c->hot.x = 0; + c->hot.y = 0; + c->size.x = width; + c->size.y = height; + + memset(c->bits, 0xff, sizeof(c->bits)); + memset(c->mask, 0, sizeof(c->mask)); + + for (i = 0, j = width; j >= 0; j -= 8, i++) { /* mask only the bottom two rows; NOTE(review): j >= 0 gives one extra pass with j == 0 when width is a multiple of 8 - confirm mask has room for that index */ + c->mask[i][height-2] = (j >= 8) ? 0xff : (0xff << (8 - j)); + c->mask[i][height-1] = (j >= 8) ? 
0xff : (0xff << (8 - j)); + } + + aty_set_cursor_color(fb); + aty_set_cursor_shape(fb); + } + return 1; +} + diff --git a/drivers/video/aty/mach64_gx.c b/drivers/video/aty/mach64_gx.c new file mode 100644 index 000000000000..86d7d7eec69a --- /dev/null +++ b/drivers/video/aty/mach64_gx.c @@ -0,0 +1,886 @@ + +/* + * ATI Mach64 GX Support + */ + +#include <linux/delay.h> +#include <linux/fb.h> + +#include <asm/io.h> + +#include <video/fbcon.h> + +#include "mach64.h" +#include "atyfb.h" + +/* Definitions for the ICS 2595 == ATI 18818_1 Clockchip */ + +#define REF_FREQ_2595 1432 /* 14.32 MHz (exact 14.31818) */ +#define REF_DIV_2595 46 /* really 43 on ICS 2595 !!! */ + /* without prescaler */ +#define MAX_FREQ_2595 15938 /* 159.38 MHz (really 170.486) */ +#define MIN_FREQ_2595 8000 /* 80.00 MHz ( 85.565) */ + /* with prescaler 2, 4, 8 */ +#define ABS_MIN_FREQ_2595 1000 /* 10.00 MHz (really 10.697) */ +#define N_ADJ_2595 257 + +#define STOP_BITS_2595 0x1800 + + +#define MIN_N_408 2 + +#define MIN_N_1703 6 + +#define MIN_M 2 +#define MAX_M 30 +#define MIN_N 35 +#define MAX_N 255-8 /* NOTE(review): unparenthesized expression; only safe where operator precedence cannot bite */ + + + /* + * Support Functions + */ + +static void aty_dac_waste4(const struct fb_info_aty *info) /* one read of DAC_REGS, then four reads of DAC_REGS + 2; all results are discarded */ +{ + (void)aty_ld_8(DAC_REGS, info); + + (void)aty_ld_8(DAC_REGS + 2, info); + (void)aty_ld_8(DAC_REGS + 2, info); + (void)aty_ld_8(DAC_REGS + 2, info); + (void)aty_ld_8(DAC_REGS + 2, info); +} + +static void aty_StrobeClock(const struct fb_info_aty *info) /* waits 26 us, then writes CLOCK_CNTL back with CLOCK_STROBE set */ +{ + u8 tmp; + + udelay(26); + + tmp = aty_ld_8(CLOCK_CNTL, info); + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, tmp | CLOCK_STROBE, info); + + return; +} + + + /* + * IBM RGB514 DAC and Clock Chip + */ + +static void aty_st_514(int offset, u8 val, const struct fb_info_aty *info) +{ + aty_st_8(DAC_CNTL, 1, info); + /* low addr byte */ + aty_st_8(DAC_W_INDEX, offset & 0xff, info); + /* high addr byte */ + aty_st_8(DAC_DATA, (offset >> 8) & 0xff, info); + aty_st_8(DAC_MASK, val, info); /* the register value itself */ + aty_st_8(DAC_CNTL, 0, info); +} + +static int 
aty_set_dac_514(const struct fb_info_aty *info, + const union aty_pll *pll, u32 bpp, u32 accel) +{ + static struct { + u8 pixel_dly; + u8 misc2_cntl; + u8 pixel_rep; + u8 pixel_cntl_index; + u8 pixel_cntl_v1; + } tab[3] = { + { 0, 0x41, 0x03, 0x71, 0x45 }, /* 8 bpp */ + { 0, 0x45, 0x04, 0x0c, 0x01 }, /* 555 */ + { 0, 0x45, 0x06, 0x0e, 0x00 }, /* XRGB */ + }; + int i; + + switch (bpp) { /* any depth other than 16 or 32 is programmed with the 8 bpp row */ + case 8: + default: + i = 0; + break; + case 16: + i = 1; + break; + case 32: + i = 2; + break; + } + aty_st_514(0x90, 0x00, info); /* VRAM Mask Low */ + aty_st_514(0x04, tab[i].pixel_dly, info); /* Horizontal Sync Control */ + aty_st_514(0x05, 0x00, info); /* Power Management */ + aty_st_514(0x02, 0x01, info); /* Misc Clock Control */ + aty_st_514(0x71, tab[i].misc2_cntl, info); /* Misc Control 2 */ + aty_st_514(0x0a, tab[i].pixel_rep, info); /* Pixel Format */ + aty_st_514(tab[i].pixel_cntl_index, tab[i].pixel_cntl_v1, info); + /* Misc Control 2 / 16 BPP Control / 32 BPP Control */ + return 0; +} + +static int aty_var_to_pll_514(const struct fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll) +{ + /* + * FIXME: use real calculations instead of using fixed values from the old + * driver + */ + static struct { + u32 limit; /* pixclock rounding limit (arbitrary) */ + u8 m; /* (df<<6) | vco_div_count */ + u8 n; /* ref_div_count */ + } RGB514_clocks[7] = { + { 8000, (3<<6) | 20, 9 }, /* 7395 ps / 135.2273 MHz */ + { 10000, (1<<6) | 19, 3 }, /* 9977 ps / 100.2273 MHz */ + { 13000, (1<<6) | 2, 3 }, /* 12509 ps / 79.9432 MHz */ + { 14000, (2<<6) | 8, 7 }, /* 13394 ps / 74.6591 MHz */ + { 16000, (1<<6) | 44, 6 }, /* 15378 ps / 65.0284 MHz */ + { 25000, (1<<6) | 15, 5 }, /* 17460 ps / 57.2727 MHz */ + { 50000, (0<<6) | 53, 7 }, /* 33145 ps / 30.1705 MHz */ + }; + int i; + + for (i = 0; i < sizeof(RGB514_clocks)/sizeof(*RGB514_clocks); i++) /* first entry whose limit is >= vclk_per wins */ + if (vclk_per <= RGB514_clocks[i].limit) { + pll->ibm514.m = RGB514_clocks[i].m; + pll->ibm514.n = RGB514_clocks[i].n; + return 0; + } + 
return -EINVAL; /* vclk_per is above every limit in the table */ +} + +static u32 aty_pll_514_to_var(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + u8 df, vco_div_count, ref_div_count; + + df = pll->ibm514.m >> 6; /* top two bits of m */ + vco_div_count = pll->ibm514.m & 0x3f; /* low six bits of m */ + ref_div_count = pll->ibm514.n; + + return ((info->ref_clk_per*ref_div_count)<<(3-df))/(vco_div_count+65); +} + +static void aty_set_pll_514(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + aty_st_514(0x06, 0x02, info); /* DAC Operation */ + aty_st_514(0x10, 0x01, info); /* PLL Control 1 */ + aty_st_514(0x70, 0x01, info); /* Misc Control 1 */ + aty_st_514(0x8f, 0x1f, info); /* PLL Ref. Divider Input */ + aty_st_514(0x03, 0x00, info); /* Sync Control */ + aty_st_514(0x05, 0x00, info); /* Power Management */ + aty_st_514(0x20, pll->ibm514.m, info); /* F0 / M0 */ + aty_st_514(0x21, pll->ibm514.n, info); /* F1 / N0 */ +} + +const struct aty_dac_ops aty_dac_ibm514 = { + set_dac: aty_set_dac_514, +}; + +const struct aty_pll_ops aty_pll_ibm514 = { + var_to_pll: aty_var_to_pll_514, + pll_to_var: aty_pll_514_to_var, + set_pll: aty_set_pll_514, +}; + + + /* + * ATI 68860-B DAC + */ + +static int aty_set_dac_ATI68860_B(const struct fb_info_aty *info, + const union aty_pll *pll, u32 bpp, u32 accel) +{ + u32 gModeReg, devSetupRegA, temp, mask; + + gModeReg = 0; /* both stay 0 for a bpp not listed in the switch below */ + devSetupRegA = 0; + + switch (bpp) { + case 8: + gModeReg = 0x83; + devSetupRegA = 0x60 | 0x00 /*(info->mach64DAC8Bit ? 
0x00 : 0x01) */; + break; + case 15: + gModeReg = 0xA0; + devSetupRegA = 0x60; + break; + case 16: + gModeReg = 0xA1; + devSetupRegA = 0x60; + break; + case 24: + gModeReg = 0xC0; + devSetupRegA = 0x60; + break; + case 32: + gModeReg = 0xE3; + devSetupRegA = 0x60; + break; + } + + if (!accel) { /* without acceleration the depth-specific values above are overridden */ + gModeReg = 0x80; + devSetupRegA = 0x61; + } + + temp = aty_ld_8(DAC_CNTL, info); + aty_st_8(DAC_CNTL, (temp & ~DAC_EXT_SEL_RS2) | DAC_EXT_SEL_RS3, info); + + aty_st_8(DAC_REGS + 2, 0x1D, info); + aty_st_8(DAC_REGS + 3, gModeReg, info); + aty_st_8(DAC_REGS, 0x02, info); + + temp = aty_ld_8(DAC_CNTL, info); + aty_st_8(DAC_CNTL, temp | DAC_EXT_SEL_RS2 | DAC_EXT_SEL_RS3, info); + + if (info->total_vram < MEM_SIZE_1M) /* mask depends on video RAM size: below, equal to, or above MEM_SIZE_1M */ + mask = 0x04; + else if (info->total_vram == MEM_SIZE_1M) + mask = 0x08; + else + mask = 0x0C; + + /* The following assumes that the BIOS has correctly set R7 of the + * Device Setup Register A at boot time. + */ +#define A860_DELAY_L 0x80 + + temp = aty_ld_8(DAC_REGS, info); + aty_st_8(DAC_REGS, (devSetupRegA | mask) | (temp & A860_DELAY_L), info); /* keeps the A860_DELAY_L bit as it was read back */ + temp = aty_ld_8(DAC_CNTL, info); + aty_st_8(DAC_CNTL, (temp & ~(DAC_EXT_SEL_RS2 | DAC_EXT_SEL_RS3)), info); + + aty_st_le32(BUS_CNTL, 0x890e20f1, info); + aty_st_le32(DAC_CNTL, 0x47052100, info); + + return 0; +} + +const struct aty_dac_ops aty_dac_ati68860b = { + set_dac: aty_set_dac_ATI68860_B, +}; + + + /* + * AT&T 21C498 DAC + */ + +static int aty_set_dac_ATT21C498(const struct fb_info_aty *info, + const union aty_pll *pll, u32 bpp, u32 accel) +{ + u32 dotClock; + int muxmode = 0; + int DACMask = 0; + + dotClock = 100000000 / pll->ics2595.period_in_ps; /* 1e8 / period in ps = frequency in units of 0.01 MHz */ + + switch (bpp) { + case 8: + if (dotClock > 8000) { /* above 80.00 MHz: the only case that sets muxmode */ + DACMask = 0x24; + muxmode = 1; + } else + DACMask = 0x04; + break; + case 15: + DACMask = 0x16; + break; + case 16: + DACMask = 0x36; + break; + case 24: + DACMask = 0xE6; + break; + case 32: + DACMask = 0xE6; + break; + } + + if (1 /* info->mach64DAC8Bit */) + DACMask |= 0x02; + + aty_dac_waste4(info); + 
aty_st_8(DAC_REGS + 2, DACMask, info); + + aty_st_le32(BUS_CNTL, 0x890e20f1, info); + aty_st_le32(DAC_CNTL, 0x00072000, info); + return muxmode; /* unlike the other set_dac routines visible here, this returns muxmode rather than a plain 0 */ +} + +const struct aty_dac_ops aty_dac_att21c498 = { + set_dac: aty_set_dac_ATT21C498, +}; + + + /* + * ATI 18818 / ICS 2595 Clock Chip + */ + +static int aty_var_to_pll_18818(const struct fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll) +{ + u32 MHz100; /* in 0.01 MHz */ + u32 program_bits; + u32 post_divider; + + /* Calculate the programming word */ + MHz100 = 100000000 / vclk_per; /* NOTE(review): no guard against vclk_per == 0 */ + + program_bits = -1; /* sentinel meaning not yet computed (all ones in the u32) */ + post_divider = 1; + + if (MHz100 > MAX_FREQ_2595) { + MHz100 = MAX_FREQ_2595; /* NOTE(review): dead store, the function returns on the next line */ + return -EINVAL; + } else if (MHz100 < ABS_MIN_FREQ_2595) { + program_bits = 0; /* MHz100 = 257 */ /* NOTE(review): dead store, the function returns on the next line */ + return -EINVAL; + } else { + while (MHz100 < MIN_FREQ_2595) { /* double the frequency and the post divider until MIN_FREQ_2595 is reached */ + MHz100 *= 2; + post_divider *= 2; + } + } + MHz100 *= 1000; + MHz100 = (REF_DIV_2595 * MHz100) / REF_FREQ_2595; + + MHz100 += 500; /* + 0.5 round */ + MHz100 /= 1000; + + if (program_bits == -1) { /* always true here: both paths that change program_bits return early */ + program_bits = MHz100 - N_ADJ_2595; + switch (post_divider) { + case 1: + program_bits |= 0x0600; + break; + case 2: + program_bits |= 0x0400; + break; + case 4: + program_bits |= 0x0200; + break; + case 8: + default: + break; + } + } + + program_bits |= STOP_BITS_2595; + + pll->ics2595.program_bits = program_bits; + pll->ics2595.locationAddr = 0; + pll->ics2595.post_divider = post_divider; + pll->ics2595.period_in_ps = vclk_per; + + return 0; +} + +static u32 aty_pll_18818_to_var(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + return(pll->ics2595.period_in_ps); /* default for now */ +} + +static void aty_ICS2595_put1bit(u8 data, const struct fb_info_aty *info) +{ + u8 tmp; + + data &= 0x01; + tmp = aty_ld_8(CLOCK_CNTL, info); + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, (tmp & ~0x04) | (data << 2), + info); /* the data bit is placed in bit 2 */ + + tmp = aty_ld_8(CLOCK_CNTL, info); + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, (tmp & ~0x08) | (0 << 3), info); /* bit 3 low, then strobe */ + + aty_StrobeClock(info); + + 
tmp = aty_ld_8(CLOCK_CNTL, info); + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, (tmp & ~0x08) | (1 << 3), info); /* bit 3 high, then strobe again */ + + aty_StrobeClock(info); + + return; +} + +static void aty_set_pll18818(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + u32 program_bits; + u32 locationAddr; + + u32 i; + + u8 old_clock_cntl; + u8 old_crtc_ext_disp; + + old_clock_cntl = aty_ld_8(CLOCK_CNTL, info); /* saved so it can be written back after programming */ + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, 0, info); + + old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), + info); + + mdelay(15); /* delay for 50 (15) ms */ + + program_bits = pll->ics2595.program_bits; + locationAddr = pll->ics2595.locationAddr; + + /* Program the clock chip */ + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, 0, info); /* Strobe = 0 */ + aty_StrobeClock(info); + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, 1, info); /* Strobe = 0 */ + aty_StrobeClock(info); + + aty_ICS2595_put1bit(1, info); /* Send start bits */ + aty_ICS2595_put1bit(0, info); /* Start bit */ + aty_ICS2595_put1bit(0, info); /* Read / ~Write */ + + for (i = 0; i < 5; i++) { /* Location 0..4: five address bits, least significant first */ + aty_ICS2595_put1bit(locationAddr & 1, info); + locationAddr >>= 1; + } + + for (i = 0; i < 8 + 1 + 2 + 2; i++) { /* 13 program bits, least significant first */ + aty_ICS2595_put1bit(program_bits & 1, info); + program_bits >>= 1; + } + + mdelay(1); /* delay for 1 ms */ + + (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, old_clock_cntl | CLOCK_STROBE, + info); + + mdelay(50); /* delay for 50 (15) ms */ + aty_st_8(CLOCK_CNTL + info->clk_wr_offset, + ((pll->ics2595.locationAddr & 0x0F) | CLOCK_STROBE), info); /* uses the original locationAddr; the local copy was shifted out above */ + + return; +} + +const struct aty_pll_ops aty_pll_ati18818_1 = { + var_to_pll: aty_var_to_pll_18818, + pll_to_var: aty_pll_18818_to_var, + set_pll: aty_set_pll18818, +}; + + + /* + * STG 1703 Clock Chip + */ + +static int aty_var_to_pll_1703(const struct 
fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll) +{ + u32 mhz100; /* in 0.01 MHz */ + u32 program_bits; + /* u32 post_divider; */ + u32 mach64MinFreq, mach64MaxFreq, mach64RefFreq; + u32 temp, tempB; + u16 remainder, preRemainder; + short divider = 0, tempA; + + /* Calculate the programming word */ + mhz100 = 100000000 / vclk_per; + mach64MinFreq = MIN_FREQ_2595; + mach64MaxFreq = MAX_FREQ_2595; + mach64RefFreq = REF_FREQ_2595; /* 14.32 MHz */ + + /* Calculate program word */ + if (mhz100 == 0) + program_bits = 0xE0; + else { + if (mhz100 < mach64MinFreq) /* clamp into [mach64MinFreq, mach64MaxFreq] */ + mhz100 = mach64MinFreq; + if (mhz100 > mach64MaxFreq) + mhz100 = mach64MaxFreq; + + divider = 0; + while (mhz100 < (mach64MinFreq << 3)) { /* each doubling adds 0x20 to divider */ + mhz100 <<= 1; + divider += 0x20; + } + + temp = (unsigned int)(mhz100); + temp = (unsigned int)(temp * (MIN_N_1703 + 2)); + temp -= (short)(mach64RefFreq << 1); + + tempA = MIN_N_1703; + preRemainder = 0xffff; + + do { /* try tempA from MIN_N_1703 up to 2 * MIN_N_1703 */ + tempB = temp; + remainder = tempB % mach64RefFreq; + tempB = tempB / mach64RefFreq; + + if ((tempB & 0xffff) <= 127 && (remainder <= preRemainder)) { /* keep the candidate with the smallest remainder whose quotient is at most 127 */ + preRemainder = remainder; + divider &= ~0x1f; + divider |= tempA; + divider = (divider & 0x00ff) + ((tempB & 0xff) << 8); + } + + temp += mhz100; + tempA++; + } while (tempA <= (MIN_N_1703 << 1)); + + program_bits = divider; + } + + pll->ics2595.program_bits = program_bits; + pll->ics2595.locationAddr = 0; + pll->ics2595.post_divider = divider; /* for nothing */ + pll->ics2595.period_in_ps = vclk_per; + + return 0; +} + +static u32 aty_pll_1703_to_var(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + return(pll->ics2595.period_in_ps); /* default for now */ +} + +static void aty_set_pll_1703(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + u32 program_bits; + u32 locationAddr; + + char old_crtc_ext_disp; + + old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), + info); + + program_bits = 
pll->ics2595.program_bits; + locationAddr = pll->ics2595.locationAddr; + + /* Program clock */ + aty_dac_waste4(info); + + (void)aty_ld_8(DAC_REGS + 2, info); + aty_st_8(DAC_REGS+2, (locationAddr << 1) + 0x20, info); + aty_st_8(DAC_REGS+2, 0, info); + aty_st_8(DAC_REGS+2, (program_bits & 0xFF00) >> 8, info); /* high byte of the program word first */ + aty_st_8(DAC_REGS+2, (program_bits & 0xFF), info); /* then the low byte */ + + (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); + + return; +} + +const struct aty_pll_ops aty_pll_stg1703 = { + var_to_pll: aty_var_to_pll_1703, + pll_to_var: aty_pll_1703_to_var, + set_pll: aty_set_pll_1703, +}; + + + /* + * Chrontel 8398 Clock Chip + */ + +static int aty_var_to_pll_8398(const struct fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll) +{ + u32 tempA, tempB, fOut, longMHz100, diff, preDiff; + + u32 mhz100; /* in 0.01 MHz */ + u32 program_bits; + /* u32 post_divider; */ + u32 mach64MinFreq, mach64MaxFreq, mach64RefFreq; + u16 m, n, k=0, save_m, save_n, twoToKth; + + /* Calculate the programming word */ + mhz100 = 100000000 / vclk_per; + mach64MinFreq = MIN_FREQ_2595; + mach64MaxFreq = MAX_FREQ_2595; + mach64RefFreq = REF_FREQ_2595; /* 14.32 MHz */ /* NOTE(review): not read again in this function; the search uses the 14.31818 literal */ + + save_m = 0; + save_n = 0; + + /* Calculate program word */ + if (mhz100 == 0) + program_bits = 0xE0; + else + { + if (mhz100 < mach64MinFreq) + mhz100 = mach64MinFreq; + if (mhz100 > mach64MaxFreq) + mhz100 = mach64MaxFreq; + + longMHz100 = mhz100 * 256 / 100; /* MHz scaled by 256 (8 bit fixed point) */ + + while (mhz100 < (mach64MinFreq << 3)) /* k counts the doublings */ + { + mhz100 <<= 1; + k++; + } + + twoToKth = 1 << k; + diff = 0; + preDiff = 0xFFFFFFFF; + + for (m = MIN_M; m <= MAX_M; m++) /* exhaustive search for the (m, n) pair whose output is closest to the target */ + { + for (n = MIN_N; n <= MAX_N; n++) + { + tempA = (14.31818 * 65536); /* NOTE(review): floating-point constant expression; presumably folded at compile time - confirm */ + tempA *= (n + 8); /* 43..255 */ + tempB = twoToKth * 256; + tempB *= (m + 2); /* 4..32 */ + fOut = tempA / tempB; /* 8 bit scale */ + + if (longMHz100 > fOut) + diff = longMHz100 - fOut; + else + diff = fOut - longMHz100; + + if (diff < preDiff) + { 
+ save_m = m; + save_n = n; + preDiff = diff; + } + } + } + + program_bits = (k << 6) + (save_m) + (save_n << 8); /* save_n from bit 8 up, k shifted to bit 6, save_m in the low bits */ + } + + pll->ics2595.program_bits = program_bits; + pll->ics2595.locationAddr = 0; + pll->ics2595.post_divider = 0; + pll->ics2595.period_in_ps = vclk_per; + + return 0; +} + +static u32 aty_pll_8398_to_var(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + return(pll->ics2595.period_in_ps); /* default for now: hands back the stored period_in_ps without recomputing it */ +} + +static void aty_set_pll_8398(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + u32 program_bits; + u32 locationAddr; + + char old_crtc_ext_disp; + char tmp; + + old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); /* saved and written back at the end */ + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), + info); + + program_bits = pll->ics2595.program_bits; + locationAddr = pll->ics2595.locationAddr; + + /* Program clock */ + tmp = aty_ld_8(DAC_CNTL, info); + aty_st_8(DAC_CNTL, tmp | DAC_EXT_SEL_RS2 | DAC_EXT_SEL_RS3, info); + + aty_st_8(DAC_REGS, locationAddr, info); + aty_st_8(DAC_REGS+1, (program_bits & 0xff00) >> 8, info); /* high byte of the program word first */ + aty_st_8(DAC_REGS+1, (program_bits & 0xff), info); /* then the low byte */ + + tmp = aty_ld_8(DAC_CNTL, info); + aty_st_8(DAC_CNTL, (tmp & ~DAC_EXT_SEL_RS2) | DAC_EXT_SEL_RS3, info); + + (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); + + return; +} + +const struct aty_pll_ops aty_pll_ch8398 = { + var_to_pll: aty_var_to_pll_8398, + pll_to_var: aty_pll_8398_to_var, + set_pll: aty_set_pll_8398, +}; + + + /* + * AT&T 20C408 Clock Chip + */ + +static int aty_var_to_pll_408(const struct fb_info_aty *info, u32 vclk_per, + u8 bpp, union aty_pll *pll) +{ + u32 mhz100; /* in 0.01 MHz */ + u32 program_bits; + /* u32 post_divider; */ + u32 mach64MinFreq, mach64MaxFreq, mach64RefFreq; + u32 temp, tempB; + u16 remainder, preRemainder; + short divider = 0, tempA; + + /* Calculate the programming word */ + mhz100 = 100000000 / vclk_per; + mach64MinFreq = MIN_FREQ_2595; + 
mach64MaxFreq = MAX_FREQ_2595; + mach64RefFreq = REF_FREQ_2595; /* 14.32 MHz */ + + /* Calculate program word */ + if (mhz100 == 0) + program_bits = 0xFF; + else { + if (mhz100 < mach64MinFreq) /* clamp into [mach64MinFreq, mach64MaxFreq] */ + mhz100 = mach64MinFreq; + if (mhz100 > mach64MaxFreq) + mhz100 = mach64MaxFreq; + + while (mhz100 < (mach64MinFreq << 3)) { /* each doubling adds 0x40 to divider */ + mhz100 <<= 1; + divider += 0x40; + } + + temp = (unsigned int)mhz100; + temp = (unsigned int)(temp * (MIN_N_408 + 2)); + temp -= ((short)(mach64RefFreq << 1)); + + tempA = MIN_N_408; + preRemainder = 0xFFFF; + + do { /* try tempA from MIN_N_408 up to 32 */ + tempB = temp; + remainder = tempB % mach64RefFreq; + tempB = tempB / mach64RefFreq; + if (((tempB & 0xFFFF) <= 255) && (remainder <= preRemainder)) { /* keep the candidate with the smallest remainder whose quotient is at most 255 */ + preRemainder = remainder; + divider &= ~0x3f; + divider |= tempA; + divider = (divider & 0x00FF) + ((tempB & 0xFF) << 8); + } + temp += mhz100; + tempA++; + } while(tempA <= 32); + + program_bits = divider; + } + + pll->ics2595.program_bits = program_bits; + pll->ics2595.locationAddr = 0; + pll->ics2595.post_divider = divider; /* for nothing */ + pll->ics2595.period_in_ps = vclk_per; + + return 0; +} + +static u32 aty_pll_408_to_var(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + return(pll->ics2595.period_in_ps); /* default for now */ +} + +static void aty_set_pll_408(const struct fb_info_aty *info, + const union aty_pll *pll) +{ + u32 program_bits; + u32 locationAddr; + + u8 tmpA, tmpB, tmpC; + char old_crtc_ext_disp; + + old_crtc_ext_disp = aty_ld_8(CRTC_GEN_CNTL + 3, info); + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp | (CRTC_EXT_DISP_EN >> 24), + info); + + program_bits = pll->ics2595.program_bits; + locationAddr = pll->ics2595.locationAddr; + + /* Program clock */ + aty_dac_waste4(info); + tmpB = aty_ld_8(DAC_REGS + 2, info) | 1; + aty_dac_waste4(info); + aty_st_8(DAC_REGS + 2, tmpB, info); + + tmpA = tmpB; + tmpC = tmpA; /* remembered so bits 0 and 3 can be cleared again at the end */ + tmpA |= 8; + tmpB = 1; + + aty_st_8(DAC_REGS, tmpB, info); + aty_st_8(DAC_REGS + 2, tmpA, info); + + udelay(400); /* delay for 400 us */ + + 
locationAddr = (locationAddr << 2) + 0x40; + tmpB = locationAddr; + tmpA = program_bits >> 8; + + aty_st_8(DAC_REGS, tmpB, info); + aty_st_8(DAC_REGS + 2, tmpA, info); + + tmpB = locationAddr + 1; + tmpA = (u8)program_bits; + + aty_st_8(DAC_REGS, tmpB, info); + aty_st_8(DAC_REGS + 2, tmpA, info); + + tmpB = locationAddr + 2; + tmpA = 0x77; + + aty_st_8(DAC_REGS, tmpB, info); + aty_st_8(DAC_REGS + 2, tmpA, info); + + udelay(400); /* delay for 400 us */ + tmpA = tmpC & (~(1 | 8)); + tmpB = 1; + + aty_st_8(DAC_REGS, tmpB, info); + aty_st_8(DAC_REGS + 2, tmpA, info); + + (void)aty_ld_8(DAC_REGS, info); /* Clear DAC Counter */ + aty_st_8(CRTC_GEN_CNTL + 3, old_crtc_ext_disp, info); + + return; +} + +const struct aty_pll_ops aty_pll_att20c408 = { + var_to_pll: aty_var_to_pll_408, + pll_to_var: aty_pll_408_to_var, + set_pll: aty_set_pll_408, +}; + + + /* + * Unsupported DAC and Clock Chip + */ + +static int aty_set_dac_unsupported(const struct fb_info_aty *info, + const union aty_pll *pll, u32 bpp, + u32 accel) +{ + aty_st_le32(BUS_CNTL, 0x890e20f1, info); + aty_st_le32(DAC_CNTL, 0x47052100, info); + /* new in 2.2.3p1 from Geert. ???????? */ + aty_st_le32(BUS_CNTL, 0x590e10ff, info); + aty_st_le32(DAC_CNTL, 0x47012100, info); + return 0; +} + +static int dummy(void) +{ + return 0; +} + +const struct aty_dac_ops aty_dac_unsupported = { + set_dac: aty_set_dac_unsupported, +}; + +const struct aty_pll_ops aty_pll_unsupported = { + var_to_pll: (void *)dummy, + pll_to_var: (void *)dummy, + set_pll: (void *)dummy, +}; + diff --git a/fs/buffer.c b/fs/buffer.c index f62fed1c35c2..6b2702b00b75 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -184,22 +184,14 @@ void end_buffer_io_sync(struct buffer_head *bh, int uptodate) /* * The buffers have been marked clean and locked. Just submit the dang * things.. 
- * - * We'll wait for the first one of them - "sync" is not exactly - * performance-critical, and this makes us not hog the IO subsystem - * completely, while still allowing for a fair amount of concurrent IO. */ static void write_locked_buffers(struct buffer_head **array, unsigned int count) { - struct buffer_head *wait = *array; - get_bh(wait); do { struct buffer_head * bh = *array++; bh->b_end_io = end_buffer_io_sync; submit_bh(WRITE, bh); } while (--count); - wait_on_buffer(wait); - put_bh(wait); } #define NRSYNC (32) @@ -310,11 +302,11 @@ int fsync_super(struct super_block *sb) lock_kernel(); sync_inodes_sb(sb); + DQUOT_SYNC(dev); lock_super(sb); if (sb->s_dirt && sb->s_op && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); - DQUOT_SYNC(dev); unlock_kernel(); return sync_buffers(dev, 1); @@ -325,9 +317,9 @@ int fsync_dev(kdev_t dev) sync_buffers(dev, 0); lock_kernel(); - sync_supers(dev); sync_inodes(dev); DQUOT_SYNC(dev); + sync_supers(dev); unlock_kernel(); return sync_buffers(dev, 1); @@ -2608,8 +2600,8 @@ void wakeup_bdflush(int block) static int sync_old_buffers(void) { lock_kernel(); - sync_supers(0); sync_unlocked_inodes(); + sync_supers(0); unlock_kernel(); flush_dirty_buffers(1); diff --git a/fs/dcache.c b/fs/dcache.c index a03dd6ade160..59df6ce8fcb3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -547,7 +547,7 @@ void shrink_dcache_parent(struct dentry * parent) * ... * 6 - base-level: try to shrink a bit. 
*/ -void shrink_dcache_memory(int priority, unsigned int gfp_mask) +int shrink_dcache_memory(int priority, unsigned int gfp_mask) { int count = 0; @@ -563,13 +563,13 @@ void shrink_dcache_memory(int priority, unsigned int gfp_mask) * block allocations, but for now: */ if (!(gfp_mask & __GFP_FS)) - return; + return 0; - if (priority) - count = dentry_stat.nr_unused / priority; + count = dentry_stat.nr_unused >> priority; prune_dcache(count); kmem_cache_shrink(dentry_cache); + return 0; } #define NAME_ALLOC_LEN(len) ((len+16) & ~15) diff --git a/fs/inode.c b/fs/inode.c index 1645c8043ba7..d52d3ae08a70 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -681,7 +681,7 @@ free_unused: goto free_unused; } -void shrink_icache_memory(int priority, int gfp_mask) +int shrink_icache_memory(int priority, int gfp_mask) { int count = 0; @@ -693,13 +693,13 @@ void shrink_icache_memory(int priority, int gfp_mask) * in clear_inode() and friends.. */ if (!(gfp_mask & __GFP_FS)) - return; + return 0; - if (priority) - count = inodes_stat.nr_unused / priority; + count = inodes_stat.nr_unused >> priority; prune_icache(count); kmem_cache_shrink(inode_cachep); + return 0; } /* diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index e6aa43bb1e7b..826329ef5687 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h @@ -96,18 +96,6 @@ #define BE32(x) ((u32)be32_to_cpu(get_unaligned((u32*)(x)))) #define BE64(x) ((u64)be64_to_cpu(get_unaligned((u64*)(x)))) -/* Borrowed from msdos.c. */ -#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) -#define NR_SECTS(p) ({ __typeof__((p)->nr_sects) __a = \ - get_unaligned(&(p)->nr_sects); \ - le32_to_cpu(__a); \ - }) - -#define START_SECT(p) ({ __typeof__((p)->start_sect) __a = \ - get_unaligned(&(p)->start_sect);\ - le32_to_cpu(__a); \ - }) - /* In memory LDM database structures. */ #define DISK_ID_SIZE 64 /* Size in bytes. 
*/ diff --git a/include/asm-i386/softirq.h b/include/asm-i386/softirq.h index 75f742e89e2e..4bf8d607b906 100644 --- a/include/asm-i386/softirq.h +++ b/include/asm-i386/softirq.h @@ -25,9 +25,7 @@ #define local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ - unsigned long flags; \ \ - __save_flags(flags); \ barrier(); \ if (!--*ptr) \ __asm__ __volatile__ ( \ diff --git a/include/asm-ia64/a.out.h b/include/asm-ia64/a.out.h index 7cc0a00ce0a6..25de011bc7f4 100644 --- a/include/asm-ia64/a.out.h +++ b/include/asm-ia64/a.out.h @@ -32,7 +32,7 @@ struct exec { #ifdef __KERNEL__ # include <asm/page.h> # define STACK_TOP (0x8000000000000000UL + (1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) -# define IA64_RBS_BOT (STACK_TOP - 0x80000000L) /* bottom of register backing store */ +# define IA64_RBS_BOT (STACK_TOP - 0x80000000L + PAGE_SIZE) /* bottom of reg. backing store */ #endif #endif /* _ASM_IA64_A_OUT_H */ diff --git a/include/asm-ia64/acpi-ext.h b/include/asm-ia64/acpi-ext.h index 775934cbdfb8..b11c49e9d415 100644 --- a/include/asm-ia64/acpi-ext.h +++ b/include/asm-ia64/acpi-ext.h @@ -5,12 +5,12 @@ * Advanced Configuration and Power Infterface * Based on 'ACPI Specification 1.0b' Febryary 2, 1999 * and 'IA-64 Extensions to the ACPI Specification' Rev 0.6 - * + * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> * Copyright (C) 2000 Intel Corp. * Copyright (C) 2000 J.I. 
Lee <jung-ik.lee@intel.com> - * ACPI 2.0 specification + * ACPI 2.0 specification */ #include <linux/types.h> @@ -147,6 +147,9 @@ typedef struct { u32 flags; } acpi_madt_t; +/* acpi 2.0 MADT flags */ +#define MADT_PCAT_COMPAT (1<<0) + /* acpi 2.0 MADT structure types */ #define ACPI20_ENTRY_LOCAL_APIC 0 #define ACPI20_ENTRY_IO_APIC 1 diff --git a/include/asm-ia64/acpikcfg.h b/include/asm-ia64/acpikcfg.h index a1d35aa89a19..d79cbbe4db0a 100644 --- a/include/asm-ia64/acpikcfg.h +++ b/include/asm-ia64/acpikcfg.h @@ -1,6 +1,6 @@ -#include <linux/config.h> +#ifndef _ASM_IA64_ACPIKCFG_H +#define _ASM_IA64_ACPIKCFG_H -#ifdef CONFIG_ACPI_KERNEL_CONFIG /* * acpikcfg.h - ACPI based Kernel Configuration Manager External Interfaces * @@ -26,4 +26,5 @@ acpi_cf_print_pci_vectors ( int num_pci_vectors ); #endif -#endif /* CONFIG_ACPI_KERNEL_CONFIG */ + +#endif /* _ASM_IA64_ACPIKCFG_H */ diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h index 5c5884635aff..6499193f5d82 100644 --- a/include/asm-ia64/bitops.h +++ b/include/asm-ia64/bitops.h @@ -2,24 +2,29 @@ #define _ASM_IA64_BITOPS_H /* - * Copyright (C) 1998-2000 Hewlett-Packard Co - * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> - * - * 02/04/00 D. Mosberger Require 64-bit alignment for bitops, per suggestion from davem + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com> */ #include <asm/system.h> -/* - * These operations need to be atomic. The address must be (at least) - * 32-bit aligned. Note that there are driver (e.g., eepro100) which - * use these operations to operate on hw-defined data-structures, so - * we can't easily change these operations to force a bigger - * alignment. +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. 
+ * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + * + * The address must be (at least) "long" aligned. + * Note that there are driver (e.g., eepro100) which use these operations to operate on + * hw-defined data-structures, so we can't easily change these operations to force a + * bigger alignment. * * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ - static __inline__ void set_bit (int nr, volatile void *addr) { @@ -36,11 +41,37 @@ set_bit (int nr, volatile void *addr) } while (cmpxchg_acq(m, old, new) != old); } +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void +__set_bit (int nr, volatile void *addr) +{ + *((__u32 *) addr + (nr >> 5)) |= (1 << (nr & 31)); +} + /* * clear_bit() doesn't provide any barrier for the compiler. */ #define smp_mb__before_clear_bit() smp_mb() #define smp_mb__after_clear_bit() smp_mb() + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ static __inline__ void clear_bit (int nr, volatile void *addr) { @@ -57,6 +88,15 @@ clear_bit (int nr, volatile void *addr) } while (cmpxchg_acq(m, old, new) != old); } +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. 
+ * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ static __inline__ void change_bit (int nr, volatile void *addr) { @@ -73,6 +113,29 @@ change_bit (int nr, volatile void *addr) } while (cmpxchg_acq(m, old, new) != old); } +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void +__change_bit (int nr, volatile void *addr) +{ + *((__u32 *) addr + (nr >> 5)) ^= (1 << (nr & 31)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ static __inline__ int test_and_set_bit (int nr, volatile void *addr) { @@ -90,6 +153,34 @@ test_and_set_bit (int nr, volatile void *addr) return (old & bit) != 0; } +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int +__test_and_set_bit (int nr, volatile void *addr) +{ + __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1 << (nr & 31); + int oldbitset = (*p & m) != 0; + + *p |= m; + return oldbitset; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
+ */ static __inline__ int test_and_clear_bit (int nr, volatile void *addr) { @@ -107,6 +198,34 @@ test_and_clear_bit (int nr, volatile void *addr) return (old & ~mask) != 0; } +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int +__test_and_clear_bit(int nr, volatile void * addr) +{ + __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1 << (nr & 31); + int oldbitset = *p & m; + + *p &= ~m; + return oldbitset; +} + +/** + * test_and_change_bit - Change a bit and return its new value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ static __inline__ int test_and_change_bit (int nr, volatile void *addr) { @@ -124,15 +243,33 @@ test_and_change_bit (int nr, volatile void *addr) return (old & bit) != 0; } +/* + * WARNING: non atomic version. + */ +static __inline__ int +__test_and_change_bit (int nr, void *addr) +{ + __u32 old, bit = (1 << (nr & 31)); + __u32 *m = (__u32 *) addr + (nr >> 5); + + old = *m; + *m = old ^ bit; + return (old & bit) != 0; +} + static __inline__ int test_bit (int nr, volatile void *addr) { return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } -/* - * ffz = Find First Zero in word. Undefined if no zero exists, - * so code should check against ~0UL first.. +/** + * ffz - find the first zero bit in a memory region + * @x: The address to start the search at + * + * Returns the bit-number (0..63) of the first (least significant) zero bit, not + * the number of the byte containing a bit. Undefined if no zero exists, so + * code should check against ~0UL first... 
*/ static inline unsigned long ffz (unsigned long x) @@ -146,8 +283,8 @@ ffz (unsigned long x) #ifdef __KERNEL__ /* - * Find the most significant bit that is set (undefined if no bit is - * set). + * find_last_zero_bit - find the last zero bit in a 64 bit quantity + * @x: The value to search */ static inline unsigned long ia64_fls (unsigned long x) @@ -160,9 +297,10 @@ ia64_fls (unsigned long x) } /* - * ffs: find first bit set. This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). + * ffs: find first bit set. This is defined the same way as the libc and compiler builtin + * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on + * "int" values only and the result value is the bit number + 1. ffs(0) is defined to + * return zero. */ #define ffs(x) __builtin_ffs(x) diff --git a/include/asm-ia64/efi.h b/include/asm-ia64/efi.h index 70128e907b90..3157aaa68c01 100644 --- a/include/asm-ia64/efi.h +++ b/include/asm-ia64/efi.h @@ -15,6 +15,7 @@ #include <linux/string.h> #include <linux/time.h> #include <linux/types.h> +#include <linux/proc_fs.h> #include <asm/page.h> #include <asm/system.h> @@ -237,7 +238,7 @@ extern void efi_map_pal_code (void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timeval *tv); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ - +extern u64 efi_get_iobase (void); /* * Variable Attributes @@ -246,4 +247,12 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002 #define EFI_VARIABLE_RUNTIME_ACCESS 0x0000000000000004 + +/* + * efi_dir is allocated in arch/ia64/kernel/efi.c. 
+ */ +#ifdef CONFIG_PROC_FS +extern struct proc_dir_entry *efi_dir; +#endif + #endif /* _ASM_IA64_EFI_H */ diff --git a/include/asm-ia64/fpswa.h b/include/asm-ia64/fpswa.h index bbf8afcfdd44..8986f033c815 100644 --- a/include/asm-ia64/fpswa.h +++ b/include/asm-ia64/fpswa.h @@ -9,10 +9,6 @@ * Copyright (C) 1999 Goutham Rao <goutham.rao@intel.com> */ -#if 1 -#define FPSWA_BUG -#endif - typedef struct { /* 4 * 128 bits */ unsigned long fp_lp[4*2]; diff --git a/include/asm-ia64/hardirq.h b/include/asm-ia64/hardirq.h index 4befe78e0f0b..0328bd3f0d06 100644 --- a/include/asm-ia64/hardirq.h +++ b/include/asm-ia64/hardirq.h @@ -16,15 +16,15 @@ /* * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. */ -#define softirq_active(cpu) (cpu_data[cpu].softirq.active) -#define softirq_mask(cpu) (cpu_data[cpu].softirq.mask) -#define irq_count(cpu) (cpu_data[cpu].irq_stat.f.irq_count) -#define bh_count(cpu) (cpu_data[cpu].irq_stat.f.bh_count) +#define softirq_pending(cpu) (cpu_data(cpu)->softirq_pending) +#define ksoftirqd_task(cpu) (cpu_data(cpu)->ksoftirqd) +#define irq_count(cpu) (cpu_data(cpu)->irq_stat.f.irq_count) +#define bh_count(cpu) (cpu_data(cpu)->irq_stat.f.bh_count) #define syscall_count(cpu) /* unused on IA-64 */ #define nmi_count(cpu) 0 -#define local_softirq_active() (local_cpu_data->softirq.active) -#define local_softirq_mask() (local_cpu_data->softirq.mask) +#define local_softirq_pending() (local_cpu_data->softirq_pending) +#define local_ksoftirqd_task() (local_cpu_data->ksoftirqd) #define local_irq_count() (local_cpu_data->irq_stat.f.irq_count) #define local_bh_count() (local_cpu_data->irq_stat.f.bh_count) #define local_syscall_count() /* unused on IA-64 */ diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index c0162769842c..75c82bc8abf5 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -49,6 +49,7 @@ typedef u8 ia64_vector; #define IA64_PERFMON_VECTOR 0xee /* performanc monitor interrupt vector */ 
#define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */ #define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */ +#define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */ #define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */ /* IA64 inter-cpu interrupt related definitions */ @@ -69,7 +70,7 @@ extern __u8 isa_irq_to_vector_map[16]; extern unsigned long ipi_base_addr; -extern struct hw_interrupt_type irq_type_ia64_sapic; /* CPU-internal interrupt controller */ +extern struct hw_interrupt_type irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ extern int ia64_alloc_irq (void); /* allocate a free irq */ extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); diff --git a/include/asm-ia64/ia32.h b/include/asm-ia64/ia32.h index a0b2c4b78e02..7b2f88f8f80d 100644 --- a/include/asm-ia64/ia32.h +++ b/include/asm-ia64/ia32.h @@ -108,7 +108,7 @@ typedef struct { } sigset32_t; struct sigaction32 { - unsigned int sa_handler; /* Really a pointer, but need to deal + unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */ unsigned int sa_flags; unsigned int sa_restorer; /* Another 32 bit pointer */ @@ -118,7 +118,7 @@ struct sigaction32 { typedef unsigned int old_sigset32_t; /* at least 32 bits */ struct old_sigaction32 { - unsigned int sa_handler; /* Really a pointer, but need to deal + unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */ old_sigset32_t sa_mask; /* A 32 bit mask */ unsigned int sa_flags; @@ -133,7 +133,7 @@ typedef struct sigaltstack_ia32 { struct ucontext_ia32 { unsigned int uc_flags; - unsigned int uc_link; + unsigned int uc_link; stack_ia32_t uc_stack; struct sigcontext_ia32 uc_mcontext; sigset_t uc_sigmask; /* mask last for extensibility */ @@ -252,6 +252,15 @@ typedef struct siginfo32 { #define ELF_ARCH EM_386 #define IA32_PAGE_OFFSET 0xc0000000 +#define IA32_STACK_TOP ((IA32_PAGE_OFFSET/3) * 2) + +/* + * The 
system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can + * access them. + */ +#define IA32_GDT_OFFSET (IA32_PAGE_OFFSET) +#define IA32_TSS_OFFSET (IA32_PAGE_OFFSET + PAGE_SIZE) +#define IA32_LDT_OFFSET (IA32_PAGE_OFFSET + 2*PAGE_SIZE) #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE IA32_PAGE_SIZE @@ -287,7 +296,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* This macro yields a bitmask that programs can use to figure out what instruction set this CPU supports. */ -#define ELF_HWCAP 0 +#define ELF_HWCAP 0 /* This macro yields a string that ld.so will use to load implementation specific libraries for optimization. Not terribly @@ -304,61 +313,64 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* * IA-32 ELF specific definitions for IA-64. */ - + #define __USER_CS 0x23 #define __USER_DS 0x2B -#define SEG_LIM 32 -#define SEG_TYPE 52 -#define SEG_SYS 56 -#define SEG_DPL 57 -#define SEG_P 59 -#define SEG_DB 62 -#define SEG_G 63 - #define FIRST_TSS_ENTRY 6 #define FIRST_LDT_ENTRY (FIRST_TSS_ENTRY+1) #define _TSS(n) ((((unsigned long) n)<<4)+(FIRST_TSS_ENTRY<<3)) #define _LDT(n) ((((unsigned long) n)<<4)+(FIRST_LDT_ENTRY<<3)) -#define IA64_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, segdb, granularity) \ - ((base) | \ - (limit << SEG_LIM) | \ - (segtype << SEG_TYPE) | \ - (nonsysseg << SEG_SYS) | \ - (dpl << SEG_DPL) | \ - (segpresent << SEG_P) | \ - (segdb << SEG_DB) | \ - (granularity << SEG_G)) - -#define IA32_SEG_BASE 16 -#define IA32_SEG_TYPE 40 -#define IA32_SEG_SYS 44 -#define IA32_SEG_DPL 45 -#define IA32_SEG_P 47 -#define IA32_SEG_HIGH_LIMIT 48 -#define IA32_SEG_AVL 52 -#define IA32_SEG_DB 54 -#define IA32_SEG_G 55 -#define IA32_SEG_HIGH_BASE 56 - -#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, granularity) \ - ((limit & 0xFFFF) | \ - (base & 0xFFFFFF << IA32_SEG_BASE) | \ - (segtype << IA32_SEG_TYPE) | \ - (nonsysseg << IA32_SEG_SYS) | \ - (dpl << 
IA32_SEG_DPL) | \ - (segpresent << IA32_SEG_P) | \ - (((limit >> 16) & 0xF) << IA32_SEG_HIGH_LIMIT) | \ - (avl << IA32_SEG_AVL) | \ - (segdb << IA32_SEG_DB) | \ - (granularity << IA32_SEG_G) | \ - (((base >> 24) & 0xFF) << IA32_SEG_HIGH_BASE)) - -#define IA32_IOBASE 0x2000000000000000 /* Virtual address for I/O space */ - -#define IA32_CR0 0x80000001 /* Enable PG and PE bits */ -#define IA32_CR4 0 /* No architectural extensions */ +#define IA32_SEG_BASE 16 +#define IA32_SEG_TYPE 40 +#define IA32_SEG_SYS 44 +#define IA32_SEG_DPL 45 +#define IA32_SEG_P 47 +#define IA32_SEG_HIGH_LIMIT 48 +#define IA32_SEG_AVL 52 +#define IA32_SEG_DB 54 +#define IA32_SEG_G 55 +#define IA32_SEG_HIGH_BASE 56 + +#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, gran) \ + (((limit) & 0xffff) \ + | (((unsigned long) (base) & 0xffffff) << IA32_SEG_BASE) \ + | ((unsigned long) (segtype) << IA32_SEG_TYPE) \ + | ((unsigned long) (nonsysseg) << IA32_SEG_SYS) \ + | ((unsigned long) (dpl) << IA32_SEG_DPL) \ + | ((unsigned long) (segpresent) << IA32_SEG_P) \ + | ((((unsigned long) (limit) >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT) \ + | ((unsigned long) (avl) << IA32_SEG_AVL) \ + | ((unsigned long) (segdb) << IA32_SEG_DB) \ + | ((unsigned long) (gran) << IA32_SEG_G) \ + | ((((unsigned long) (base) >> 24) & 0xff) << IA32_SEG_HIGH_BASE)) + +#define SEG_LIM 32 +#define SEG_TYPE 52 +#define SEG_SYS 56 +#define SEG_DPL 57 +#define SEG_P 59 +#define SEG_AVL 60 +#define SEG_DB 62 +#define SEG_G 63 + +/* Unscramble an IA-32 segment descriptor into the IA-64 format. 
*/ +#define IA32_SEG_UNSCRAMBLE(sd) \ + ( (((sd) >> IA32_SEG_BASE) & 0xffffff) | ((((sd) >> IA32_SEG_HIGH_BASE) & 0xff) << 24) \ + | ((((sd) & 0xffff) | ((((sd) >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16)) << SEG_LIM) \ + | ((((sd) >> IA32_SEG_TYPE) & 0xf) << SEG_TYPE) \ + | ((((sd) >> IA32_SEG_SYS) & 0x1) << SEG_SYS) \ + | ((((sd) >> IA32_SEG_DPL) & 0x3) << SEG_DPL) \ + | ((((sd) >> IA32_SEG_P) & 0x1) << SEG_P) \ + | ((((sd) >> IA32_SEG_AVL) & 0x1) << SEG_AVL) \ + | ((((sd) >> IA32_SEG_DB) & 0x1) << SEG_DB) \ + | ((((sd) >> IA32_SEG_G) & 0x1) << SEG_G)) + +#define IA32_IOBASE 0x2000000000000000 /* Virtual address for I/O space */ + +#define IA32_CR0 0x80000001 /* Enable PG and PE bits */ +#define IA32_CR4 0x600 /* MMXEX and FXSR on */ /* * IA32 floating point control registers starting values @@ -384,6 +396,25 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; regs->r12 = new_sp; \ } while (0) +/* + * Local Descriptor Table (LDT) related declarations. + */ + +#define IA32_LDT_ENTRIES 8192 /* Maximum number of LDT entries supported. */ +#define IA32_LDT_ENTRY_SIZE 8 /* The size of each LDT entry. 
*/ + +struct ia32_modify_ldt_ldt_s { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +}; + extern void ia32_gdt_init (void); extern int ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs); @@ -392,5 +423,5 @@ extern int ia32_setup_arg_pages (struct linux_binprm *bprm); extern int ia32_exception (struct pt_regs *regs, unsigned long isr); #endif /* !CONFIG_IA32_SUPPORT */ - + #endif /* _ASM_IA64_IA32_H */ diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index 480b95590a66..ae5b7781a746 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -333,7 +333,7 @@ __writeq (unsigned long val, void *addr) #define readb(a) __readb((void *)(a)) #define readw(a) __readw((void *)(a)) #define readl(a) __readl((void *)(a)) -#define readq(a) __readqq((void *)(a)) +#define readq(a) __readq((void *)(a)) #define __raw_readb readb #define __raw_readw readw #define __raw_readl readl diff --git a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h index 90ab8aacfca4..f3a199013c17 100644 --- a/include/asm-ia64/iosapic.h +++ b/include/asm-ia64/iosapic.h @@ -51,7 +51,8 @@ #ifndef __ASSEMBLY__ -extern void __init iosapic_init (unsigned long address, unsigned int base_irq); +extern void __init iosapic_init (unsigned long address, unsigned int base_irq, + int pcat_compat); extern void iosapic_register_legacy_irq (unsigned long irq, unsigned long pin, unsigned long polarity, unsigned long trigger); extern void iosapic_pci_fixup (int); diff --git a/include/asm-ia64/mca_asm.h b/include/asm-ia64/mca_asm.h index bade650e4445..b1d32e556c97 100644 --- a/include/asm-ia64/mca_asm.h +++ b/include/asm-ia64/mca_asm.h @@ -1,5 +1,5 @@ /* - * File: mca_asm.h + * File: mca_asm.h * * Copyright (C) 1999 Silicon Graphics, Inc. 
* Copyright (C) Vijay Chander (vijay@engr.sgi.com) @@ -16,23 +16,23 @@ #define PSR_RT 27 #define PSR_IT 36 #define PSR_BN 44 - + /* * This macro converts a instruction virtual address to a physical address * Right now for simulation purposes the virtual addresses are * direct mapped to physical addresses. - * 1. Lop off bits 61 thru 63 in the virtual address + * 1. Lop off bits 61 thru 63 in the virtual address */ #define INST_VA_TO_PA(addr) \ - dep addr = 0, addr, 61, 3; + dep addr = 0, addr, 61, 3; /* * This macro converts a data virtual address to a physical address * Right now for simulation purposes the virtual addresses are * direct mapped to physical addresses. - * 1. Lop off bits 61 thru 63 in the virtual address + * 1. Lop off bits 61 thru 63 in the virtual address */ #define DATA_VA_TO_PA(addr) \ - dep addr = 0, addr, 61, 3; + dep addr = 0, addr, 61, 3; /* * This macro converts a data physical address to a virtual address * Right now for simulation purposes the virtual addresses are @@ -40,7 +40,7 @@ * 1. Put 0x7 in bits 61 thru 63. */ #define DATA_PA_TO_VA(addr,temp) \ - mov temp = 0x7 ; \ + mov temp = 0x7 ;; \ dep addr = temp, addr, 61, 3; /* @@ -48,11 +48,11 @@ * and starts execution in physical mode with all the address * translations turned off. * 1. Save the current psr - * 2. Make sure that all the upper 32 bits are off + * 2. Make sure that all the upper 32 bits are off * * 3. Clear the interrupt enable and interrupt state collection bits * in the psr before updating the ipsr and iip. - * + * * 4. Turn off the instruction, data and rse translation bits of the psr * and store the new value into ipsr * Also make sure that the interrupts are disabled. 
@@ -71,7 +71,7 @@ mov old_psr = psr; \ ;; \ dep old_psr = 0, old_psr, 32, 32; \ - \ + \ mov ar.rsc = 0 ; \ ;; \ mov temp2 = ar.bspstore; \ @@ -86,7 +86,7 @@ mov temp1 = psr; \ mov temp2 = psr; \ ;; \ - \ + \ dep temp2 = 0, temp2, PSR_IC, 2; \ ;; \ mov psr.l = temp2; \ @@ -94,11 +94,11 @@ srlz.d; \ dep temp1 = 0, temp1, 32, 32; \ ;; \ - dep temp1 = 0, temp1, PSR_IT, 1; \ + dep temp1 = 0, temp1, PSR_IT, 1; \ ;; \ - dep temp1 = 0, temp1, PSR_DT, 1; \ + dep temp1 = 0, temp1, PSR_DT, 1; \ ;; \ - dep temp1 = 0, temp1, PSR_RT, 1; \ + dep temp1 = 0, temp1, PSR_RT, 1; \ ;; \ dep temp1 = 0, temp1, PSR_I, 1; \ ;; \ @@ -125,72 +125,73 @@ * This macro jumps to the instruction at the given virtual address * and starts execution in virtual mode with all the address * translations turned on. - * 1. Get the old saved psr - * - * 2. Clear the interrupt enable and interrupt state collection bits + * 1. Get the old saved psr + * + * 2. Clear the interrupt enable and interrupt state collection bits * in the current psr. - * + * * 3. Set the instruction translation bit back in the old psr * Note we have to do this since we are right now saving only the * lower 32-bits of old psr.(Also the old psr has the data and * rse translation bits on) - * + * * 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1. * - * 5. Set iip to the virtual address of the next instruction bundle. + * 5. Set iip to the virtual address of the next instruction bundle. * * 6. Do an rfi to move ipsr to psr and iip to ip. 
*/ -#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ - mov temp2 = psr; \ - ;; \ - dep temp2 = 0, temp2, PSR_IC, 2; \ - ;; \ - mov psr.l = temp2; \ - mov ar.rsc = 0; \ - ;; \ - srlz.d; \ - mov temp2 = ar.bspstore; \ - ;; \ - DATA_PA_TO_VA(temp2,temp1); \ - ;; \ - mov temp1 = ar.rnat; \ - ;; \ - mov ar.bspstore = temp2; \ - ;; \ - mov ar.rnat = temp1; \ - ;; \ - mov temp1 = old_psr; \ - ;; \ - mov temp2 = 1 ; \ - dep temp1 = temp2, temp1, PSR_I, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_IC, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_IT, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_DT, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_RT, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_BN, 1; \ - ;; \ - \ - mov cr.ipsr = temp1; \ - movl temp2 = start_addr; \ - ;; \ - mov cr.iip = temp2; \ - DATA_PA_TO_VA(sp, temp1); \ - DATA_PA_TO_VA(gp, temp2); \ - ;; \ - nop 1; \ - nop 2; \ - nop 1; \ - rfi; \ +#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ + mov temp2 = psr; \ + ;; \ + dep temp2 = 0, temp2, PSR_IC, 2; \ + ;; \ + mov psr.l = temp2; \ + mov ar.rsc = 0; \ + ;; \ + srlz.d; \ + mov temp2 = ar.bspstore; \ + ;; \ + DATA_PA_TO_VA(temp2,temp1); \ + ;; \ + mov temp1 = ar.rnat; \ + ;; \ + mov ar.bspstore = temp2; \ + ;; \ + mov ar.rnat = temp1; \ + ;; \ + mov temp1 = old_psr; \ + ;; \ + mov temp2 = 1 \ + ;; \ + dep temp1 = temp2, temp1, PSR_I, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_IC, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_IT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_DT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_RT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_BN, 1; \ + ;; \ + \ + mov cr.ipsr = temp1; \ + movl temp2 = start_addr; \ + ;; \ + mov cr.iip = temp2; \ + DATA_PA_TO_VA(sp, temp1); \ + DATA_PA_TO_VA(gp, temp2); \ + ;; \ + nop 1; \ + nop 2; \ + nop 1; \ + rfi; \ ;; -/* +/* * The following offsets capture the order in which the * RSE related registers from the old context are * saved onto the new stack frame. 
@@ -198,15 +199,15 @@ * +-----------------------+ * |NDIRTY [BSP - BSPSTORE]| * +-----------------------+ - * | RNAT | + * | RNAT | * +-----------------------+ - * | BSPSTORE | + * | BSPSTORE | * +-----------------------+ - * | IFS | + * | IFS | * +-----------------------+ - * | PFS | + * | PFS | * +-----------------------+ - * | RSC | + * | RSC | * +-----------------------+ <-------- Bottom of new stack frame */ #define rse_rsc_offset 0 @@ -229,23 +230,23 @@ * 8. Read and save the new BSP to calculate the #dirty registers * NOTE: Look at pages 11-10, 11-11 in PRM Vol 2 */ -#define rse_switch_context(temp,p_stackframe,p_bspstore) \ - ;; \ - mov temp=ar.rsc;; \ - st8 [p_stackframe]=temp,8;; \ - mov temp=ar.pfs;; \ - st8 [p_stackframe]=temp,8; \ - cover ;; \ - mov temp=cr.ifs;; \ - st8 [p_stackframe]=temp,8;; \ - mov temp=ar.bspstore;; \ - st8 [p_stackframe]=temp,8;; \ - mov temp=ar.rnat;; \ - st8 [p_stackframe]=temp,8; \ - mov ar.bspstore=p_bspstore;; \ - mov temp=ar.bsp;; \ - sub temp=temp,p_bspstore;; \ - st8 [p_stackframe]=temp,8 +#define rse_switch_context(temp,p_stackframe,p_bspstore) \ + ;; \ + mov temp=ar.rsc;; \ + st8 [p_stackframe]=temp,8;; \ + mov temp=ar.pfs;; \ + st8 [p_stackframe]=temp,8; \ + cover ;; \ + mov temp=cr.ifs;; \ + st8 [p_stackframe]=temp,8;; \ + mov temp=ar.bspstore;; \ + st8 [p_stackframe]=temp,8;; \ + mov temp=ar.rnat;; \ + st8 [p_stackframe]=temp,8; \ + mov ar.bspstore=p_bspstore;; \ + mov temp=ar.bsp;; \ + sub temp=temp,p_bspstore;; \ + st8 [p_stackframe]=temp,8 /* * rse_return_context @@ -253,7 +254,7 @@ * 2. Store the number of dirty registers RSC.loadrs field * 3. Issue a loadrs to insure that any registers from the interrupted * context which were saved on the new stack frame have been loaded - * back into the stacked registers + * back into the stacked registers * 4. Restore BSPSTORE * 5. Restore RNAT * 6. Restore PFS @@ -261,44 +262,44 @@ * 8. Restore RSC * 9. 
Issue an RFI */ -#define rse_return_context(psr_mask_reg,temp,p_stackframe) \ - ;; \ - alloc temp=ar.pfs,0,0,0,0; \ - add p_stackframe=rse_ndirty_offset,p_stackframe;; \ - ld8 temp=[p_stackframe];; \ - shl temp=temp,16;; \ - mov ar.rsc=temp;; \ - loadrs;; \ - add p_stackframe=-rse_ndirty_offset+rse_bspstore_offset,p_stackframe;;\ - ld8 temp=[p_stackframe];; \ - mov ar.bspstore=temp;; \ - add p_stackframe=-rse_bspstore_offset+rse_rnat_offset,p_stackframe;;\ - ld8 temp=[p_stackframe];; \ - mov ar.rnat=temp;; \ - add p_stackframe=-rse_rnat_offset+rse_pfs_offset,p_stackframe;; \ - ld8 temp=[p_stackframe];; \ - mov ar.pfs=temp; \ - add p_stackframe=-rse_pfs_offset+rse_ifs_offset,p_stackframe;; \ - ld8 temp=[p_stackframe];; \ - mov cr.ifs=temp; \ - add p_stackframe=-rse_ifs_offset+rse_rsc_offset,p_stackframe;; \ - ld8 temp=[p_stackframe];; \ - mov ar.rsc=temp ; \ - add p_stackframe=-rse_rsc_offset,p_stackframe; \ - mov temp=cr.ipsr;; \ - st8 [p_stackframe]=temp,8; \ - mov temp=cr.iip;; \ - st8 [p_stackframe]=temp,-8; \ - mov temp=psr;; \ - or temp=temp,psr_mask_reg;; \ - mov cr.ipsr=temp;; \ - mov temp=ip;; \ - add temp=0x30,temp;; \ - mov cr.iip=temp;; \ - rfi;; \ - ld8 temp=[p_stackframe],8;; \ - mov cr.ipsr=temp;; \ - ld8 temp=[p_stackframe];; \ - mov cr.iip=temp +#define rse_return_context(psr_mask_reg,temp,p_stackframe) \ + ;; \ + alloc temp=ar.pfs,0,0,0,0; \ + add p_stackframe=rse_ndirty_offset,p_stackframe;; \ + ld8 temp=[p_stackframe];; \ + shl temp=temp,16;; \ + mov ar.rsc=temp;; \ + loadrs;; \ + add p_stackframe=-rse_ndirty_offset+rse_bspstore_offset,p_stackframe;;\ + ld8 temp=[p_stackframe];; \ + mov ar.bspstore=temp;; \ + add p_stackframe=-rse_bspstore_offset+rse_rnat_offset,p_stackframe;;\ + ld8 temp=[p_stackframe];; \ + mov ar.rnat=temp;; \ + add p_stackframe=-rse_rnat_offset+rse_pfs_offset,p_stackframe;; \ + ld8 temp=[p_stackframe];; \ + mov ar.pfs=temp; \ + add p_stackframe=-rse_pfs_offset+rse_ifs_offset,p_stackframe;; \ + ld8 temp=[p_stackframe];; \ + 
mov cr.ifs=temp; \ + add p_stackframe=-rse_ifs_offset+rse_rsc_offset,p_stackframe;; \ + ld8 temp=[p_stackframe];; \ + mov ar.rsc=temp ; \ + add p_stackframe=-rse_rsc_offset,p_stackframe; \ + mov temp=cr.ipsr;; \ + st8 [p_stackframe]=temp,8; \ + mov temp=cr.iip;; \ + st8 [p_stackframe]=temp,-8; \ + mov temp=psr;; \ + or temp=temp,psr_mask_reg;; \ + mov cr.ipsr=temp;; \ + mov temp=ip;; \ + add temp=0x30,temp;; \ + mov cr.iip=temp;; \ + rfi;; \ + ld8 temp=[p_stackframe],8;; \ + mov cr.ipsr=temp;; \ + ld8 temp=[p_stackframe];; \ + mov cr.iip=temp #endif /* _ASM_IA64_MCA_ASM_H */ diff --git a/include/asm-ia64/offsets.h b/include/asm-ia64/offsets.h index 8b2445c9cda3..82cb9553aa64 100644 --- a/include/asm-ia64/offsets.h +++ b/include/asm-ia64/offsets.h @@ -1,21 +1,19 @@ #ifndef _ASM_IA64_OFFSETS_H #define _ASM_IA64_OFFSETS_H - /* * DO NOT MODIFY * - * This file was generated by arch/ia64/tools/print_offsets. + * This file was generated by arch/ia64/tools/print_offsets.awk. * */ - -#define PT_PTRACED_BIT 0 -#define PT_TRACESYS_BIT 1 - +#define PT_PTRACED_BIT 0 +#define PT_TRACESYS_BIT 1 #define IA64_TASK_SIZE 3904 /* 0xf40 */ #define IA64_PT_REGS_SIZE 400 /* 0x190 */ #define IA64_SWITCH_STACK_SIZE 560 /* 0x230 */ #define IA64_SIGINFO_SIZE 128 /* 0x80 */ #define IA64_CPU_SIZE 16384 /* 0x4000 */ +#define SIGFRAME_SIZE 2832 /* 0xb10 */ #define UNW_FRAME_INFO_SIZE 448 /* 0x1c0 */ #define IA64_TASK_PTRACE_OFFSET 48 /* 0x30 */ @@ -24,8 +22,8 @@ #define IA64_TASK_PROCESSOR_OFFSET 100 /* 0x64 */ #define IA64_TASK_THREAD_OFFSET 1456 /* 0x5b0 */ #define IA64_TASK_THREAD_KSP_OFFSET 1456 /* 0x5b0 */ -#define IA64_TASK_THREAD_SIGMASK_OFFSET 3752 /* 0xea8 */ -#define IA64_TASK_PFM_NOTIFY_OFFSET 3648 /* 0xe40 */ +#define IA64_TASK_THREAD_SIGMASK_OFFSET 1568 /* 0x620 */ +#define IA64_TASK_PFM_NOTIFY_OFFSET 2088 /* 0x828 */ #define IA64_TASK_PID_OFFSET 196 /* 0xc4 */ #define IA64_TASK_MM_OFFSET 88 /* 0x58 */ #define IA64_PT_REGS_CR_IPSR_OFFSET 0 /* 0x0 */ @@ -75,7 +73,7 @@ #define 
IA64_PT_REGS_F8_OFFSET 368 /* 0x170 */ #define IA64_PT_REGS_F9_OFFSET 384 /* 0x180 */ #define IA64_SWITCH_STACK_CALLER_UNAT_OFFSET 0 /* 0x0 */ -#define IA64_SWITCH_STACK_AR_FPSR_OFFSET 8 /* 0x8 */ +#define IA64_SWITCH_STACK_AR_FPSR_OFFSET 8 /* 0x8 */ #define IA64_SWITCH_STACK_F2_OFFSET 16 /* 0x10 */ #define IA64_SWITCH_STACK_F3_OFFSET 32 /* 0x20 */ #define IA64_SWITCH_STACK_F4_OFFSET 48 /* 0x30 */ @@ -114,21 +112,30 @@ #define IA64_SWITCH_STACK_B5_OFFSET 504 /* 0x1f8 */ #define IA64_SWITCH_STACK_AR_PFS_OFFSET 512 /* 0x200 */ #define IA64_SWITCH_STACK_AR_LC_OFFSET 520 /* 0x208 */ -#define IA64_SWITCH_STACK_AR_UNAT_OFFSET 528 /* 0x210 */ -#define IA64_SWITCH_STACK_AR_RNAT_OFFSET 536 /* 0x218 */ +#define IA64_SWITCH_STACK_AR_UNAT_OFFSET 528 /* 0x210 */ +#define IA64_SWITCH_STACK_AR_RNAT_OFFSET 536 /* 0x218 */ #define IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET 544 /* 0x220 */ #define IA64_SWITCH_STACK_PR_OFFSET 552 /* 0x228 */ #define IA64_SIGCONTEXT_AR_BSP_OFFSET 72 /* 0x48 */ +#define IA64_SIGCONTEXT_AR_FPSR_OFFSET 104 /* 0x68 */ #define IA64_SIGCONTEXT_AR_RNAT_OFFSET 80 /* 0x50 */ -#define IA64_SIGCONTEXT_FLAGS_OFFSET 0 /* 0x0 */ +#define IA64_SIGCONTEXT_AR_UNAT_OFFSET 96 /* 0x60 */ +#define IA64_SIGCONTEXT_B0_OFFSET 136 /* 0x88 */ #define IA64_SIGCONTEXT_CFM_OFFSET 48 /* 0x30 */ +#define IA64_SIGCONTEXT_FLAGS_OFFSET 0 /* 0x0 */ #define IA64_SIGCONTEXT_FR6_OFFSET 560 /* 0x230 */ -#define IA64_CLONE_VFORK 16384 /* 0x4000 */ +#define IA64_SIGCONTEXT_PR_OFFSET 128 /* 0x80 */ +#define IA64_SIGCONTEXT_R12_OFFSET 296 /* 0x128 */ +#define IA64_SIGFRAME_ARG0_OFFSET 0 /* 0x0 */ +#define IA64_SIGFRAME_ARG1_OFFSET 8 /* 0x8 */ +#define IA64_SIGFRAME_ARG2_OFFSET 16 /* 0x10 */ +#define IA64_SIGFRAME_RBS_BASE_OFFSET 24 /* 0x18 */ +#define IA64_SIGFRAME_HANDLER_OFFSET 32 /* 0x20 */ +#define IA64_SIGFRAME_SIGCONTEXT_OFFSET 176 /* 0xb0 */ +#define IA64_CLONE_VFORK 16384 /* 0x4000 */ #define IA64_CLONE_VM 256 /* 0x100 */ -#define IA64_CPU_IRQ_COUNT_OFFSET 8 /* 0x8 */ -#define 
IA64_CPU_BH_COUNT_OFFSET 12 /* 0xc */ -#define IA64_CPU_SOFTIRQ_ACTIVE_OFFSET 0 /* 0x0 */ -#define IA64_CPU_SOFTIRQ_MASK_OFFSET 4 /* 0x4 */ -#define IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET 16 /* 0x10 */ +#define IA64_CPU_IRQ_COUNT_OFFSET 0 /* 0x0 */ +#define IA64_CPU_BH_COUNT_OFFSET 4 /* 0x4 */ +#define IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET 12 /* 0xc */ #endif /* _ASM_IA64_OFFSETS_H */ diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 37cfd0b72f78..df0fd548653b 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -34,7 +34,7 @@ #define pgtable_cache_size (local_cpu_data->pgtable_cache_sz) static inline pgd_t* -pgd_alloc_one_fast (void) +pgd_alloc_one_fast (struct mm_struct *mm) { unsigned long *ret = pgd_quicklist; @@ -51,7 +51,7 @@ static inline pgd_t* pgd_alloc (struct mm_struct *mm) { /* the VM system never calls pgd_alloc_one_fast(), so we do it here. */ - pgd_t *pgd = pgd_alloc_one_fast(); + pgd_t *pgd = pgd_alloc_one_fast(mm); if (__builtin_expect(pgd == NULL, 0)) { pgd = (pgd_t *)__get_free_page(GFP_KERNEL); diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 5d844a25dc8f..51942aeee818 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -17,6 +17,7 @@ #include <asm/mman.h> #include <asm/page.h> #include <asm/processor.h> +#include <asm/system.h> #include <asm/types.h> #define IA64_MAX_PHYS_BITS 50 /* max. 
 number of physical address bits (architected) */ @@ -125,7 +126,7 @@ #include <asm/bitops.h> #include <asm/mmu_context.h> -#include <asm/system.h> +#include <asm/processor.h> /* * Next come the mappings that determine how mmap() protection bits @@ -443,7 +444,7 @@ extern void paging_init (void); #define SWP_TYPE(entry) (((entry).val >> 1) & 0xff) #define SWP_OFFSET(entry) (((entry).val << 1) >> 10) -#define SWP_ENTRY(type,offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 9) }) +#define SWP_ENTRY(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 9) }) #define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define swp_entry_to_pte(x) ((pte_t) { (x).val }) @@ -464,4 +465,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; # endif /* !__ASSEMBLY__ */ +/* + * Identity-mapped regions use a large page size. KERNEL_PG_NUM is the + * number of the (large) page frame that maps the kernel. + */ +#define KERNEL_PG_SHIFT _PAGE_SIZE_64M +#define KERNEL_PG_SIZE (1 << KERNEL_PG_SHIFT) +#define KERNEL_PG_NUM ((KERNEL_START - PAGE_OFFSET) / KERNEL_PG_SIZE) + #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 2e10c77c7bf6..bf8411d0fcb5 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -235,11 +235,7 @@ struct ia64_psr { * state comes earlier: */ struct cpuinfo_ia64 { - /* irq_stat and softirq should be 64-bit aligned */ - struct { - __u32 active; - __u32 mask; - } softirq; + /* irq_stat must be 64-bit aligned */ union { struct { __u32 irq_count; @@ -247,8 +243,8 @@ struct cpuinfo_ia64 { } f; __u64 irq_and_bh_counts; } irq_stat; + __u32 softirq_pending; __u32 phys_stacked_size_p8; /* size of physical stacked registers + 8 */ - __u32 pad0; __u64 itm_delta; /* # of clock cycles between clock ticks */ __u64 itm_next; /* interval timer mask value to use for next clock tick */ __u64 *pgd_quick; @@ -273,6 +269,7 @@ struct cpuinfo_ia64 { __u64 
ptce_base; __u32 ptce_count[2]; __u32 ptce_stride[2]; + struct task_struct *ksoftirqd; /* kernel softirq daemon for this CPU */ #ifdef CONFIG_SMP __u64 loops_per_jiffy; __u64 ipi_count; @@ -280,6 +277,9 @@ struct cpuinfo_ia64 { __u64 prof_multiplier; __u64 ipi_operation; #endif +#ifdef CONFIG_NUMA + struct cpuinfo_ia64 *cpu_data[NR_CPUS]; +#endif } __attribute__ ((aligned (PAGE_SIZE))) ; /* @@ -288,7 +288,22 @@ struct cpuinfo_ia64 { */ #define local_cpu_data ((struct cpuinfo_ia64 *) PERCPU_ADDR) -extern struct cpuinfo_ia64 cpu_data[NR_CPUS]; +/* + * On NUMA systems, cpu_data for each cpu is allocated during cpu_init() & is allocated on + * the node that contains the cpu. This minimizes off-node memory references. cpu_data + * for each cpu contains an array of pointers to the cpu_data structures of each of the + * other cpus. + * + * On non-NUMA systems, cpu_data is a static array allocated at compile time. References + * to the cpu_data of another cpu are done by direct references to the appropriate entry of + * the array. 
+ */ +#ifdef CONFIG_NUMA +# define cpu_data(cpu) local_cpu_data->cpu_data_ptrs[cpu] +#else + extern struct cpuinfo_ia64 _cpu_data[NR_CPUS]; +# define cpu_data(cpu) (&_cpu_data[cpu]) +#endif extern void identify_cpu (struct cpuinfo_ia64 *); extern void print_cpu_info (struct cpuinfo_ia64 *); @@ -314,20 +329,10 @@ struct siginfo; struct thread_struct { __u64 ksp; /* kernel stack pointer */ unsigned long flags; /* various flags */ - struct ia64_fpreg fph[96]; /* saved/loaded on demand */ - __u64 dbr[IA64_NUM_DBG_REGS]; - __u64 ibr[IA64_NUM_DBG_REGS]; -#ifdef CONFIG_PERFMON - __u64 pmc[IA64_NUM_PMC_REGS]; - __u64 pmd[IA64_NUM_PMD_REGS]; - unsigned long pfm_pend_notify; /* non-zero if we need to notify and block */ - void *pfm_context; /* pointer to detailed PMU context */ -# define INIT_THREAD_PM {0, }, {0, }, 0, 0, -#else -# define INIT_THREAD_PM -#endif __u64 map_base; /* base address for get_unmapped_area() */ __u64 task_size; /* limit for task size */ + struct siginfo *siginfo; /* current siginfo struct for ptrace() */ + #ifdef CONFIG_IA32_SUPPORT __u64 eflag; /* IA32 EFLAGS reg */ __u64 fsr; /* IA32 floating pt status reg */ @@ -345,7 +350,18 @@ struct thread_struct { #else # define INIT_THREAD_IA32 #endif /* CONFIG_IA32_SUPPORT */ - struct siginfo *siginfo; /* current siginfo struct for ptrace() */ +#ifdef CONFIG_PERFMON + __u64 pmc[IA64_NUM_PMC_REGS]; + __u64 pmd[IA64_NUM_PMD_REGS]; + unsigned long pfm_pend_notify; /* non-zero if we need to notify and block */ + void *pfm_context; /* pointer to detailed PMU context */ +# define INIT_THREAD_PM {0, }, {0, }, 0, 0, +#else +# define INIT_THREAD_PM +#endif + __u64 dbr[IA64_NUM_DBG_REGS]; + __u64 ibr[IA64_NUM_DBG_REGS]; + struct ia64_fpreg fph[96]; /* saved/loaded on demand */ }; #define INIT_MMAP { \ @@ -356,14 +372,14 @@ struct thread_struct { #define INIT_THREAD { \ 0, /* ksp */ \ 0, /* flags */ \ - {{{{0}}}, }, /* fph */ \ - {0, }, /* dbr */ \ - {0, }, /* ibr */ \ - INIT_THREAD_PM \ DEFAULT_MAP_BASE, /* map_base 
*/ \ DEFAULT_TASK_SIZE, /* task_size */ \ + 0, /* siginfo */ \ INIT_THREAD_IA32 \ - 0 /* siginfo */ \ + INIT_THREAD_PM \ + {0, }, /* dbr */ \ + {0, }, /* ibr */ \ + {{{{0}}}, } /* fph */ \ } #define start_thread(regs,new_ip,new_sp) do { \ @@ -416,7 +432,7 @@ struct task_struct; /* * Free all resources held by a thread. This is called after the * parent of DEAD_TASK has collected the exist status of the task via - * wait(). This is a no-op on IA-64. + * wait(). */ #ifdef CONFIG_PERFMON extern void release_thread (struct task_struct *task); @@ -513,8 +529,8 @@ extern void ia64_save_debug_regs (unsigned long *save_area); extern void ia64_load_debug_regs (unsigned long *save_area); #ifdef CONFIG_IA32_SUPPORT -extern void ia32_save_state (struct thread_struct *thread); -extern void ia32_load_state (struct thread_struct *thread); +extern void ia32_save_state (struct task_struct *task); +extern void ia32_load_state (struct task_struct *task); #endif #ifdef CONFIG_PERFMON diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index 6e7417b453f1..5210d2de1df4 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -220,11 +220,16 @@ struct switch_stack { struct task_struct; /* forward decl */ extern void show_regs (struct pt_regs *); - extern unsigned long ia64_get_user_bsp (struct task_struct *, struct pt_regs *); - extern long ia64_peek (struct task_struct *, unsigned long, unsigned long, long *); - extern long ia64_poke (struct task_struct *, unsigned long, unsigned long, long); + extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *, + unsigned long *); + extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long, + unsigned long, long *); + extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long, + unsigned long, long); extern void ia64_flush_fph (struct task_struct *); extern void ia64_sync_fph (struct task_struct *); + extern long ia64_sync_user_rbs (struct 
task_struct *, struct switch_stack *, + unsigned long, unsigned long); /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */ extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat); diff --git a/include/asm-ia64/ptrace_offsets.h b/include/asm-ia64/ptrace_offsets.h index 6fa6fb446f6b..44a76cdcc4e9 100644 --- a/include/asm-ia64/ptrace_offsets.h +++ b/include/asm-ia64/ptrace_offsets.h @@ -173,7 +173,7 @@ #define PT_AR_BSPSTORE 0x0868 #define PT_PR 0x0870 #define PT_B6 0x0878 -#define PT_AR_BSP 0x0880 +#define PT_AR_BSP 0x0880 /* note: this points to the *end* of the backing store! */ #define PT_R1 0x0888 #define PT_R2 0x0890 #define PT_R3 0x0898 diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h index f24928e44c9a..64e652b2721b 100644 --- a/include/asm-ia64/sal.h +++ b/include/asm-ia64/sal.h @@ -7,8 +7,8 @@ * This is based on version 2.5 of the manual "IA-64 System * Abstraction Layer". * - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com> * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com> * * 99/09/29 davidm Updated for SAL 2.6. diff --git a/include/asm-ia64/sigcontext.h b/include/asm-ia64/sigcontext.h index 5abb275a9e9f..5ff4a2ff67b7 100644 --- a/include/asm-ia64/sigcontext.h +++ b/include/asm-ia64/sigcontext.h @@ -40,6 +40,8 @@ struct sigcontext { unsigned long sc_gr[32]; /* general registers (static partition) */ struct ia64_fpreg sc_fr[128]; /* floating-point registers */ + unsigned long sc_rsvd[16]; /* reserved for future use */ + /* * The mask must come last so we can increase _NSIG_WORDS * without breaking binary compatibility. 
diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h index f6a01d64034d..45dd55e59b1c 100644 --- a/include/asm-ia64/signal.h +++ b/include/asm-ia64/signal.h @@ -56,7 +56,7 @@ * SA_FLAGS values: * * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the + * SA_INTERRUPT is a no-op, but left due to historical reasons. * SA_RESTART flag to get restarting signals (which were the default long ago) * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. * SA_RESETHAND clears the handler when the signal is delivered. @@ -105,7 +105,6 @@ #define SA_PROBE SA_ONESHOT #define SA_SAMPLE_RANDOM SA_RESTART #define SA_SHIRQ 0x04000000 -#define SA_LEGACY 0x02000000 /* installed via a legacy irq? */ #endif /* __KERNEL__ */ diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index f9878b782f56..a4bab219f605 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -1,7 +1,7 @@ /* * SMP Support * - * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> * Copyright (C) 2001 Hewlett-Packard Co * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com> @@ -35,14 +35,13 @@ extern struct smp_boot_data { extern char no_int_routing __initdata; -extern unsigned long cpu_present_map; -extern unsigned long cpu_online_map; +extern volatile unsigned long cpu_online_map; extern unsigned long ipi_base_addr; -extern int __cpu_physical_id[NR_CPUS]; extern unsigned char smp_int_redirect; extern int smp_num_cpus; -#define cpu_physical_id(i) __cpu_physical_id[i] +extern volatile int ia64_cpu_to_sapicid[]; +#define cpu_physical_id(i) ia64_cpu_to_sapicid[i] #define cpu_number_map(i) (i) #define cpu_logical_map(i) (i) @@ -70,7 +69,7 @@ cpu_logical_id (int cpuid) * max_xtp : never deliver interrupts to this CPU. 
*/ -static inline void +static inline void min_xtp (void) { if (smp_int_redirect & SMP_IRQ_REDIRECTION) @@ -85,13 +84,13 @@ normal_xtp (void) } static inline void -max_xtp (void) +max_xtp (void) { if (smp_int_redirect & SMP_IRQ_REDIRECTION) writeb(0x0f, ipi_base_addr | XTP_OFFSET); /* Set XTP to max */ } -static inline unsigned int +static inline unsigned int hard_smp_processor_id (void) { union { diff --git a/include/asm-ia64/softirq.h b/include/asm-ia64/softirq.h index 5d3c7ab2ae60..6ac2197344b6 100644 --- a/include/asm-ia64/softirq.h +++ b/include/asm-ia64/softirq.h @@ -7,8 +7,18 @@ */ #include <asm/hardirq.h> +#define __local_bh_enable() do { barrier(); local_bh_count()--; } while (0) + #define local_bh_disable() do { local_bh_count()++; barrier(); } while (0) -#define local_bh_enable() do { barrier(); local_bh_count()--; } while (0) +#define local_bh_enable() \ +do { \ + __local_bh_enable(); \ + if (__builtin_expect(local_softirq_pending(), 0) && local_bh_count() == 0) \ + do_softirq(); \ +} while (0) + + +#define __cpu_raise_softirq(cpu,nr) set_bit((nr), &softirq_pending(cpu)) #define in_softirq() (local_bh_count() != 0) diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h index 70ff9bda4b44..3a6d2e0e7cf4 100644 --- a/include/asm-ia64/spinlock.h +++ b/include/asm-ia64/spinlock.h @@ -19,12 +19,12 @@ #ifdef NEW_LOCK -typedef struct { +typedef struct { volatile unsigned int lock; } spinlock_t; #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } -#define spin_lock_init(x) ((x)->lock = 0) +#define spin_lock_init(x) ((x)->lock = 0) /* * Streamlined test_and_set_bit(0, (x)). 
We use test-and-test-and-set @@ -62,12 +62,12 @@ typedef struct { }) #define spin_is_locked(x) ((x)->lock != 0) -#define spin_unlock(x) do {((spinlock_t *) x)->lock = 0;} while (0) -#define spin_unlock_wait(x) do {} while ((x)->lock) +#define spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0;} while (0) +#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock) #else /* !NEW_LOCK */ -typedef struct { +typedef struct { volatile unsigned int lock; } spinlock_t; @@ -96,7 +96,7 @@ typedef struct { :: "r"(&(x)->lock) : "r2", "r29", "memory") #define spin_is_locked(x) ((x)->lock != 0) -#define spin_unlock(x) do {((spinlock_t *) x)->lock = 0; barrier(); } while (0) +#define spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0) #define spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) #define spin_unlock_wait(x) do { barrier(); } while ((x)->lock) diff --git a/include/asm-ia64/string.h b/include/asm-ia64/string.h index c17fc8de4280..5c89e5c5584d 100644 --- a/include/asm-ia64/string.h +++ b/include/asm-ia64/string.h @@ -10,7 +10,6 @@ */ #include <linux/config.h> /* remove this once we remove the A-step workaround... 
*/ -#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC #define __HAVE_ARCH_STRLEN 1 /* see arch/ia64/lib/strlen.S */ #define __HAVE_ARCH_MEMSET 1 /* see arch/ia64/lib/memset.S */ @@ -21,6 +20,4 @@ extern __kernel_size_t strlen (const char *); extern void *memset (void *, int, __kernel_size_t); extern void *memcpy (void *, const void *, __kernel_size_t); -#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ - #endif /* _ASM_IA64_STRING_H */ diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index cd46c860cf48..f44f3777de01 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -29,8 +29,7 @@ #define GATE_ADDR (0xa000000000000000 + PAGE_SIZE) #define PERCPU_ADDR (0xa000000000000000 + 2*PAGE_SIZE) -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ - || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC) +#if defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC) /* Workaround for Errata 97. */ # define IA64_SEMFIX_INSN mf; # define IA64_SEMFIX "mf;" diff --git a/include/asm-ia64/unaligned.h b/include/asm-ia64/unaligned.h index c9d6dca0eca7..118676881435 100644 --- a/include/asm-ia64/unaligned.h +++ b/include/asm-ia64/unaligned.h @@ -1,6 +1,8 @@ #ifndef _ASM_IA64_UNALIGNED_H #define _ASM_IA64_UNALIGNED_H +#include <linux/types.h> + /* * The main single-value unaligned transfer routines. Derived from * the Linux/Alpha version. diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index d2e3bdd3e28e..e2f8d278e727 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -204,6 +204,7 @@ #define __NR_fstat 1212 #define __NR_clone2 1213 #define __NR_getdents64 1214 +#define __NR_getunwind 1215 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) diff --git a/include/asm-ia64/unwind.h b/include/asm-ia64/unwind.h index fa00da32af01..d92df4658237 100644 --- a/include/asm-ia64/unwind.h +++ b/include/asm-ia64/unwind.h @@ -94,9 +94,10 @@ struct unw_frame_info { * Initialize unwind support. 
*/ extern void unw_init (void); +extern void unw_create_gate_table (void); extern void *unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, - void *table_start, void *table_end); + const void *table_start, const void *table_end); extern void unw_remove_unwind_table (void *handle); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1c45c2cb3637..2b2c0bb1e7cb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -174,8 +174,6 @@ extern int * max_sectors[MAX_BLKDEV]; extern int * max_segments[MAX_BLKDEV]; -extern atomic_t queued_sectors; - #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 @@ -203,14 +201,7 @@ static inline int get_hardsect_size(kdev_t dev) return 512; } -#define blk_finished_io(nsects) \ - atomic_sub(nsects, &queued_sectors); \ - if (atomic_read(&queued_sectors) < 0) { \ - printk("block: queued_sectors < 0\n"); \ - atomic_set(&queued_sectors, 0); \ - } - -#define blk_started_io(nsects) \ - atomic_add(nsects, &queued_sectors); +#define blk_finished_io(nsects) do { } while (0) +#define blk_started_io(nsects) do { } while (0) #endif diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df33ef8c6a8d..f11118fac4d3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -171,11 +171,11 @@ extern int d_invalidate(struct dentry *); #define shrink_dcache() prune_dcache(0) struct zone_struct; /* dcache memory management */ -extern void shrink_dcache_memory(int, unsigned int); +extern int shrink_dcache_memory(int, unsigned int); extern void prune_dcache(int); /* icache memory management (defined in linux/fs/inode.c) */ -extern void shrink_icache_memory(int, int); +extern int shrink_icache_memory(int, int); extern void prune_icache(int); /* only used at mount-time */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index bcb9a1752d0c..415a2dbec3ec 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1652,7 +1652,6 @@ int 
reiserfs_convert_objectid_map_v1(struct super_block *) ; /* stree.c */ int B_IS_IN_TREE(struct buffer_head *); -extern inline void copy_key (void * to, void * from); extern inline void copy_short_key (void * to, void * from); extern inline void copy_item_head(void * p_v_to, void * p_v_from); diff --git a/include/linux/sockios.h b/include/linux/sockios.h index d360c7533d9f..1857eb928eaf 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -109,6 +109,8 @@ * vector. Each device should include this file and redefine these names * as their own. Because these are device dependent it is a good idea * _NOT_ to issue them to random objects and hope. + * + * THESE IOCTLS ARE _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X -DaveM */ #define SIOCDEVPRIVATE 0x89F0 /* to 89FF */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2a7f832b8b34..5507bd465a9a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -630,8 +630,8 @@ unsigned int zone_free_shortage(zone_t *zone) goto ret; if (zone->inactive_clean_pages + zone->free_pages - < zone->pages_min) { - sum += zone->pages_min; + < zone->pages_high) { + sum += zone->pages_high; sum -= zone->free_pages; sum -= zone->inactive_clean_pages; } diff --git a/mm/vmscan.c b/mm/vmscan.c index e82cc8735007..22f8effef852 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -593,13 +593,9 @@ dirty_page_rescan: * If we're freeing buffer cache pages, stop when * we've got enough free memory. 
*/ - if (freed_page) { - if (zone) { - if (!zone_free_shortage(zone)) - break; - } else if (!free_shortage()) - break; - } + if (freed_page && !total_free_shortage()) + break; + continue; } else if (page->mapping && !PageDirty(page)) { /* @@ -1000,10 +996,8 @@ static int do_try_to_free_pages(unsigned int gfp_mask, int user) ret += page_launder(gfp_mask, user); - if (total_free_shortage()) { - shrink_dcache_memory(DEF_PRIORITY, gfp_mask); - shrink_icache_memory(DEF_PRIORITY, gfp_mask); - } + ret += shrink_dcache_memory(DEF_PRIORITY, gfp_mask); + ret += shrink_icache_memory(DEF_PRIORITY, gfp_mask); /* * If needed, we move pages from the active list diff --git a/net/core/dev.c b/net/core/dev.c index 7bd8fee7b424..89d1a0d5d797 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2169,7 +2169,10 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) default: if ((cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCETHTOOL) { + cmd == SIOCETHTOOL || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG) { if (dev->do_ioctl) { if (!netif_device_present(dev)) return -ENODEV; @@ -2291,6 +2294,9 @@ int dev_ioctl(unsigned int cmd, void *arg) case SIOCSIFTXQLEN: case SIOCSIFNAME: case SIOCETHTOOL: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: if (!capable(CAP_NET_ADMIN)) return -EPERM; dev_load(ifr.ifr_name); |
