diff options
| author | Paul Mackerras <paulus@samba.org> | 2003-05-25 05:20:54 +1000 |
|---|---|---|
| committer | Paul Mackerras <paulus@samba.org> | 2003-05-25 05:20:54 +1000 |
| commit | af75709ac2e8fc62ee00be0078bff1f72deb16e7 (patch) | |
| tree | e58f6f63fa7dfa6f98537821d61833d42800d8c0 | |
| parent | 41bd1f6ef75d4684e24102c57d9989e0c777be09 (diff) | |
| parent | 1a5694336d14095f2475e7724dfcce4510366484 (diff) | |
Merge samba.org:/stuff/paulus/kernel/linux-2.5
into samba.org:/stuff/paulus/kernel/for-linus-ppc
129 files changed, 2681 insertions, 2321 deletions
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 61710d050030..130787b49ef2 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -2013,7 +2013,7 @@ static int __init apm_init(void) apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); if (apm_proc) - SET_MODULE_OWNER(apm_proc); + apm_proc->owner = THIS_MODULE; kernel_thread(apm, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD); diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 4151df6e35fe..9ff6d867b3be 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -652,6 +652,7 @@ config IOMMU_DEBUG config IOMMU_LEAK bool "IOMMU leak tracing" depends on DEBUG_KERNEL + depends on IOMMU_DEBUG help Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 290803ace1a3..1faa3f4937ab 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -46,6 +46,7 @@ CFLAGS += -pipe CFLAGS += -fno-reorder-blocks # should lower this a lot and see how much .text is saves CFLAGS += -finline-limit=2000 +CFLAGS += -Wno-sign-compare #CFLAGS += -g # don't enable this when you use kgdb: ifneq ($(CONFIG_X86_REMOTE_DEBUG),y) diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 4e3820457f54..2839cbe424b3 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -4,7 +4,6 @@ CONFIG_X86_64=y CONFIG_X86=y CONFIG_MMU=y -CONFIG_SWAP=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_X86_CMPXCHG=y CONFIG_EARLY_PRINTK=y @@ -18,6 +17,7 @@ CONFIG_EXPERIMENTAL=y # # General setup # +CONFIG_SWAP=y CONFIG_SYSVIPC=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y @@ -47,8 +47,12 @@ CONFIG_X86_CPUID=y CONFIG_X86_IO_APIC=y CONFIG_X86_LOCAL_APIC=y CONFIG_MTRR=y -CONFIG_HUGETLB_PAGE=y +# CONFIG_HUGETLB_PAGE is not set CONFIG_SMP=y +# CONFIG_PREEMPT is not set +CONFIG_K8_NUMA=y +CONFIG_DISCONTIGMEM=y +CONFIG_NUMA=y CONFIG_HAVE_DEC_LOCK=y 
CONFIG_NR_CPUS=8 CONFIG_GART_IOMMU=y @@ -222,9 +226,8 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK_DEV=y +# CONFIG_NETLINK_DEV is not set # CONFIG_NETFILTER is not set -CONFIG_FILTER=y CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -239,8 +242,9 @@ CONFIG_IP_MULTICAST=y # CONFIG_SYN_COOKIES is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set -# CONFIG_XFRM_USER is not set +# CONFIG_INET_IPCOMP is not set # CONFIG_IPV6 is not set +# CONFIG_XFRM_USER is not set # # SCTP Configuration (EXPERIMENTAL) @@ -331,6 +335,11 @@ CONFIG_E1000=m # CONFIG_R8169 is not set # CONFIG_SK98LIN is not set CONFIG_TIGON3=y + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set @@ -405,15 +414,7 @@ CONFIG_KEYBOARD_ATKBD=y CONFIG_INPUT_MOUSE=y CONFIG_MOUSE_PS2=y # CONFIG_MOUSE_SERIAL is not set -CONFIG_INPUT_JOYSTICK=y -# CONFIG_JOYSTICK_IFORCE is not set -# CONFIG_JOYSTICK_WARRIOR is not set -# CONFIG_JOYSTICK_MAGELLAN is not set -# CONFIG_JOYSTICK_SPACEORB is not set -# CONFIG_JOYSTICK_SPACEBALL is not set -# CONFIG_JOYSTICK_STINGER is not set -# CONFIG_JOYSTICK_TWIDDLER is not set -# CONFIG_INPUT_JOYDUMP is not set +# CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -452,6 +453,7 @@ CONFIG_UNIX98_PTY_COUNT=256 # # I2C Hardware Sensors Chip support # +# CONFIG_I2C_SENSOR is not set # # Mice @@ -468,8 +470,7 @@ CONFIG_UNIX98_PTY_COUNT=256 # Watchdog Cards # # CONFIG_WATCHDOG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_AMD_RNG is not set +CONFIG_HW_RANDOM=y # CONFIG_NVRAM is not set CONFIG_RTC=y # CONFIG_DTLK is not set @@ -481,8 +482,8 @@ CONFIG_RTC=y # Ftape, the floppy tape device driver # # CONFIG_FTAPE is not set -# CONFIG_AGP is not set -# CONFIG_AGP_GART is not set +CONFIG_AGP=y +CONFIG_AGP_AMD_8151=y # CONFIG_DRM is not set # CONFIG_MWAVE is not set CONFIG_RAW_DRIVER=y @@ 
-498,57 +499,75 @@ CONFIG_RAW_DRIVER=y # CONFIG_VIDEO_DEV is not set # +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# # File systems # -# CONFIG_QUOTA is not set -CONFIG_AUTOFS_FS=y -# CONFIG_AUTOFS4_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set CONFIG_REISERFS_FS=y # CONFIG_REISERFS_CHECK is not set # CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_JFS_FS is not set +CONFIG_XFS_FS=m +# CONFIG_XFS_RT is not set +# CONFIG_XFS_QUOTA is not set +# CONFIG_XFS_POSIX_ACL is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=y +# CONFIG_AUTOFS4_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_FAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +CONFIG_DEVPTS_FS=y +CONFIG_TMPFS=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -# CONFIG_FAT_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_HUGETLBFS=y -CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -# CONFIG_MINIX_FS is not set # CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set # CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -CONFIG_DEVPTS_FS=y # CONFIG_QNX4FS_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set # CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set # 
CONFIG_UFS_FS is not set -CONFIG_XFS_FS=m -# CONFIG_XFS_RT is not set -# CONFIG_XFS_QUOTA is not set -# CONFIG_XFS_POSIX_ACL is not set # # Network File Systems # -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V4 is not set @@ -556,14 +575,16 @@ CONFIG_NFSD=y CONFIG_NFSD_V3=y # CONFIG_NFSD_V4 is not set CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_GSS is not set CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y -# CONFIG_CIFS is not set +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_GSS is not set # CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set # CONFIG_AFS_FS is not set # @@ -594,6 +615,7 @@ CONFIG_DUMMY_CONSOLE=y # USB support # # CONFIG_USB is not set +# CONFIG_USB_GADGET is not set # # Bluetooth support @@ -615,6 +637,9 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_INIT_DEBUG is not set CONFIG_KALLSYMS=y # CONFIG_FRAME_POINTER is not set +CONFIG_IOMMU_DEBUG=y +CONFIG_IOMMU_LEAK=y +CONFIG_MCE_DEBUG=y # # Security options diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile index 55734381452d..1edc834d0a35 100644 --- a/arch/x86_64/ia32/Makefile +++ b/arch/x86_64/ia32/Makefile @@ -5,3 +5,12 @@ obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \ ia32_signal.o tls32.o \ ia32_binfmt.o fpu32.o ptrace32.o ipc32.o syscall32.o + +$(obj)/syscall32.o: $(src)/syscall32.c $(obj)/vsyscall.so + +# The DSO images are built using a special linker script. +$(obj)/vsyscall.so: $(src)/vsyscall.lds $(obj)/vsyscall.o + $(CC) -m32 -nostdlib -shared -s -Wl,-soname=linux-vsyscall.so.1 \ + -o $@ -Wl,-T,$^ + +AFLAGS_vsyscall.o = -m32 diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 61d9c06708bb..db4a4adc82b1 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -6,11 +6,11 @@ * of ugly preprocessor tricks. Talk about very very poor man's inheritance. 
*/ #include <linux/types.h> -#include <linux/compat.h> #include <linux/config.h> #include <linux/stddef.h> #include <linux/rwsem.h> #include <linux/sched.h> +#include <linux/compat.h> #include <linux/string.h> #include <linux/binfmts.h> #include <linux/mm.h> @@ -23,12 +23,17 @@ #include <asm/i387.h> #include <asm/uaccess.h> #include <asm/ia32.h> +#include <asm/vsyscall32.h> #define ELF_NAME "elf/i386" #define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 -#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO, 0xffffe000) +#define ARCH_DLINFO do { \ + NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \ +} while(0) struct file; struct elf_phdr; @@ -54,6 +59,47 @@ typedef unsigned int elf_greg_t; #define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; +/* + * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the vsyscall DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. + * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the vsyscall DSO was being used. 
+ */ +#define ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum) +#define ELF_CORE_WRITE_EXTRA_PHDRS \ +do { \ + const struct elf32_phdr *const vsyscall_phdrs = \ + (const struct elf32_phdr *) (VSYSCALL32_BASE \ + + VSYSCALL32_EHDR->e_phoff); \ + int i; \ + Elf32_Off ofs = 0; \ + for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \ + struct elf_phdr phdr = vsyscall_phdrs[i]; \ + if (phdr.p_type == PT_LOAD) { \ + ofs = phdr.p_offset = offset; \ + offset += phdr.p_filesz; \ + } \ + else \ + phdr.p_offset += ofs; \ + phdr.p_paddr = 0; /* match other core phdrs */ \ + DUMP_WRITE(&phdr, sizeof(phdr)); \ + } \ +} while (0) +#define ELF_CORE_WRITE_EXTRA_DATA \ +do { \ + const struct elf32_phdr *const vsyscall_phdrs = \ + (const struct elf32_phdr *) (VSYSCALL32_BASE \ + + VSYSCALL32_EHDR->e_phoff); \ + int i; \ + for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \ + if (vsyscall_phdrs[i].p_type == PT_LOAD) \ + DUMP_WRITE((void *) (u64) vsyscall_phdrs[i].p_vaddr, \ + vsyscall_phdrs[i].p_filesz); \ + } \ +} while (0) + struct elf_siginfo { int si_signo; /* signal number */ @@ -157,7 +203,6 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, elf_fpregset_t *fpu) struct _fpstate_ia32 *fpstate = (void*)fpu; struct pt_regs *regs = (struct pt_regs *)(tsk->thread.rsp0); mm_segment_t oldfs = get_fs(); - int ret; if (!tsk->used_math) return 0; @@ -165,12 +210,12 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, elf_fpregset_t *fpu) if (tsk == current) unlazy_fpu(tsk); set_fs(KERNEL_DS); - ret = save_i387_ia32(tsk, fpstate, regs, 1); + save_i387_ia32(tsk, fpstate, regs, 1); /* Correct for i386 bug. 
It puts the fop into the upper 16bits of the tag word (like FXSAVE), not into the fcs*/ fpstate->cssel |= fpstate->tag & 0xffff0000; set_fs(oldfs); - return ret; + return 1; } #define ELF_CORE_COPY_XFPREGS 1 @@ -302,8 +347,9 @@ int setup_arg_pages(struct linux_binprm *bprm) mpnt->vm_mm = mm; mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = IA32_STACK_TOP; - mpnt->vm_page_prot = PAGE_COPY_EXEC; - mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_flags = vm_stack_flags32; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? + PAGE_COPY_EXEC : PAGE_COPY; mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; @@ -333,7 +379,7 @@ elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int p struct task_struct *me = current; if (prot & PROT_READ) - prot |= PROT_EXEC; + prot |= vm_force_exec32; down_write(&me->mm->mmap_sem); map_addr = do_mmap(filep, ELF_PAGESTART(addr), diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c index bed4b95343fb..fa2efa500c0f 100644 --- a/arch/x86_64/ia32/ia32_ioctl.c +++ b/arch/x86_64/ia32/ia32_ioctl.c @@ -10,10 +10,10 @@ */ #include <linux/config.h> +#include <linux/sched.h> #include <linux/types.h> #include <linux/compat.h> #include <linux/kernel.h> -#include <linux/sched.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/ioctl.h> @@ -108,10 +108,6 @@ #include <asm/mtrr.h> - -#define A(__x) ((void *)(unsigned long)(__x)) -#define AA(__x) A(__x) - /* Aiee. 
Someone does not find a difference between int and long */ #define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) #define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) @@ -163,10 +159,10 @@ static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) } struct video_tuner32 { - s32 tuner; - u8 name[32]; - u32 rangelow, rangehigh; - u32 flags; + compat_int_t tuner; + char name[32]; + compat_ulong_t rangelow, rangehigh; + u32 flags; /* It is really u32 in videodev.h */ u16 mode, signal; }; @@ -203,8 +199,8 @@ static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up) } struct video_buffer32 { - /* void * */ u32 base; - s32 height, width, depth, bytesperline; + compat_caddr_t base; + compat_int_t height, width, depth, bytesperline; }; static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up) @@ -235,14 +231,14 @@ static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up } struct video_clip32 { - s32 x, y, width, height; - /* struct video_clip32 * */ u32 next; + s32 x, y, width, height; /* Its really s32 in videodev.h */ + compat_caddr_t next; }; struct video_window32 { u32 x, y, width, height, chromakey, flags; - /* struct video_clip32 * */ u32 clips; - s32 clipcount; + compat_caddr_t clips; + compat_int_t clipcount; }; static void free_kvideo_clips(struct video_window *kp) @@ -270,7 +266,7 @@ static int get_video_window32(struct video_window *kp, struct video_window32 *up __get_user(kp->flags, &up->flags); __get_user(kp->clipcount, &up->clipcount); __get_user(tmp, &up->clips); - ucp = (struct video_clip32 *)A(tmp); + ucp = compat_ptr(tmp); kp->clips = NULL; nclips = kp->clipcount; @@ -421,8 +417,8 @@ static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg) } struct ifmap32 { - u32 mem_start; - u32 mem_end; + compat_ulong_t mem_start; + compat_ulong_t mem_end; unsigned short base_addr; unsigned char irq; unsigned char dma; @@ -442,17 +438,18 @@ struct ifreq32 { struct sockaddr 
ifru_netmask; struct sockaddr ifru_hwaddr; short ifru_flags; - int ifru_ivalue; - int ifru_mtu; + compat_int_t ifru_ivalue; + compat_int_t ifru_mtu; struct ifmap32 ifru_map; char ifru_slave[IFNAMSIZ]; /* Just fits the size */ char ifru_newname[IFNAMSIZ]; compat_caddr_t ifru_data; + /* XXXX? ifru_settings should be here */ } ifr_ifru; }; struct ifconf32 { - int ifc_len; /* size of buffer */ + compat_int_t ifc_len; /* size of buffer */ compat_caddr_t ifcbuf; }; @@ -504,7 +501,7 @@ static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) return -ENOMEM; } ifr = ifc.ifc_req; - ifr32 = (struct ifreq32 *)A(ifc32.ifcbuf); + ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0; i < ifc32.ifc_len; i += sizeof (struct ifreq32)) { if (copy_from_user(ifr, ifr32, sizeof (struct ifreq32))) { kfree (ifc.ifc_buf); @@ -518,7 +515,7 @@ static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) set_fs (old_fs); if (!err) { ifr = ifc.ifc_req; - ifr32 = (struct ifreq32 *)A(ifc32.ifcbuf); + ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; i < ifc32.ifc_len && j < ifc.ifc_len; i += sizeof (struct ifreq32), j += sizeof (struct ifreq)) { int k = copy_to_user(ifr32, ifr, sizeof (struct ifreq32)); @@ -568,7 +565,7 @@ static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); - if (get_user(ethcmd, (u32 *)A(data))) { + if (get_user(ethcmd, (u32 *)compat_ptr(data))) { err = -EFAULT; goto out; } @@ -579,12 +576,16 @@ static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) case ETHTOOL_GLINK: case ETHTOOL_NWAY_RST: len = sizeof(struct ethtool_value); break; case ETHTOOL_GREGS: { - struct ethtool_regs *regaddr = (struct ethtool_regs *)A(data); + struct ethtool_regs *regaddr = compat_ptr(data); /* darned variable size arguments */ if (get_user(len, (u32 *)&regaddr->len)) { err = -EFAULT; goto out; } + if (len > PAGE_SIZE - sizeof(struct ethtool_regs)) { + err = 
-EINVAL; + goto out; + } len += sizeof(struct ethtool_regs); break; } @@ -595,7 +596,7 @@ static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) goto out; } - if (copy_from_user(ifr.ifr_data, (char *)A(data), len)) { + if (copy_from_user(ifr.ifr_data, compat_ptr(data), len)) { err = -EFAULT; goto out; } @@ -608,7 +609,7 @@ static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) u32 data; __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); - len = copy_to_user((char *)A(data), ifr.ifr_data, len); + len = copy_to_user(compat_ptr(data), ifr.ifr_data, len); if (len) err = -EFAULT; } @@ -651,7 +652,7 @@ static int bond_ioctl(unsigned long fd, unsigned int cmd, unsigned long arg) }; __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); - if (copy_from_user(ifr.ifr_data, (char *)A(data), len)) { + if (copy_from_user(ifr.ifr_data, compat_ptr(data), len)) { err = -EFAULT; goto out; } @@ -661,7 +662,7 @@ static int bond_ioctl(unsigned long fd, unsigned int cmd, unsigned long arg) err = sys_ioctl (fd, cmd, (unsigned long)&ifr); set_fs (old_fs); if (!err) { - len = copy_to_user((char *)A(data), ifr.ifr_data, len); + len = copy_to_user(compat_ptr(data), ifr.ifr_data, len); if (len) err = -EFAULT; } @@ -684,7 +685,7 @@ int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return -EFAULT; if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) return -EFAULT; - data64 = (void *) A(data32); + data64 = compat_ptr(data32); u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); @@ -821,7 +822,7 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) ret |= __get_user (r4.rt_irtt, &(((struct rtentry32 *)arg)->rt_irtt)); ret |= __get_user (rtdev, &(((struct rtentry32 *)arg)->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, (char *)A(rtdev), 15); + ret |= copy_from_user (devname, compat_ptr(rtdev), 15); r4.rt_dev = devname; devname[15] = 0; } else 
r4.rt_dev = 0; @@ -868,23 +869,23 @@ static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg) struct fb_fix_screeninfo32 { char id[16]; compat_caddr_t smem_start; - __u32 smem_len; - __u32 type; - __u32 type_aux; - __u32 visual; - __u16 xpanstep; - __u16 ypanstep; - __u16 ywrapstep; - __u32 line_length; + u32 smem_len; + u32 type; + u32 type_aux; + u32 visual; + u16 xpanstep; + u16 ypanstep; + u16 ywrapstep; + u32 line_length; compat_caddr_t mmio_start; - __u32 mmio_len; - __u32 accel; - __u16 reserved[3]; + u32 mmio_len; + u32 accel; + u16 reserved[3]; }; struct fb_cmap32 { - __u32 start; - __u32 len; + u32 start; + u32 len; compat_caddr_t red; compat_caddr_t green; compat_caddr_t blue; @@ -918,6 +919,10 @@ static int fb_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) err = -EFAULT; goto out; } + if (cmap.len > PAGE_SIZE/sizeof(u16)) { + err = -EINVAL; + goto out; + } err = -ENOMEM; cmap.red = kmalloc(cmap.len * sizeof(__u16), GFP_KERNEL); if (!cmap.red) @@ -937,10 +942,10 @@ static int fb_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) if (cmd == FBIOGETCMAP) break; - err = __copy_from_user(cmap.red, (char *)A(red), cmap.len * sizeof(__u16)); - err |= __copy_from_user(cmap.green, (char *)A(green), cmap.len * sizeof(__u16)); - err |= __copy_from_user(cmap.blue, (char *)A(blue), cmap.len * sizeof(__u16)); - if (cmap.transp) err |= __copy_from_user(cmap.transp, (char *)A(transp), cmap.len * sizeof(__u16)); + err = __copy_from_user(cmap.red, compat_ptr(red), cmap.len * sizeof(__u16)); + err |= __copy_from_user(cmap.green, compat_ptr(green), cmap.len * sizeof(__u16)); + err |= __copy_from_user(cmap.blue, compat_ptr(blue), cmap.len * sizeof(__u16)); + if (cmap.transp) err |= __copy_from_user(cmap.transp, compat_ptr(transp), cmap.len * sizeof(__u16)); if (err) { err = -EFAULT; goto out; @@ -979,11 +984,11 @@ static int fb_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) err |= __copy_to_user((char 
*)((struct fb_fix_screeninfo32 *)arg)->reserved, (char *)fix.reserved, sizeof(fix.reserved)); break; case FBIOGETCMAP: - err = __copy_to_user((char *)A(red), cmap.red, cmap.len * sizeof(__u16)); - err |= __copy_to_user((char *)A(green), cmap.blue, cmap.len * sizeof(__u16)); - err |= __copy_to_user((char *)A(blue), cmap.blue, cmap.len * sizeof(__u16)); + err = __copy_to_user(compat_ptr(red), cmap.red, cmap.len * sizeof(__u16)); + err |= __copy_to_user(compat_ptr(green), cmap.blue, cmap.len * sizeof(__u16)); + err |= __copy_to_user(compat_ptr(blue), cmap.blue, cmap.len * sizeof(__u16)); if (cmap.transp) - err |= __copy_to_user((char *)A(transp), cmap.transp, cmap.len * sizeof(__u16)); + err |= __copy_to_user(compat_ptr(transp), cmap.transp, cmap.len * sizeof(__u16)); break; case FBIOPUTCMAP: break; @@ -1018,11 +1023,11 @@ static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg } struct floppy_struct32 { - unsigned int size; - unsigned int sect; - unsigned int head; - unsigned int track; - unsigned int stretch; + compat_uint_t size; + compat_uint_t sect; + compat_uint_t head; + compat_uint_t track; + compat_uint_t stretch; unsigned char gap; unsigned char rate; unsigned char spec1; @@ -1032,51 +1037,51 @@ struct floppy_struct32 { struct floppy_drive_params32 { char cmos; - u32 max_dtr; - u32 hlt; - u32 hut; - u32 srt; - u32 spinup; - u32 spindown; + compat_ulong_t max_dtr; + compat_ulong_t hlt; + compat_ulong_t hut; + compat_ulong_t srt; + compat_ulong_t spinup; + compat_ulong_t spindown; unsigned char spindown_offset; unsigned char select_delay; unsigned char rps; unsigned char tracks; - u32 timeout; + compat_ulong_t timeout; unsigned char interleave_sect; struct floppy_max_errors max_errors; char flags; char read_track; short autodetect[8]; - int checkfreq; - int native_format; + compat_int_t checkfreq; + compat_int_t native_format; }; struct floppy_drive_struct32 { signed char flags; - u32 spinup_date; - u32 select_date; - u32 
first_read_date; + compat_ulong_t spinup_date; + compat_ulong_t select_date; + compat_ulong_t first_read_date; short probed_format; short track; short maxblock; short maxtrack; - int generation; - int keep_data; - int fd_ref; - int fd_device; - int last_checked; + compat_int_t generation; + compat_int_t keep_data; + compat_int_t fd_ref; + compat_int_t fd_device; + compat_int_t last_checked; compat_caddr_t dmabuf; - int bufblocks; + compat_int_t bufblocks; }; struct floppy_fdc_state32 { - int spec1; - int spec2; - int dtr; + compat_int_t spec1; + compat_int_t spec2; + compat_int_t dtr; unsigned char version; unsigned char dor; - u32 address; + compat_ulong_t address; unsigned int rawcmd:2; unsigned int reset:1; unsigned int need_configure:1; @@ -1088,11 +1093,11 @@ struct floppy_fdc_state32 { struct floppy_write_errors32 { unsigned int write_errors; - u32 first_error_sector; - int first_error_generation; - u32 last_error_sector; - int last_error_generation; - unsigned int badness; + compat_ulong_t first_error_sector; + compat_int_t first_error_generation; + compat_ulong_t last_error_sector; + compat_int_t last_error_generation; + compat_uint_t badness; }; #define FDSETPRM32 _IOW(2, 0x42, struct floppy_struct32) @@ -1329,42 +1334,46 @@ out: if (karg) kfree(karg); typedef struct sg_io_hdr32 { - s32 interface_id; /* [i] 'S' for SCSI generic (required) */ - s32 dxfer_direction; /* [i] data transfer direction */ - u8 cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ - u8 mx_sb_len; /* [i] max length to write to sbp */ - u16 iovec_count; /* [i] 0 implies no scatter gather */ - u32 dxfer_len; /* [i] byte count of data transfer */ - u32 dxferp; /* [i], [*io] points to data transfer memory + compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ + compat_int_t dxfer_direction; /* [i] data transfer direction */ + unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ + unsigned char mx_sb_len; /* [i] max length to write to sbp */ + unsigned 
short iovec_count; /* [i] 0 implies no scatter gather */ + compat_uint_t dxfer_len; /* [i] byte count of data transfer */ + compat_uint_t dxferp; /* [i], [*io] points to data transfer memory or scatter gather list */ - u32 cmdp; /* [i], [*i] points to command to perform */ - u32 sbp; /* [i], [*o] points to sense_buffer memory */ - u32 timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ - u32 flags; /* [i] 0 -> default, see SG_FLAG... */ - s32 pack_id; /* [i->o] unused internally (normally) */ - u32 usr_ptr; /* [i->o] unused internally */ - u8 status; /* [o] scsi status */ - u8 masked_status; /* [o] shifted, masked scsi status */ - u8 msg_status; /* [o] messaging level data (optional) */ - u8 sb_len_wr; /* [o] byte count actually written to sbp */ - u16 host_status; /* [o] errors from host adapter */ - u16 driver_status; /* [o] errors from software driver */ - s32 resid; /* [o] dxfer_len - actual_transferred */ - u32 duration; /* [o] time taken by cmd (unit: millisec) */ - u32 info; /* [o] auxiliary information */ + compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ + compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ + compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ + compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... 
*/ + compat_int_t pack_id; /* [i->o] unused internally (normally) */ + compat_uptr_t usr_ptr; /* [i->o] unused internally */ + unsigned char status; /* [o] scsi status */ + unsigned char masked_status; /* [o] shifted, masked scsi status */ + unsigned char msg_status; /* [o] messaging level data (optional) */ + unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ + unsigned short host_status; /* [o] errors from host adapter */ + unsigned short driver_status; /* [o] errors from software driver */ + compat_int_t resid; /* [o] dxfer_len - actual_transferred */ + compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ + compat_uint_t info; /* [o] auxiliary information */ } sg_io_hdr32_t; /* 64 bytes long (on sparc32) */ typedef struct sg_iovec32 { - u32 iov_base; - u32 iov_len; + compat_uint_t iov_base; + compat_uint_t iov_len; } sg_iovec32_t; +#define EMU_SG_MAX 128 + static int alloc_sg_iovec(sg_io_hdr_t *sgp, u32 uptr32) { - sg_iovec32_t *uiov = (sg_iovec32_t *) A(uptr32); + sg_iovec32_t *uiov = compat_ptr(uptr32); sg_iovec_t *kiov; int i; + if (sgp->iovec_count > EMU_SG_MAX) + return -EINVAL; sgp->dxferp = kmalloc(sgp->iovec_count * sizeof(sg_iovec_t), GFP_KERNEL); if (!sgp->dxferp) @@ -1378,39 +1387,9 @@ static int alloc_sg_iovec(sg_io_hdr_t *sgp, u32 uptr32) if (__get_user(iov_base32, &uiov->iov_base) || __get_user(kiov->iov_len, &uiov->iov_len)) return -EFAULT; - - kiov->iov_base = kmalloc(kiov->iov_len, GFP_KERNEL); - if (!kiov->iov_base) - return -ENOMEM; - if (copy_from_user(kiov->iov_base, - (void *) A(iov_base32), - kiov->iov_len)) + if (verify_area(VERIFY_WRITE, compat_ptr(iov_base32), kiov->iov_len)) return -EFAULT; - - uiov++; - kiov++; - } - - return 0; -} - -static int copy_back_sg_iovec(sg_io_hdr_t *sgp, u32 uptr32) -{ - sg_iovec32_t *uiov = (sg_iovec32_t *) A(uptr32); - sg_iovec_t *kiov = (sg_iovec_t *) sgp->dxferp; - int i; - - for (i = 0; i < sgp->iovec_count; i++) { - u32 iov_base32; - - if (__get_user(iov_base32, 
&uiov->iov_base)) - return -EFAULT; - - if (copy_to_user((void *) A(iov_base32), - kiov->iov_base, - kiov->iov_len)) - return -EFAULT; - + kiov->iov_base = compat_ptr(iov_base32); uiov++; kiov++; } @@ -1420,16 +1399,6 @@ static int copy_back_sg_iovec(sg_io_hdr_t *sgp, u32 uptr32) static void free_sg_iovec(sg_io_hdr_t *sgp) { - sg_iovec_t *kiov = (sg_iovec_t *) sgp->dxferp; - int i; - - for (i = 0; i < sgp->iovec_count; i++) { - if (kiov->iov_base) { - kfree(kiov->iov_base); - kiov->iov_base = NULL; - } - kiov++; - } kfree(sgp->dxferp); sgp->dxferp = NULL; } @@ -1459,12 +1428,8 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) err |= __get_user(cmdp32, &sg_io32->cmdp); sg_io64.cmdp = kmalloc(sg_io64.cmd_len, GFP_KERNEL); - if (!sg_io64.cmdp) { - err = -ENOMEM; - goto out; - } if (copy_from_user(sg_io64.cmdp, - (void *) A(cmdp32), + compat_ptr(cmdp32), sg_io64.cmd_len)) { err = -EFAULT; goto out; @@ -1477,7 +1442,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) goto out; } if (copy_from_user(sg_io64.sbp, - (void *) A(sbp32), + compat_ptr(sbp32), sg_io64.mx_sb_len)) { err = -EFAULT; goto out; @@ -1492,17 +1457,11 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) goto out; } } else { - sg_io64.dxferp = kmalloc(sg_io64.dxfer_len, GFP_KERNEL); - if (!sg_io64.dxferp) { - err = -ENOMEM; + err = verify_area(VERIFY_WRITE, compat_ptr(dxferp32), sg_io64.dxfer_len); + if (err) goto out; - } - if (copy_from_user(sg_io64.dxferp, - (void *) A(dxferp32), - sg_io64.dxfer_len)) { - err = -EFAULT; - goto out; - } + + sg_io64.dxferp = compat_ptr(dxferp32); } /* Unused internally, do not even bother to copy it over. 
*/ @@ -1529,15 +1488,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) err |= __put_user(sg_io64.resid, &sg_io32->resid); err |= __put_user(sg_io64.duration, &sg_io32->duration); err |= __put_user(sg_io64.info, &sg_io32->info); - err |= copy_to_user((void *)A(sbp32), sg_io64.sbp, sg_io64.mx_sb_len); - if (sg_io64.dxferp) { - if (sg_io64.iovec_count) - err |= copy_back_sg_iovec(&sg_io64, dxferp32); - else - err |= copy_to_user((void *)A(dxferp32), - sg_io64.dxferp, - sg_io64.dxfer_len); - } + err |= copy_to_user(compat_ptr(sbp32), sg_io64.sbp, sg_io64.mx_sb_len); if (err) err = -EFAULT; @@ -1546,19 +1497,14 @@ out: kfree(sg_io64.cmdp); if (sg_io64.sbp) kfree(sg_io64.sbp); - if (sg_io64.dxferp) { - if (sg_io64.iovec_count) { + if (sg_io64.dxferp && sg_io64.iovec_count) free_sg_iovec(&sg_io64); - } else { - kfree(sg_io64.dxferp); - } - } return err; } struct sock_fprog32 { - __u16 len; - __u32 filter; + unsigned short len; + compat_caddr_t filter; }; #define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32) @@ -1576,7 +1522,7 @@ static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, unsigne get_user(fptr32, &u_fprog32->filter)) return -EFAULT; - fptr64 = (void *) A(fptr32); + fptr64 = compat_ptr(fptr32); if (put_user(flen, &u_fprog64->len) || put_user(fptr64, &u_fprog64->filter)) @@ -1592,8 +1538,8 @@ static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, unsigne struct ppp_option_data32 { compat_caddr_t ptr; - __u32 length; - int transmit; + u32 length; + compat_int_t transmit; }; #define PPPIOCSCOMPRESS32 _IOW('t', 77, struct ppp_option_data32) @@ -1622,10 +1568,12 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) case PPPIOCSCOMPRESS32: if (copy_from_user(&data32, (struct ppp_option_data32 *)arg, sizeof(struct ppp_option_data32))) return -EFAULT; + if (data32.length > PAGE_SIZE) + return -EINVAL; data.ptr = kmalloc (data32.length, GFP_KERNEL); if (!data.ptr) 
return -ENOMEM; - if (copy_from_user(data.ptr, (__u8 *)A(data32.ptr), data32.length)) { + if (copy_from_user(data.ptr, compat_ptr(data32.ptr), data32.length)) { kfree(data.ptr); return -EFAULT; } @@ -1667,40 +1615,40 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) struct mtget32 { - __u32 mt_type; - __u32 mt_resid; - __u32 mt_dsreg; - __u32 mt_gstat; - __u32 mt_erreg; + compat_long_t mt_type; + compat_long_t mt_resid; + compat_long_t mt_dsreg; + compat_long_t mt_gstat; + compat_long_t mt_erreg; compat_daddr_t mt_fileno; compat_daddr_t mt_blkno; }; #define MTIOCGET32 _IOR('m', 2, struct mtget32) struct mtpos32 { - __u32 mt_blkno; + compat_long_t mt_blkno; }; #define MTIOCPOS32 _IOR('m', 3, struct mtpos32) struct mtconfiginfo32 { - __u32 mt_type; - __u32 ifc_type; - __u16 irqnr; - __u16 dmanr; - __u16 port; - __u32 debug; - __u32 have_dens:1; - __u32 have_bsf:1; - __u32 have_fsr:1; - __u32 have_bsr:1; - __u32 have_eod:1; - __u32 have_seek:1; - __u32 have_tell:1; - __u32 have_ras1:1; - __u32 have_ras2:1; - __u32 have_ras3:1; - __u32 have_qfa:1; - __u32 pad1:5; + compat_long_t mt_type; + compat_long_t ifc_type; + unsigned short irqnr; + unsigned short dmanr; + unsigned short port; + compat_ulong_t debug; + compat_uint_t have_dens:1; + compat_uint_t have_bsf:1; + compat_uint_t have_fsr:1; + compat_uint_t have_bsr:1; + compat_uint_t have_eod:1; + compat_uint_t have_seek:1; + compat_uint_t have_tell:1; + compat_uint_t have_ras1:1; + compat_uint_t have_ras2:1; + compat_uint_t have_ras3:1; + compat_uint_t have_qfa:1; + compat_uint_t pad1:5; char reserved[10]; }; #define MTIOCGETCONFIG32 _IOR('m', 4, struct mtconfiginfo32) @@ -1790,25 +1738,25 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } struct cdrom_read32 { - int cdread_lba; + compat_int_t cdread_lba; compat_caddr_t cdread_bufaddr; - int cdread_buflen; + compat_int_t cdread_buflen; }; struct cdrom_read_audio32 { union cdrom_addr addr; - u_char 
addr_format; - int nframes; + u8 addr_format; + compat_int_t nframes; compat_caddr_t buf; }; struct cdrom_generic_command32 { unsigned char cmd[CDROM_PACKET_SIZE]; compat_caddr_t buffer; - unsigned int buflen; - int stat; + compat_uint_t buflen; + compat_int_t stat; compat_caddr_t sense; - compat_caddr_t reserved[3]; + compat_caddr_t reserved[3]; /* Oops? it has data_direction, quiet and timeout fields? */ }; static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) @@ -1833,10 +1781,9 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar err |= __get_user(cdread.cdread_buflen, &((struct cdrom_read32 *)arg)->cdread_buflen); if (err) return -EFAULT; - data = kmalloc(cdread.cdread_buflen, GFP_KERNEL); - if (!data) - return -ENOMEM; - cdread.cdread_bufaddr = data; + if (verify_area(VERIFY_WRITE, compat_ptr(addr), cdread.cdread_buflen)) + return -EFAULT; + cdread.cdread_bufaddr = compat_ptr(addr); break; case CDROMREADAUDIO: karg = &cdreadaudio; @@ -1846,10 +1793,11 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar err |= __get_user(addr, &((struct cdrom_read_audio32 *)arg)->buf); if (err) return -EFAULT; - data = kmalloc(cdreadaudio.nframes * 2352, GFP_KERNEL); - if (!data) - return -ENOMEM; - cdreadaudio.buf = data; + + + if (verify_area(VERIFY_WRITE, compat_ptr(addr), cdreadaudio.nframes*2352)) + return -EFAULT; + cdreadaudio.buf = compat_ptr(addr); break; case CDROM_SEND_PACKET: karg = &cgc; @@ -1858,9 +1806,9 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar err |= __get_user(cgc.buflen, &((struct cdrom_generic_command32 *)arg)->buflen); if (err) return -EFAULT; - if ((data = kmalloc(cgc.buflen, GFP_KERNEL)) == NULL) - return -ENOMEM; - cgc.buffer = data; + if (verify_area(VERIFY_WRITE, compat_ptr(addr), cgc.buflen)) + return -EFAULT; + cgc.buffer = compat_ptr(addr); break; default: do { @@ -1875,41 +1823,23 @@ static int 
cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar set_fs (KERNEL_DS); err = sys_ioctl (fd, cmd, (unsigned long)karg); set_fs (old_fs); - if (err) - goto out; - switch (cmd) { - case CDROMREADMODE2: - case CDROMREADMODE1: - case CDROMREADRAW: - case CDROMREADCOOKED: - err = copy_to_user((char *)A(addr), data, cdread.cdread_buflen); - break; - case CDROMREADAUDIO: - err = copy_to_user((char *)A(addr), data, cdreadaudio.nframes * 2352); - break; - case CDROM_SEND_PACKET: - err = copy_to_user((char *)A(addr), data, cgc.buflen); - break; - default: - break; - } -out: if (data) + if (data) kfree(data); return err ? -EFAULT : 0; } struct loop_info32 { - int lo_number; /* ioctl r/o */ + compat_int_t lo_number; /* ioctl r/o */ compat_dev_t lo_device; /* ioctl r/o */ - unsigned int lo_inode; /* ioctl r/o */ + compat_ulong_t lo_inode; /* ioctl r/o */ compat_dev_t lo_rdevice; /* ioctl r/o */ - int lo_offset; - int lo_encrypt_type; - int lo_encrypt_key_size; /* ioctl w/o */ - int lo_flags; /* ioctl r/o */ + compat_int_t lo_offset; + compat_int_t lo_encrypt_type; + compat_int_t lo_encrypt_key_size; /* ioctl w/o */ + compat_int_t lo_flags; /* ioctl r/o */ char lo_name[LO_NAME_SIZE]; unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ - unsigned int lo_init[2]; + compat_ulong_t lo_init[2]; char reserved[4]; }; @@ -1925,6 +1855,7 @@ static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg) err |= __get_user(l.lo_device, &((struct loop_info32 *)arg)->lo_device); err |= __get_user(l.lo_inode, &((struct loop_info32 *)arg)->lo_inode); err |= __get_user(l.lo_rdevice, &((struct loop_info32 *)arg)->lo_rdevice); + err |= __copy_from_user((char *)&l.lo_offset, (char *)&((struct loop_info32 *)arg)->lo_offset, 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); if (err) { @@ -1990,7 +1921,7 @@ static int vt_check(struct file *file) struct consolefontdesc32 { unsigned short charcount; /* characters in font (256 or 512) */ unsigned short 
charheight; /* scan lines per character (1-32) */ - u32 chardata; /* font data in expanded form */ + compat_caddr_t chardata; /* font data in expanded form */ }; static int do_fontx_ioctl(unsigned int fd, int cmd, struct consolefontdesc32 *user_cfd, struct file *file) @@ -2005,7 +1936,7 @@ static int do_fontx_ioctl(unsigned int fd, int cmd, struct consolefontdesc32 *us if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc32))) return -EFAULT; - cfdarg.chardata = (unsigned char *)A(((struct consolefontdesc32 *)&cfdarg)->chardata); + cfdarg.chardata = compat_ptr(((struct consolefontdesc32 *)&cfdarg)->chardata); switch (cmd) { case PIO_FONTX: @@ -2041,11 +1972,11 @@ static int do_fontx_ioctl(unsigned int fd, int cmd, struct consolefontdesc32 *us } struct console_font_op32 { - unsigned int op; /* operation code KD_FONT_OP_* */ - unsigned int flags; /* KD_FONT_FLAG_* */ - unsigned int width, height; /* font size */ - unsigned int charcount; - u32 data; /* font data with height fixed to 32 */ + compat_uint_t op; /* operation code KD_FONT_OP_* */ + compat_uint_t flags; /* KD_FONT_FLAG_* */ + compat_uint_t width, height; /* font size */ + compat_uint_t charcount; + compat_caddr_t data; /* font data with height fixed to 32 */ }; static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, struct console_font_op32 *fontop, struct file *file) @@ -2060,7 +1991,7 @@ static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, struct console_f return -EFAULT; if (!perm && op.op != KD_FONT_OP_GET) return -EPERM; - op.data = (unsigned char *)A(((struct console_font_op32 *)&op)->data); + op.data = compat_ptr(((struct console_font_op32 *)&op)->data); op.flags |= KD_FONT_FLAG_OLD; vt = (struct vt_struct *)((struct tty_struct *)file->private_data)->driver_data; i = con_font_op(vt->vc_num, &op); @@ -2073,7 +2004,7 @@ static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, struct console_f struct unimapdesc32 { unsigned short entry_ct; - u32 entries; + 
compat_caddr_t entries; }; static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, struct unimapdesc32 *user_ud, struct file *file) @@ -2087,9 +2018,9 @@ static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, struct unimapdesc3 switch (cmd) { case PIO_UNIMAP: if (!perm) return -EPERM; - return con_set_unimap(fg_console, tmp.entry_ct, (struct unipair *)A(tmp.entries)); + return con_set_unimap(fg_console, tmp.entry_ct, compat_ptr(tmp.entries)); case GIO_UNIMAP: - return con_get_unimap(fg_console, tmp.entry_ct, &(user_ud->entry_ct), (struct unipair *)A(tmp.entries)); + return con_get_unimap(fg_console, tmp.entry_ct, &(user_ud->entry_ct), compat_ptr(tmp.entries)); } return 0; } @@ -2113,13 +2044,13 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long a } struct atmif_sioc32 { - int number; - int length; + compat_int_t number; + compat_int_t length; compat_caddr_t arg; }; struct atm_iobuf32 { - int length; + compat_int_t length; compat_caddr_t buffer; }; @@ -2184,38 +2115,17 @@ static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg) if (iobuf32.buffer == (compat_caddr_t) NULL || iobuf32.length == 0) { iobuf.buffer = (void*)(unsigned long)iobuf32.buffer; } else { - iobuf.buffer = kmalloc(iobuf.length, GFP_KERNEL); - if (iobuf.buffer == NULL) { - err = -ENOMEM; - goto out; - } - - err = copy_from_user(iobuf.buffer, A(iobuf32.buffer), iobuf.length); - if (err) { - err = -EFAULT; - goto out; - } + iobuf.buffer = compat_ptr(iobuf32.buffer); + if (verify_area(VERIFY_WRITE, iobuf.buffer, iobuf.length)) + return -EINVAL; } old_fs = get_fs(); set_fs (KERNEL_DS); err = sys_ioctl (fd, cmd, (unsigned long)&iobuf); set_fs (old_fs); - if(err) - goto out; - - if(iobuf.buffer && iobuf.length > 0) { - err = copy_to_user(A(iobuf32.buffer), iobuf.buffer, iobuf.length); - if (err) { - err = -EFAULT; - goto out; - } - } + if(!err) err = __put_user(iobuf.length, &(((struct atm_iobuf32*)arg)->length)); - out: - if(iobuf32.buffer 
&& iobuf32.length > 0) - kfree(iobuf.buffer); - return err; } @@ -2238,39 +2148,16 @@ static int do_atmif_sioc(unsigned int fd, unsigned int cmd, unsigned long arg) if (sioc32.arg == (compat_caddr_t) NULL || sioc32.length == 0) { sioc.arg = (void*)(unsigned long)sioc32.arg; } else { - sioc.arg = kmalloc(sioc.length, GFP_KERNEL); - if (sioc.arg == NULL) { - err = -ENOMEM; - goto out; - } - - err = copy_from_user(sioc.arg, A(sioc32.arg), sioc32.length); - if (err) { - err = -EFAULT; - goto out; - } + sioc.arg = compat_ptr(sioc32.arg); + if (verify_area(VERIFY_WRITE, sioc.arg, sioc32.length)) + return -EFAULT; } old_fs = get_fs(); set_fs (KERNEL_DS); err = sys_ioctl (fd, cmd, (unsigned long)&sioc); set_fs (old_fs); - if(err) { - goto out; - } - - if(sioc.arg && sioc.length > 0) { - err = copy_to_user(A(sioc32.arg), sioc.arg, sioc.length); - if (err) { - err = -EFAULT; - goto out; - } - } + if (!err) err = __put_user(sioc.length, &(((struct atmif_sioc32*)arg)->length)); - - out: - if(sioc32.arg && sioc32.length > 0) - kfree(sioc.arg); - return err; } @@ -2340,10 +2227,10 @@ static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long ar } struct blkpg_ioctl_arg32 { - int op; - int flags; - int datalen; - u32 data; + compat_int_t op; + compat_int_t flags; + compat_int_t datalen; + compat_caddr_t data; }; static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, struct blkpg_ioctl_arg32 *arg) @@ -2402,7 +2289,7 @@ static int tiocgdev(unsigned fd, unsigned cmd, unsigned int *ptr) struct raw32_config_request { - int raw_minor; + compat_int_t raw_minor; __u64 block_major; __u64 block_minor; } __attribute__((packed)); @@ -2434,24 +2321,24 @@ static int raw_ioctl(unsigned fd, unsigned cmd, void *ptr) } struct serial_struct32 { - int type; - int line; - unsigned int port; - int irq; - int flags; - int xmit_fifo_size; - int custom_divisor; - int baud_base; + compat_int_t type; + compat_int_t line; + compat_uint_t port; + compat_int_t irq; + compat_int_t 
flags; + compat_int_t xmit_fifo_size; + compat_int_t custom_divisor; + compat_int_t baud_base; unsigned short close_delay; char io_type; char reserved_char[1]; - int hub6; + compat_int_t hub6; unsigned short closing_wait; /* time to wait before closing */ unsigned short closing_wait2; /* no longer used... */ - __u32 iomem_base; + compat_uint_t iomem_base; unsigned short iomem_reg_shift; unsigned int port_high; - int reserved[1]; + compat_int_t reserved[1]; }; static int serial_struct_ioctl(unsigned fd, unsigned cmd, void *ptr) @@ -2494,7 +2381,7 @@ static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr) } struct dirent32 { - unsigned int d_ino; + compat_int_t d_ino; compat_off_t d_off; unsigned short d_reclen; char d_name[256]; /* We must not include limits.h! */ @@ -2600,13 +2487,13 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, #define BNEPGETCONNINFO _IOR('B', 211, int) struct usbdevfs_ctrltransfer32 { - __u8 bRequestType; - __u8 bRequest; - __u16 wValue; - __u16 wIndex; - __u16 wLength; - __u32 timeout; /* in milliseconds */ - __u32 data; + u8 bRequestType; + u8 bRequest; + u16 wValue; + u16 wIndex; + u16 wLength; + u32 timeout; /* in milliseconds */ + compat_caddr_t data; }; #define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32) @@ -2629,7 +2516,7 @@ static int do_usbdevfs_control(unsigned int fd, unsigned int cmd, unsigned long if (get_user(udata, &uctrl->data)) return -EFAULT; - uptr = (void *) A(udata); + uptr = compat_ptr(udata); /* In usbdevice_fs, it limits the control buffer to a page, * for simplicity so do we. 
@@ -2664,10 +2551,10 @@ out: } struct usbdevfs_bulktransfer32 { - unsigned int ep; - unsigned int len; - unsigned int timeout; /* in milliseconds */ - __u32 data; + compat_uint_t ep; + compat_uint_t len; + compat_uint_t timeout; /* in milliseconds */ + compat_caddr_t data; }; #define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32) @@ -2689,7 +2576,7 @@ static int do_usbdevfs_bulk(unsigned int fd, unsigned int cmd, unsigned long arg get_user(udata, &ubulk->data)) return -EFAULT; - uptr = (void *) A(udata); + uptr = compat_ptr(udata); /* In usbdevice_fs, it limits the control buffer to a page, * for simplicity so do we. @@ -2765,18 +2652,18 @@ out: */ #if 0 struct usbdevfs_urb32 { - __u8 type; - __u8 endpoint; - __s32 status; - __u32 flags; - __u32 buffer; - __s32 buffer_length; - __s32 actual_length; - __s32 start_frame; - __s32 number_of_packets; - __s32 error_count; - __u32 signr; - __u32 usercontext; /* unused */ + unsigned char type; + unsigned char endpoint; + compat_int_t status; + compat_uint_t flags; + compat_caddr_t buffer; + compat_int_t buffer_length; + compat_int_t actual_length; + compat_int_t start_frame; + compat_int_t number_of_packets; + compat_int_t error_count; + compat_uint_t signr; + compat_caddr_t usercontext; /* unused */ struct usbdevfs_iso_packet_desc iso_frame_desc[0]; }; @@ -2896,19 +2783,13 @@ static int do_usbdevfs_urb(unsigned int fd, unsigned int cmd, unsigned long arg) err = -EFAULT; if (__get_user(udata, &uurb->buffer)) goto out; - uptr = (void *) A(udata); + uptr = compat_ptr(udata); - err = -ENOMEM; buflen = kurb->buffer_length; - kptr = kmalloc(buflen, GFP_KERNEL); - if (!kptr) + err = verify_area(VERIFY_WRITE, uptr, buflen); + if (err) goto out; - kurb->buffer = kptr; - - err = -EFAULT; - if (copy_from_user(kptr, uptr, buflen)) - goto out_kptr; old_fs = get_fs(); set_fs(KERNEL_DS); @@ -2919,15 +2800,9 @@ static int do_usbdevfs_urb(unsigned int fd, unsigned int cmd, unsigned long arg) /* RED-PEN Shit, this doesn't 
work for async URBs :-( XXX */ if (put_urb32(kurb, uurb)) { err = -EFAULT; - } else if ((kurb->endpoint & USB_DIR_IN) != 0) { - if (copy_to_user(uptr, kptr, buflen)) - err = -EFAULT; } } -out_kptr: - kfree(kptr); - out: kfree(kurb); return err; @@ -2953,15 +2828,15 @@ static int do_usbdevfs_reapurb(unsigned int fd, unsigned int cmd, unsigned long set_fs(old_fs); if (err >= 0 && - put_user(((u32)(long)kptr), (u32 *) A(arg))) + put_user(((u32)(long)kptr), compat_ptr(arg))) err = -EFAULT; return err; } struct usbdevfs_disconnectsignal32 { - unsigned int signr; - u32 context; + compat_int_t signr; + compat_caddr_t context; }; #define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32) @@ -2991,9 +2866,9 @@ static int do_usbdevfs_discsignal(unsigned int fd, unsigned int cmd, unsigned lo } struct mtd_oob_buf32 { - u32 start; - u32 length; - u32 ptr; /* unsigned char* */ + u_int32_t start; + u_int32_t length; + compat_caddr_t ptr; /* unsigned char* */ }; #define MEMWRITEOOB32 _IOWR('M',3,struct mtd_oob_buf32) @@ -3005,7 +2880,6 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) struct mtd_oob_buf32 *uarg = (struct mtd_oob_buf32 *)arg; struct mtd_oob_buf karg; u32 tmp; - char *ptr; int ret; if (get_user(karg.start, &uarg->start) || @@ -3013,18 +2887,9 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) get_user(tmp, &uarg->ptr)) return -EFAULT; - ptr = (char *)A(tmp); - if (0 >= karg.length) - return -EINVAL; - - karg.ptr = kmalloc(karg.length, GFP_KERNEL); - if (NULL == karg.ptr) - return -ENOMEM; - - if (copy_from_user(karg.ptr, ptr, karg.length)) { - kfree(karg.ptr); + karg.ptr = compat_ptr(tmp); + if (verify_area(VERIFY_WRITE, karg.ptr, karg.length)) return -EFAULT; - } set_fs(KERNEL_DS); if (MEMREADOOB32 == cmd) @@ -3036,13 +2901,11 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) set_fs(old_fs); if (0 == ret && cmd == MEMREADOOB32) { - ret = copy_to_user(ptr, 
karg.ptr, karg.length); - ret |= put_user(karg.start, &uarg->start); + ret = put_user(karg.start, &uarg->start); ret |= put_user(karg.length, &uarg->length); } - kfree(karg.ptr); - return ((0 == ret) ? 0 : -EFAULT); + return ret; } /* /proc/mtrr ioctls */ @@ -3050,17 +2913,17 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) struct mtrr_sentry32 { - unsigned int base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int type; /* Type of region */ + compat_ulong_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ }; struct mtrr_gentry32 { - unsigned int regnum; /* Register number */ - unsigned int base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int type; /* Type of region */ + compat_ulong_t regnum; /* Register number */ + compat_uint_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ }; #define MTRR_IOCTL_BASE 'M' @@ -3131,7 +2994,7 @@ static int mtrr_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg) } #define REF_SYMBOL(handler) if (0) (void)handler; -#define HANDLE_IOCTL2(cmd,handler) REF_SYMBOL(handler); asm volatile(".quad %c0, " #handler ",0"::"i" (cmd)); +#define HANDLE_IOCTL2(cmd,handler) REF_SYMBOL(handler); asm volatile(".quad %P0, " #handler ",0"::"i" (cmd)); #define HANDLE_IOCTL(cmd,handler) HANDLE_IOCTL2(cmd,handler) #define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) #define IOCTL_TABLE_START void ioctl_dummy(void) { asm volatile("\n.global ioctl_start\nioctl_start:\n\t" ); diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index dcaa3ad35a77..6378a19b480d 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -33,6 +33,7 @@ #include <asm/sigcontext32.h> #include <asm/fpu32.h> #include <asm/proto.h> +#include <asm/vsyscall32.h> #define ptr_to_u32(x) ((u32)(u64)(x)) /* avoid gcc 
warning */ @@ -428,7 +429,7 @@ void ia32_setup_frame(int sig, struct k_sigaction *ka, /* Return stub is in 32bit vsyscall page */ { - void *restorer = syscall32_page + 32; + void *restorer = VSYSCALL32_SIGRETURN; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(ptr_to_u32(restorer), &frame->pretcode); @@ -521,7 +522,7 @@ void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, { - void *restorer = syscall32_page + 32; + void *restorer = VSYSCALL32_RTSIGRETURN; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(ptr_to_u32(restorer), &frame->pretcode); diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c7ab253afbfe..a3cb3f13e9a7 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -182,9 +182,9 @@ quiet_ni_syscall: PTREGSCALL stub32_sigaltstack, sys32_sigaltstack PTREGSCALL stub32_sigsuspend, sys32_sigsuspend PTREGSCALL stub32_execve, sys32_execve - PTREGSCALL stub32_fork, sys32_fork + PTREGSCALL stub32_fork, sys_fork PTREGSCALL stub32_clone, sys32_clone - PTREGSCALL stub32_vfork, sys32_vfork + PTREGSCALL stub32_vfork, sys_vfork PTREGSCALL stub32_iopl, sys_iopl PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index 2f5ee3786dd5..ffd8b13169d1 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c @@ -78,12 +78,24 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val) case offsetof(struct user32, u_debugreg[5]): return -EIO; - case offsetof(struct user32, u_debugreg[0]) ... 
- offsetof(struct user32, u_debugreg[3]): + case offsetof(struct user32, u_debugreg[0]): + child->thread.debugreg0 = val; + break; + + case offsetof(struct user32, u_debugreg[1]): + child->thread.debugreg1 = val; + break; + + case offsetof(struct user32, u_debugreg[2]): + child->thread.debugreg2 = val; + break; + + case offsetof(struct user32, u_debugreg[3]): + child->thread.debugreg3 = val; + break; + case offsetof(struct user32, u_debugreg[6]): - child->thread.debugreg - [(regno-offsetof(struct user32, u_debugreg[0]))/4] - = val; + child->thread.debugreg6 = val; break; case offsetof(struct user32, u_debugreg[7]): @@ -92,7 +104,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val) for(i=0; i<4; i++) if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) return -EIO; - child->thread.debugreg[7] = val; + child->thread.debugreg7 = val; break; default: @@ -142,8 +154,23 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) R32(eflags, eflags); R32(esp, rsp); - case offsetof(struct user32, u_debugreg[0]) ... 
offsetof(struct user32, u_debugreg[7]): - *val = child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4]; + case offsetof(struct user32, u_debugreg[0]): + *val = child->thread.debugreg0; + break; + case offsetof(struct user32, u_debugreg[1]): + *val = child->thread.debugreg1; + break; + case offsetof(struct user32, u_debugreg[2]): + *val = child->thread.debugreg2; + break; + case offsetof(struct user32, u_debugreg[3]): + *val = child->thread.debugreg3; + break; + case offsetof(struct user32, u_debugreg[6]): + *val = child->thread.debugreg6; + break; + case offsetof(struct user32, u_debugreg[7]): + *val = child->thread.debugreg7; break; default: diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index a5805d07522d..d7446a1834da 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -234,7 +234,7 @@ sys32_mmap(struct mmap_arg_struct *arg) } if (a.prot & PROT_READ) - a.prot |= PROT_EXEC; + a.prot |= vm_force_exec32; mm = current->mm; down_write(&mm->mmap_sem); @@ -253,7 +253,7 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len, unsigned long prot) { if (prot & PROT_READ) - prot |= PROT_EXEC; + prot |= vm_force_exec32; return sys_mprotect(start,len,prot); } @@ -929,7 +929,11 @@ struct sysinfo32 { u32 totalswap; u32 freeswap; unsigned short procs; - char _f[22]; + unsigned short pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; }; extern asmlinkage long sys_sysinfo(struct sysinfo *info); @@ -955,7 +959,10 @@ sys32_sysinfo(struct sysinfo32 *info) __put_user (s.bufferram, &info->bufferram) || __put_user (s.totalswap, &info->totalswap) || __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs)) + __put_user (s.procs, &info->procs) || + __put_user (s.totalhigh, &info->totalhigh) || + __put_user (s.freehigh, &info->freehigh) || + __put_user (s.mem_unit, &info->mem_unit)) return -EFAULT; return 0; } @@ -1419,7 +1426,7 @@ asmlinkage 
long sys32_mmap2(unsigned long addr, unsigned long len, } if (prot & PROT_READ) - prot |= PROT_EXEC; + prot |= vm_force_exec32; down_write(&mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); @@ -1587,40 +1594,14 @@ free: return ret; } -asmlinkage long sys32_fork(struct pt_regs regs) -{ - struct task_struct *p; - p = do_fork(SIGCHLD, regs.rsp, &regs, 0, NULL, NULL); - return IS_ERR(p) ? PTR_ERR(p) : p->pid; -} - asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs regs) { - struct task_struct *p; void *parent_tid = (void *)regs.rdx; void *child_tid = (void *)regs.rdi; if (!newsp) newsp = regs.rsp; - p = do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0, + return do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0, parent_tid, child_tid); - return IS_ERR(p) ? PTR_ERR(p) : p->pid; -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage long sys32_vfork(struct pt_regs regs) -{ - struct task_struct *p; - p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0, NULL, NULL); - return IS_ERR(p) ? PTR_ERR(p) : p->pid; } /* diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index ed065f232d03..fb986d361c12 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -13,33 +13,14 @@ #include <asm/tlbflush.h> #include <asm/ia32_unistd.h> -/* 32bit SYSCALL stub mapped into user space. */ -asm(" .code32\n" - "\nsyscall32:\n" - " pushl %ebp\n" - " movl %ecx,%ebp\n" - " syscall\n" - " popl %ebp\n" - " ret\n" +/* 32bit VDSO mapped into user space. 
*/ +asm(".section \".init.data\",\"aw\"\n" + "syscall32:\n" + ".incbin \"arch/x86_64/ia32/vsyscall.so\"\n" "syscall32_end:\n" - - /* signal trampolines */ - - "sig32_rt_tramp:\n" - " movl $" __stringify(__NR_ia32_rt_sigreturn) ",%eax\n" - " syscall\n" - "sig32_rt_tramp_end:\n" - - "sig32_tramp:\n" - " popl %eax\n" - " movl $" __stringify(__NR_ia32_sigreturn) ",%eax\n" - " syscall\n" - "sig32_tramp_end:\n" - " .code64\n"); + ".previous"); extern unsigned char syscall32[], syscall32_end[]; -extern unsigned char sig32_rt_tramp[], sig32_rt_tramp_end[]; -extern unsigned char sig32_tramp[], sig32_tramp_end[]; char *syscall32_page; @@ -76,10 +57,6 @@ static int __init init_syscall32(void) panic("Cannot allocate syscall32 page"); SetPageReserved(virt_to_page(syscall32_page)); memcpy(syscall32_page, syscall32, syscall32_end - syscall32); - memcpy(syscall32_page + 32, sig32_rt_tramp, - sig32_rt_tramp_end - sig32_rt_tramp); - memcpy(syscall32_page + 64, sig32_tramp, - sig32_tramp_end - sig32_tramp); return 0; } diff --git a/arch/x86_64/ia32/vsyscall.S b/arch/x86_64/ia32/vsyscall.S new file mode 100644 index 000000000000..c7af4f64d9e8 --- /dev/null +++ b/arch/x86_64/ia32/vsyscall.S @@ -0,0 +1,172 @@ +/* + * Code for the vsyscall page. This version uses the syscall instruction. 
+ */ + +#include <asm/ia32_unistd.h> +#include <asm/offset.h> + + .text + .section .text.vsyscall,"ax" + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function +__kernel_vsyscall: +.LSTART_vsyscall: + push %ebp +.Lpush_ebp: + movl %ecx, %ebp + syscall + popl %ebp +.Lpop_ebp: + ret +.LEND_vsyscall: + .size __kernel_vsyscall,.-.LSTART_vsyscall + + .section .text.sigreturn,"ax" + .balign 32 + .globl __kernel_sigreturn + .type __kernel_sigreturn,@function +__kernel_sigreturn: +.LSTART_sigreturn: + popl %eax + movl $__NR_ia32_sigreturn, %eax + syscall +.LEND_sigreturn: + .size __kernel_sigreturn,.-.LSTART_sigreturn + + .section .text.rtsigreturn,"ax" + .balign 32 + .globl __kernel_rt_sigreturn,"ax" + .type __kernel_rt_sigreturn,@function +__kernel_rt_sigreturn: +.LSTART_rt_sigreturn: + movl $__NR_ia32_rt_sigreturn, %eax + syscall +.LEND_rt_sigreturn: + .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + + .section .eh_frame,"a",@progbits +.LSTARTFRAME: + .long .LENDCIE-.LSTARTCIE +.LSTARTCIE: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0x0c /* DW_CFA_def_cfa */ + .uleb128 4 + .uleb128 4 + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .uleb128 1 + .align 4 +.LENDCIE: + + .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ +.LSTARTFDE1: + .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ + .long .LSTART_vsyscall-. /* PC-relative start address */ + .long .LEND_vsyscall-.LSTART_vsyscall + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We have to record all changes of the stack pointer. 
*/ + .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .uleb128 8 + .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ + .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ + .byte 0xc5 /* DW_CFA_restore %ebp */ + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .uleb128 4 + .align 4 +.LENDFDE1: + + .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */ +.LSTARTFDE2: + .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */ + /* HACK: The dwarf2 unwind routines will subtract 1 from the + return address to get an address in the middle of the + presumed call instruction. Since we didn't get here via + a call, we need to include the nop before the real start + to make up for it. */ + .long .LSTART_sigreturn-1-. /* PC-relative start address */ + .long .LEND_sigreturn-.LSTART_sigreturn+1 + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + complicated by the fact that the "CFA" is always assumed to + be the value of the stack pointer in the caller. This means + that we must define the CFA of this body of code to be the + saved value of the stack pointer in the sigcontext. Which + also means that there is no fixed relation to the other + saved registers, which means that we must use DW_CFA_expression + to compute their addresses. It also means that when we + adjust the stack with the popl, we have to do it all over again. 
*/ + +#define do_cfa_expr(offset) \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ + .byte 0x06; /* DW_OP_deref */ \ +1: + +#define do_expr(regno, offset) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ +1: + + do_cfa_expr(IA32_SIGCONTEXT_esp+4) + do_expr(0, IA32_SIGCONTEXT_eax+4) + do_expr(1, IA32_SIGCONTEXT_ecx+4) + do_expr(2, IA32_SIGCONTEXT_edx+4) + do_expr(3, IA32_SIGCONTEXT_ebx+4) + do_expr(5, IA32_SIGCONTEXT_ebp+4) + do_expr(6, IA32_SIGCONTEXT_esi+4) + do_expr(7, IA32_SIGCONTEXT_edi+4) + do_expr(8, IA32_SIGCONTEXT_eip+4) + + .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ + + do_cfa_expr(IA32_SIGCONTEXT_esp) + do_expr(0, IA32_SIGCONTEXT_eax) + do_expr(1, IA32_SIGCONTEXT_ecx) + do_expr(2, IA32_SIGCONTEXT_edx) + do_expr(3, IA32_SIGCONTEXT_ebx) + do_expr(5, IA32_SIGCONTEXT_ebp) + do_expr(6, IA32_SIGCONTEXT_esi) + do_expr(7, IA32_SIGCONTEXT_edi) + do_expr(8, IA32_SIGCONTEXT_eip) + + .align 4 +.LENDFDE2: + + .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */ +.LSTARTFDE3: + .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */ + /* HACK: See above wrt unwind library assumptions. */ + .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ + .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 + .uleb128 0 /* Augmentation */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + slightly less complicated than the above, since we don't + modify the stack pointer in the process. 
*/ + + do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp) + do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax) + do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx) + do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx) + do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx) + do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp) + do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi) + do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi) + do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip) + + .align 4 +.LENDFDE3: diff --git a/arch/x86_64/ia32/vsyscall.lds b/arch/x86_64/ia32/vsyscall.lds new file mode 100644 index 000000000000..fa4b4dd4a9ff --- /dev/null +++ b/arch/x86_64/ia32/vsyscall.lds @@ -0,0 +1,77 @@ +/* + * Linker script for vsyscall DSO. The vsyscall page is an ELF shared + * object prelinked to its virtual address. This script controls its layout. + */ + +/* This must match <asm/fixmap.h>. */ +VSYSCALL_BASE = 0xffffe000; + +SECTIONS +{ + . = VSYSCALL_BASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + /* This linker script is used both with -r and with -shared. + For the layouts to match, we need to skip more than enough + space for the dynamic symbol table et al. If this amount + is insufficient, ld -shared will barf. Just increase it here. */ + . = VSYSCALL_BASE + 0x400; + + .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090 + + /* This is an 32bit object and we cannot easily get the offsets + into the 64bit kernel. Just hardcode them here. This assumes + that all the stubs don't need more than 0x100 bytes. */ + . = VSYSCALL_BASE + 0x500; + + .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090 + + . 
= VSYSCALL_BASE + 0x600; + + .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090 + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .useless : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } :text +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.5 { + global: + __kernel_vsyscall; + __kernel_sigreturn; + __kernel_rt_sigreturn; + + local: *; + }; +} + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_vsyscall); diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index ce5d09400fcc..765fa7d91c80 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -7,7 +7,7 @@ EXTRA_AFLAGS := -traditional obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \ pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \ - setup64.o bluesmoke.o bootflag.o e820.o reboot.o + setup64.o bluesmoke.o bootflag.o e820.o reboot.o warmreboot.o obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_ACPI) += acpi/ diff --git a/arch/x86_64/kernel/acpi.c b/arch/x86_64/kernel/acpi.c deleted file mode 100644 index 16b5f7f01ceb..000000000000 --- a/arch/x86_64/kernel/acpi.c +++ /dev/null @@ -1,512 +0,0 @@ -/* - * acpi.c - Architecture-Specific Low-Level ACPI Support - * - * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> - * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com> - * Copyright (C) 
2001 Patrick Mochel <mochel@osdl.org> - * Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port) - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/stddef.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/bootmem.h> -#include <linux/irq.h> -#include <linux/acpi.h> -#include <asm/mpspec.h> -#include <asm/io.h> -#include <asm/apic.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/io_apic.h> -#include <asm/proto.h> -#include <asm/desc.h> -#include <asm/system.h> -#include <asm/segment.h> - -extern int acpi_disabled; - -#define PREFIX "ACPI: " - - -/* -------------------------------------------------------------------------- - Boot-time Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_BOOT - -enum acpi_irq_model_id acpi_irq_model; - -/* rely on all ACPI tables being in the direct mapping */ -char * -__acpi_map_table ( - unsigned long phys_addr, - unsigned long 
size) -{ - if (!phys_addr || !size) - return NULL; - - if (phys_addr < (end_pfn_map << PAGE_SHIFT)) - return __va(phys_addr); - - return NULL; -} - -#ifdef CONFIG_X86_LOCAL_APIC - -int acpi_lapic; - -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; - - -static int __init -acpi_parse_madt ( - unsigned long phys_addr, - unsigned long size) -{ - struct acpi_table_madt *madt = NULL; - - if (!phys_addr || !size) - return -EINVAL; - - madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); - if (!madt) { - printk(KERN_WARNING PREFIX "Unable to map MADT\n"); - return -ENODEV; - } - - if (madt->lapic_address) - acpi_lapic_addr = (u64) madt->lapic_address; - - printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n", - madt->lapic_address); - - return 0; -} - - -static int __init -acpi_parse_lapic ( - acpi_table_entry_header *header) -{ - struct acpi_table_lapic *processor = NULL; - - processor = (struct acpi_table_lapic*) header; - if (!processor) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_register_lapic ( - processor->id, /* APIC ID */ - processor->flags.enabled); /* Enabled? 
*/ - - return 0; -} - - -static int __init -acpi_parse_lapic_addr_ovr ( - acpi_table_entry_header *header) -{ - struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; - - lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; - if (!lapic_addr_ovr) - return -EINVAL; - - acpi_lapic_addr = lapic_addr_ovr->address; - - return 0; -} - - -static int __init -acpi_parse_lapic_nmi ( - acpi_table_entry_header *header) -{ - struct acpi_table_lapic_nmi *lapic_nmi = NULL; - - lapic_nmi = (struct acpi_table_lapic_nmi*) header; - if (!lapic_nmi) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (lapic_nmi->lint != 1) - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); - - return 0; -} - -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#ifdef CONFIG_X86_IO_APIC - -int acpi_ioapic; - -static int __init -acpi_parse_ioapic ( - acpi_table_entry_header *header) -{ - struct acpi_table_ioapic *ioapic = NULL; - - ioapic = (struct acpi_table_ioapic*) header; - if (!ioapic) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_register_ioapic ( - ioapic->id, - ioapic->address, - ioapic->global_irq_base); - - return 0; -} - - -static int __init -acpi_parse_int_src_ovr ( - acpi_table_entry_header *header) -{ - struct acpi_table_int_src_ovr *intsrc = NULL; - - intsrc = (struct acpi_table_int_src_ovr*) header; - if (!intsrc) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_override_legacy_irq ( - intsrc->bus_irq, - intsrc->flags.polarity, - intsrc->flags.trigger, - intsrc->global_irq); - - return 0; -} - - -static int __init -acpi_parse_nmi_src ( - acpi_table_entry_header *header) -{ - struct acpi_table_nmi_src *nmi_src = NULL; - - nmi_src = (struct acpi_table_nmi_src*) header; - if (!nmi_src) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* TBD: Support nimsrc entries? 
*/ - - return 0; -} - -#endif /*CONFIG_X86_IO_APIC*/ - -#ifdef CONFIG_HPET_TIMER -static int __init -acpi_parse_hpet ( - unsigned long phys_addr, - unsigned long size) -{ - struct acpi_table_hpet *hpet_tbl; - - hpet_tbl = __va(phys_addr); - - if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) { - printk(KERN_WARNING "acpi: HPET timers must be located in memory.\n"); - return -1; - } - - hpet.address = hpet_tbl->addr.addrl | ((long) hpet_tbl->addr.addrh << 32); - - printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet.address); - - return 0; -} -#endif - -static unsigned long __init -acpi_scan_rsdp ( - unsigned long start, - unsigned long length) -{ - unsigned long offset = 0; - unsigned long sig_len = sizeof("RSD PTR ") - 1; - - /* - * Scan all 16-byte boundaries of the physical memory region for the - * RSDP signature. - */ - for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) - continue; - return (start + offset); - } - - return 0; -} - - -unsigned long __init -acpi_find_rsdp (void) -{ - unsigned long rsdp_phys = 0; - - /* - * Scan memory looking for the RSDP signature. First search EBDA (low - * memory) paragraphs and then search upper memory (E0000-FFFFF). - */ - rsdp_phys = acpi_scan_rsdp (0, 0x400); - if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); - - return rsdp_phys; -} - - -int __init -acpi_boot_init (void) -{ - int result = 0; - - /* - * The default interrupt routing model is PIC (8259). This gets - * overridden if IOAPICs are enumerated (below). - */ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; - - /* - * Initialize the ACPI boot-time table parser. 
- */ - result = acpi_table_init(); - if (result) - return result; - - result = acpi_blacklisted(); - if (result) { - acpi_disabled = 1; - return result; - } else - printk(KERN_NOTICE PREFIX "BIOS passes blacklist\n"); - - extern int disable_apic; - if (disable_apic) - return 0; - -#ifdef CONFIG_X86_LOCAL_APIC - - /* - * MADT - * ---- - * Parse the Multiple APIC Description Table (MADT), if exists. - * Note that this table provides platform SMP configuration - * information -- the successor to MPS tables. - */ - - result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); - if (!result) { - printk(KERN_WARNING PREFIX "MADT not present\n"); - return 0; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing MADT\n"); - return result; - } - else if (result > 1) - printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n"); - - /* - * Local APIC - * ---------- - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value). 
- */ - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); - return result; - } - - mp_register_lapic_address(acpi_lapic_addr); - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic); - if (!result) { - printk(KERN_ERR PREFIX "No LAPIC entries present\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return -ENODEV; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - acpi_lapic = 1; - -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#ifdef CONFIG_X86_IO_APIC - - /* - * I/O APIC - * -------- - */ - - result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic); - if (!result) { - printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); - return -ENODEV; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); - return result; - } - - /* Build a default routing table for legacy (ISA) interrupts. 
*/ - mp_config_acpi_legacy_irqs(); - - result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - - acpi_ioapic = 1; - -#endif /*CONFIG_X86_IO_APIC*/ - -#ifdef CONFIG_X86_LOCAL_APIC - if (acpi_lapic && acpi_ioapic) - smp_found_config = 1; -#endif - -#ifdef CONFIG_HPET_TIMER - result = acpi_table_parse(ACPI_HPET, acpi_parse_hpet); - if (result < 0) - printk("ACPI: no HPET table found (%d).\n", result); -#endif - - return 0; -} - -#endif /*CONFIG_ACPI_BOOT*/ - - -/* -------------------------------------------------------------------------- - Low-Level Sleep Support - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_SLEEP - -extern void acpi_prepare_wakeup(void); -extern unsigned char acpi_wakeup[], acpi_wakeup_end[], s3_prot16[]; - -/* address in low memory of the wakeup routine. */ -unsigned long acpi_wakeup_address; - -/** - * acpi_save_state_mem - save kernel state - */ -int acpi_save_state_mem (void) -{ - if (!acpi_wakeup_address) - return -1; - - memcpy((void*)acpi_wakeup_address, acpi_wakeup, acpi_wakeup_end - acpi_wakeup); - return 0; -} - -/** - * acpi_save_state_disk - save kernel state to disk - * - * Assume preemption/interrupts are already turned off and that we're running - * on the BP (note this doesn't imply SMP is handled correctly) - */ -int acpi_save_state_disk (void) -{ - unsigned long pbase = read_cr3() & PAGE_MASK; - if (pbase >= 0xffffffffUL) { - printk(KERN_ERR "ACPI: High page table. 
Suspend disabled.\n"); - return 1; - } - set_seg_base(smp_processor_id(), GDT_ENTRY_KERNELCS16, s3_prot16); - swap_low_mappings(); - acpi_prepare_wakeup(); - return 0; -} - -/* - * acpi_restore_state - */ -void acpi_restore_state_mem (void) -{ - swap_low_mappings(); -} - -/** - * acpi_reserve_bootmem - do _very_ early ACPI initialisation - * - * We allocate a page in 1MB low memory for the real-mode wakeup - * routine for when we come back from a sleep state. The - * runtime allocator allows specification of <16M pages, but not - * <1M pages. - */ -void __init acpi_reserve_bootmem(void) -{ - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); - if (!acpi_wakeup_address) { - printk(KERN_ERR "ACPI: Cannot allocate lowmem. S3 disabled.\n"); - return; - } -} - -#endif /*CONFIG_ACPI_SLEEP*/ - -void acpi_pci_link_exit(void) {} diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S index 124af5eb135f..af37ff16ce0c 100644 --- a/arch/x86_64/kernel/acpi/wakeup.S +++ b/arch/x86_64/kernel/acpi/wakeup.S @@ -94,12 +94,6 @@ wakeup_32: movw %ax, %ss mov $(wakeup_stack - __START_KERNEL_map), %esp - - call 1f -1: popl %eax - movl $0xb8040, %ebx - call early_print - movl saved_magic - __START_KERNEL_map, %eax cmpl $0x9abcdef0, %eax jne bogus_32_magic @@ -115,11 +109,7 @@ wakeup_32: movl %eax, %cr4 /* Setup early boot stage 4 level pagetables */ -#if 1 movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax -#else - movl saved_cr3 - __START_KERNEL_map, %eax -#endif movl %eax, %cr3 /* Setup EFER (Extended Feature Enable Register) */ @@ -223,19 +213,6 @@ wakeup_long64: .code32 -early_print: - movl $16, %edx -1: - movl %eax, %ecx - andl $0xf, %ecx - shrl $4, %eax - addw $0x0e00 + '0', %ecx - movw %ecx, %ds:(%edx, %ebx) - decl %edx - decl %edx - jnz 1b - ret - .align 64 gdta: .word 0, 0, 0, 0 # dummy diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 0f99361988b5..7fe3311de204 100644 --- a/arch/x86_64/kernel/apic.c +++ 
b/arch/x86_64/kernel/apic.c @@ -385,10 +385,10 @@ void __init setup_local_APIC (void) value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; if (!smp_processor_id() && (pic_mode || !value)) { value = APIC_DM_EXTINT; - printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); + Dprintk(KERN_INFO "enabled ExtINT on CPU#%d\n", smp_processor_id()); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; - printk("masked ExtINT on CPU#%d\n", smp_processor_id()); + Dprintk(KERN_INFO "masked ExtINT on CPU#%d\n", smp_processor_id()); } apic_write_around(APIC_LVT0, value); diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index 30495a07c5df..3dda8e30f450 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include <asm/processor.h> #include <asm/segment.h> #include <asm/thread_info.h> +#include <asm/ia32.h> #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -43,5 +44,21 @@ int main(void) ENTRY(irqstackptr); BLANK(); #undef ENTRY +#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) + ENTRY(eax); + ENTRY(ebx); + ENTRY(ecx); + ENTRY(edx); + ENTRY(esi); + ENTRY(edi); + ENTRY(ebp); + ENTRY(esp); + ENTRY(eip); + BLANK(); +#undef ENTRY + DEFINE(IA32_RT_SIGFRAME_sigcontext, + offsetof (struct rt_sigframe32, uc.uc_mcontext)); + BLANK(); + return 0; } diff --git a/arch/x86_64/kernel/bluesmoke.c b/arch/x86_64/kernel/bluesmoke.c index 71fca07f8c52..3be19c8a2c05 100644 --- a/arch/x86_64/kernel/bluesmoke.c +++ b/arch/x86_64/kernel/bluesmoke.c @@ -129,12 +129,75 @@ static struct pci_dev *find_k8_nb(void) int cpu = smp_processor_id(); pci_for_each_dev(dev) { if (dev->bus->number==0 && PCI_FUNC(dev->devfn)==3 && - PCI_SLOT(dev->devfn) == (24+cpu)) + PCI_SLOT(dev->devfn) == (24U+cpu)) return dev; } return NULL; } +/* When we have kallsyms we can afford kmcedecode too. 
*/ + +static char *transaction[] = { + "instruction", "data", "generic", "reserved" +}; +static char *cachelevel[] = { + "level 0", "level 1", "level 2", "level generic" +}; +static char *memtrans[] = { + "generic error", "generic read", "generic write", "data read", + "data write", "instruction fetch", "prefetch", "snoop", + "?", "?", "?", "?", "?", "?", "?", "?" /* 16 entries: indexed with a 4-bit field */ +}; +static char *partproc[] = { + "local node origin", "local node response", + "local node observed", "generic" +}; +static char *timeout[] = { + "request didn't time out", + "request timed out" +}; +static char *memoryio[] = { + "memory access", "res.", "i/o access", "generic" +}; +static char *extendederr[] = { + "ecc error", + "crc error", + "sync error", + "mst abort", + "tgt abort", + "gart error", + "rmw error", + "wdog error", + "chipkill ecc error", + "<9>","<10>","<11>","<12>", + "<13>","<14>","<15>" +}; +static char *highbits[32] = { + [31] = "previous error lost", + [30] = "error overflow", + [29] = "error uncorrected", + [28] = "error enable", + [27] = "misc error valid", + [26] = "error address valid", + [25] = "processor context corrupt", + [24] = "res24", + [23] = "res23", + /* 22-15 ecc syndrome bits */ + [14] = "corrected ecc error", + [13] = "uncorrected ecc error", + [12] = "res12", + [11] = "res11", + [10] = "res10", + [9] = "res9", + [8] = "dram scrub error", + [7] = "res7", + /* 6-4 ht link number of error */ + [3] = "res3", + [2] = "res2", + [1] = "err cpu0", + [0] = "err cpu1", +}; + static void check_k8_nb(void) { struct pci_dev *nb; @@ -149,20 +212,52 @@ static void check_k8_nb(void) return; printk(KERN_ERR "Northbridge status %08x%08x\n", statushigh,statuslow); - if (statuslow & 0x10) - printk(KERN_ERR "GART error %d\n", statuslow & 0xf); - if (statushigh & (1<<31)) - printk(KERN_ERR "Lost an northbridge error\n"); - if (statushigh & (1<<25)) - printk(KERN_EMERG "NB status: unrecoverable\n"); + + unsigned short errcode = statuslow & 0xffff; + switch (errcode >> 8) { + case 0: +
printk(KERN_ERR " GART TLB error %s %s\n", + transaction[(errcode >> 2) & 3], + cachelevel[errcode & 3]); + break; + case 1: + if (errcode & (1<<11)) { + printk(KERN_ERR " bus error %s %s %s %s %s\n", + partproc[(errcode >> 10) & 0x3], + timeout[(errcode >> 9) & 1], + memtrans[(errcode >> 4) & 0xf], + memoryio[(errcode >> 2) & 0x3], + cachelevel[(errcode & 0x3)]); + } else if (errcode & (1<<8)) { + printk(KERN_ERR " memory error %s %s %s\n", + memtrans[(errcode >> 4) & 0xf], + transaction[(errcode >> 2) & 0x3], + cachelevel[(errcode & 0x3)]); + } else { + printk(KERN_ERR " unknown error code %x\n", errcode); + } + break; + } + if (statushigh & ((1<<14)|(1<<13))) + printk(KERN_ERR " ECC syndrome bits %x\n", + (((statuslow >> 24) & 0xff) << 8) | ((statushigh >> 15) & 0x7f)); + errcode = (statuslow >> 16) & 0xf; + printk(KERN_ERR " extended error %s\n", extendederr[(statuslow >> 16) & 0xf]); + + /* should only print when it was a HyperTransport related error. */ + printk(KERN_ERR " link number %x\n", (statushigh >> 4) & 3); + + int i; + for (i = 0; i < 32; i++) + if (highbits[i] && (statushigh & (1<<i))) + printk(KERN_ERR " %s\n", highbits[i]); + if (statushigh & (1<<26)) { u32 addrhigh, addrlow; pci_read_config_dword(nb, 0x54, &addrhigh); pci_read_config_dword(nb, 0x50, &addrlow); - printk(KERN_ERR "NB error address %08x%08x\n", addrhigh,addrlow); + printk(KERN_ERR " error address %08x%08x\n", addrhigh,addrlow); } - if (statushigh & (1<<29)) - printk(KERN_EMERG "Error uncorrected\n"); statushigh &= ~(1<<31); pci_write_config_dword(nb, 0x4c, statushigh); } diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 2ead9043b5aa..faebb0cbff40 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -75,7 +75,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) return 0; } -int __init e820_mapped(unsigned long start, unsigned long end, int type) +int __init e820_mapped(unsigned long start, unsigned long end, unsigned 
type) { int i; for (i = 0; i < e820.nr_map; i++) { diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 0934b664e5d5..9fd2b15630ba 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -97,8 +97,8 @@ /* * A newly forked process directly context switches into this. */ +/* rdi: prev */ ENTRY(ret_from_fork) - movq %rax,%rdi /* prev task, returned by __switch_to -> arg1 */ call schedule_tail GET_THREAD_INFO(%rcx) bt $TIF_SYSCALL_TRACE,threadinfo_flags(%rcx) @@ -414,6 +414,7 @@ iret_label: .previous .section .fixup,"ax" /* force a signal here? this matches i386 behaviour */ + /* running with kernel gs */ bad_iret: movq $-9999,%rdi /* better code? */ jmp do_exit @@ -519,21 +520,27 @@ ENTRY(spurious_interrupt) */ ENTRY(error_entry) /* rdi slot contains rax, oldrax contains error code */ - pushq %rsi - movq 8(%rsp),%rsi /* load rax */ - pushq %rdx - pushq %rcx - pushq %rsi /* store rax */ - pushq %r8 - pushq %r9 - pushq %r10 - pushq %r11 cld - SAVE_REST + subq $14*8,%rsp + movq %rsi,13*8(%rsp) + movq 14*8(%rsp),%rsi /* load rax from rdi slot */ + movq %rdx,12*8(%rsp) + movq %rcx,11*8(%rsp) + movq %rsi,10*8(%rsp) /* store rax */ + movq %r8, 9*8(%rsp) + movq %r9, 8*8(%rsp) + movq %r10,7*8(%rsp) + movq %r11,6*8(%rsp) + movq %rbx,5*8(%rsp) + movq %rbp,4*8(%rsp) + movq %r12,3*8(%rsp) + movq %r13,2*8(%rsp) + movq %r14,1*8(%rsp) + movq %r15,(%rsp) + xorl %ebx,%ebx testl $3,CS(%rsp) je error_kernelspace error_swapgs: - xorl %ebx,%ebx swapgs error_sti: movq %rdi,RDI(%rsp) @@ -557,13 +564,14 @@ error_exit: iretq error_kernelspace: + incl %ebx /* There are two places in the kernel that can potentially fault with - usergs. Handle them here. */ + usergs. Handle them here. The exception handlers after + iret run with kernel gs again, so don't set the user space flag. 
*/ cmpq $iret_label,RIP(%rsp) je error_swapgs cmpq $gs_change,RIP(%rsp) je error_swapgs - movl $1,%ebx jmp error_sti /* Reload gs selector with exception handling */ @@ -584,7 +592,9 @@ gs_change: .quad gs_change,bad_gs .previous .section .fixup,"ax" + /* running with kernelgs */ bad_gs: + swapgs /* switch back to user gs */ xorl %eax,%eax movl %eax,%gs jmp 2b @@ -614,12 +624,8 @@ ENTRY(kernel_thread) # clone now call do_fork - + movq %rax,RAX(%rsp) xorl %edi,%edi - cmpq $-1000,%rax - jnb 1f - movl tsk_pid(%rax),%eax -1: movq %rax,RAX(%rsp) /* * It isn't worth to check for reschedule here, diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index ac68891282a6..3680cd2073a1 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -351,7 +351,7 @@ gdt32_end: ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* unused */ + .quad 0x00af9a000000ffff ^ (1<<21) /* __KERNEL_COMPAT32_CS */ .quad 0x00af9a000000ffff /* __KERNEL_CS */ .quad 0x00cf92000000ffff /* __KERNEL_DS */ .quad 0x00cffe000000ffff /* __USER32_CS */ diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index d5bdcd71c527..1cc57b29c933 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1060,7 +1060,8 @@ static void __init setup_ioapic_ids_from_mpc (void) phys_id_present_map |= 1 << i; mp_ioapics[apic].mpc_apicid = i; } else { - printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_INFO + "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid; } diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c index 10bcce1a28ef..0e17adc5e1fa 100644 --- a/arch/x86_64/kernel/ioport.c +++ b/arch/x86_64/kernel/ioport.c @@ -1,5 +1,5 @@ /* - * linux/arch/i386/kernel/ioport.c + * linux/arch/x86_64/kernel/ioport.c * * This contains the io-permission bitmap code - written by obz, with changes * by 
Linus. @@ -15,34 +15,35 @@ #include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/slab.h> +#include <asm/io.h> /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) { - int mask; - unsigned long *bitmap_base = bitmap + (base >> 6); + unsigned long mask; + unsigned long *bitmap_base = bitmap + (base / (8 * sizeof(unsigned long))); /* word index: 64 bits per long, matches base & 0x3f */ unsigned short low_index = base & 0x3f; int length = low_index + extent; if (low_index != 0) { - mask = (~0 << low_index); + mask = (~0UL << low_index); if (length < 64) - mask &= ~(~0 << length); + mask &= ~(~0UL << length); if (new_value) *bitmap_base++ |= mask; else *bitmap_base++ &= ~mask; - length -= 32; + length -= 64; } - mask = (new_value ? ~0 : 0); + mask = (new_value ? ~0UL : 0UL); while (length >= 64) { *bitmap_base++ = mask; length -= 64; } if (length > 0) { - mask = ~(~0 << length); + mask = ~(~0UL << length); if (new_value) *bitmap_base++ |= mask; else @@ -113,3 +114,10 @@ asmlinkage long sys_iopl(unsigned int level, struct pt_regs regs) regs.eflags = (regs.eflags & 0xffffffffffffcfff) | (level << 12); return 0; } + +void eat_key(void) +{ + if (inb(0x60) & 1) + inb(0x64); +} + diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 743637509b91..bd8c7a5eb59c 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -795,7 +795,7 @@ static unsigned int parse_hex_value (const char *buffer, { unsigned char hexnum [HEX_DIGITS]; unsigned long value; - int i; + unsigned i; if (!count) return -EINVAL; diff --git a/arch/x86_64/kernel/ldt.c b/arch/x86_64/kernel/ldt.c index c220554cbab0..1b7c821593b9 100644 --- a/arch/x86_64/kernel/ldt.c +++ b/arch/x86_64/kernel/ldt.c @@ -32,13 +32,13 @@ static void flush_ldt(void *null) } #endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) { void *oldldt; void *newldt; - int
oldsize; + unsigned oldsize; - if (mincount <= pc->size) + if (mincount <= (unsigned)pc->size) return 0; oldsize = pc->size; mincount = (mincount+511)&(~511); @@ -63,7 +63,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #ifdef CONFIG_SMP preempt_disable(); load_LDT(pc); - if (current->mm->cpu_vm_mask != (1<<smp_processor_id())) + if (current->mm->cpu_vm_mask != (1UL<<smp_processor_id())) smp_call_function(flush_ldt, 0, 1, 1); preempt_enable(); #else @@ -116,7 +116,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { if (mm->context.size) { - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) + if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else kfree(mm->context.ldt); @@ -190,7 +190,7 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) } down(&mm->context.sem); - if (ldt_info.entry_number >= mm->context.size) { + if (ldt_info.entry_number >= (unsigned)mm->context.size) { error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); if (error < 0) goto out_unlock; diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index a65202531943..5f4cf23c82fb 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -892,11 +892,15 @@ void __init mp_parse_prt (void) list_for_each(node, &acpi_prt.entries) { entry = list_entry(node, struct acpi_prt_entry, node); - /* We're only interested in static (non-link) entries.
*/ - if (entry->link.handle) + /* Need to get irq for dynamic entry */ + if (entry->link.handle) { + irq = acpi_pci_link_get_irq(entry->link.handle, entry->link.index); + if (!irq) continue; - + } + else irq = entry->link.index; + ioapic = mp_find_ioapic(irq); if (ioapic < 0) continue; diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 6589c652bca5..67a07a9db3b1 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -32,7 +32,6 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int i; BUG_ON(direction == PCI_DMA_NONE); - for (i = 0; i < nents; i++ ) { struct scatterlist *s = &sg[i]; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 6ec7516bcc36..17bf333280a6 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -14,15 +14,10 @@ /* * Notebook: -agpgart_be - check if the simple reservation scheme is enough. - possible future tuning: - fast path for sg streaming mappings - more intelligent flush strategy - flush only a single NB? flush only when - gart area fills up and alloc_iommu wraps. - don't flush on allocation - need to unmap the gart area first to avoid prefetches - by the CPU + fast path for sg streaming mappings - only take the locks once. + more intelligent flush strategy - flush only the NB of the CPU directly + connected to the device? 
move boundary between IOMMU and AGP in GART dynamically */ @@ -67,7 +62,8 @@ static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ #define GPTE_VALID 1 #define GPTE_COHERENT 2 -#define GPTE_ENCODE(x) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) +#define GPTE_ENCODE(x) \ + (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) #define for_all_nb(dev) \ @@ -90,20 +86,23 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static unsigned long alloc_iommu(int size) +static unsigned long alloc_iommu(int size, int *flush) { unsigned long offset, flags; spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); - if (offset == -1) + if (offset == -1) { + *flush = 1; offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); + } if (offset != -1) { set_bit_string(iommu_gart_bitmap, offset, size); next_bit = offset+size; - if (next_bit >= iommu_pages) + if (next_bit >= iommu_pages) { next_bit = 0; + *flush = 1; + } } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); return offset; @@ -114,7 +113,6 @@ static void free_iommu(unsigned long offset, int size) unsigned long flags; spin_lock_irqsave(&iommu_bitmap_lock, flags); clear_bit_string(iommu_gart_bitmap, offset, size); - next_bit = offset; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } @@ -137,6 +135,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, int gfp = GFP_ATOMIC; int i; unsigned long iommu_page; + int flush = 0; if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu) gfp |= GFP_DMA; @@ -150,9 +149,10 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, if (memory == NULL) { return NULL; } else { - int high = (unsigned long)virt_to_bus(memory) + size - >= 0xffffffff; - int mmu = high; + int high = 0, mmu; + if 
(((unsigned long)virt_to_bus(memory) + size) > 0xffffffffUL) + high = 1; + mmu = high; /* remap only when the buffer reaches above 4GB */ if (force_mmu && !(gfp & GFP_DMA)) mmu = 1; if (no_iommu) { @@ -168,7 +168,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, size >>= PAGE_SHIFT; - iommu_page = alloc_iommu(size); + iommu_page = alloc_iommu(size, &flush); if (iommu_page == -1) goto error; @@ -183,6 +183,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); } + if (flush) flush_gart(); *dma_handle = iommu_bus_base + (iommu_page << PAGE_SHIFT); return memory; @@ -199,25 +200,24 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t bus) { - u64 pte; unsigned long iommu_page; - int i; size = round_up(size, PAGE_SIZE); - if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) { - free_pages((unsigned long)vaddr, get_order(size)); - return; - } - size >>= PAGE_SHIFT; - iommu_page = (bus - iommu_bus_base) / PAGE_SIZE; - for (i = 0; i < size; i++) { - pte = iommu_gatt_base[iommu_page + i]; + if (bus >= iommu_bus_base && bus <= iommu_bus_base + iommu_size) { + unsigned pages = size >> PAGE_SHIFT; + iommu_page = (bus - iommu_bus_base) >> PAGE_SHIFT; + vaddr = __va(GPTE_DECODE(iommu_gatt_base[iommu_page])); +#ifdef CONFIG_IOMMU_DEBUG + int i; + for (i = 0; i < pages; i++) { + u64 pte = iommu_gatt_base[iommu_page + i]; BUG_ON((pte & GPTE_VALID) == 0); iommu_gatt_base[iommu_page + i] = 0; - free_page((unsigned long) __va(GPTE_DECODE(pte))); } - flush_gart(); - free_iommu(iommu_page, size); +#endif + free_iommu(iommu_page, pages); + } + free_pages((unsigned long)vaddr, get_order(size)); } #ifdef CONFIG_IOMMU_LEAK @@ -257,7 +257,7 @@ static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir) */ printk(KERN_ERR - "PCI-DMA: Error: ran out out IOMMU space for %p size %lu at device %s[%s]\n", + "PCI-DMA: Out of IOMMU space for
%p size %lu at device %s[%s]\n", addr,size, dev ? dev->dev.name : "?", dev ? dev->slot_name : "?"); if (size > PAGE_SIZE*EMERGENCY_PAGES) { @@ -287,12 +287,12 @@ static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t siz return mmu; } -dma_addr_t __pci_map_single(struct pci_dev *dev, void *addr, size_t size, - int dir, int flush) +dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir) { unsigned long iommu_page; unsigned long phys_mem, bus; int i, npages; + int flush = 0; BUG_ON(dir == PCI_DMA_NONE); @@ -302,7 +302,7 @@ dma_addr_t __pci_map_single(struct pci_dev *dev, void *addr, size_t size, npages = round_up(size + ((u64)addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT; - iommu_page = alloc_iommu(npages); + iommu_page = alloc_iommu(npages, &flush); if (iommu_page == -1) { iommu_full(dev, addr, size, dir); return iommu_bus_base; @@ -343,12 +343,14 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) { unsigned long iommu_page; - int i, npages; + int npages; if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr > iommu_bus_base + iommu_size) return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; npages = round_up(size + (dma_addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT; +#ifdef CONFIG_IOMMU_DEBUG + int i; for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = 0; #ifdef CONFIG_IOMMU_LEAK @@ -356,11 +358,11 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, iommu_leak_tab[iommu_page + i] = 0; #endif } - flush_gart(); +#endif free_iommu(iommu_page, npages); } -EXPORT_SYMBOL(__pci_map_single); +EXPORT_SYMBOL(pci_map_single); EXPORT_SYMBOL(pci_unmap_single); static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -407,7 +409,7 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. 
*/ -static __init int init_k8_gatt(agp_kern_info *info) +static __init int init_k8_gatt(struct agp_kern_info *info) { struct pci_dev *dev; void *gatt; @@ -443,7 +445,7 @@ static __init int init_k8_gatt(agp_kern_info *info) u32 ctl; u32 gatt_reg; - gatt_reg = ((u64)gatt) >> 12; + gatt_reg = __pa(gatt) >> 12; gatt_reg <<= 4; pci_write_config_dword(dev, 0x98, gatt_reg); pci_read_config_dword(dev, 0x90, &ctl); @@ -465,9 +467,11 @@ static __init int init_k8_gatt(agp_kern_info *info) return -1; } +extern int agp_amdk8_init(void); + void __init pci_iommu_init(void) { - agp_kern_info info; + struct agp_kern_info info; unsigned long aper_size; unsigned long iommu_start; @@ -476,7 +480,6 @@ void __init pci_iommu_init(void) #else /* Add other K8 AGP bridge drivers here */ no_agp = no_agp || - (agp_init() < 0) || (agp_amdk8_init() < 0) || (agp_copy_info(&info) < 0); #endif @@ -536,8 +539,17 @@ void __init pci_iommu_init(void) iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); bad_dma_address = iommu_bus_base; - change_page_attr(virt_to_page(__va(iommu_start)), iommu_pages, PAGE_KERNEL); - global_flush_tlb(); + /* + * Unmap the IOMMU part of the GART. The alias of the page is always mapped + * with cache enabled and there is no full cache coherency across the GART + * remapping. The unmapping avoids automatic prefetches from the CPU + * allocating cache lines in there. All CPU accesses are done via the + * direct mapping to the backing memory. The GART address is only used by PCI + * devices. 
+ */ + clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); + + flush_gart(); } /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]] diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 36df3cac213e..e2c5421dcb1f 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -216,7 +216,12 @@ void flush_thread(void) { struct task_struct *tsk = current; - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; + tsk->thread.debugreg3 = 0; + tsk->thread.debugreg6 = 0; + tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -285,7 +290,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, childregs->rax = 0; childregs->rsp = rsp; - if (rsp == ~0) { + if (rsp == ~0UL) { childregs->rsp = (unsigned long)childregs; } p->set_child_tid = p->clear_child_tid = NULL; @@ -294,7 +299,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, p->thread.rsp0 = (unsigned long) (childregs+1); p->thread.userrsp = me->thread.userrsp; - p->thread.rip = (unsigned long) ret_from_fork; + set_ti_thread_flag(p->thread_info, TIF_FORK); p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; @@ -335,8 +340,7 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,register) \ - set_debug(thread->debugreg[register], register) +#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r) /* * switch_to(x,y) should switch tasks from x to y. 
@@ -422,7 +426,7 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * /* * Now maybe reload the debug registers */ - if (unlikely(next->debugreg[7])) { + if (unlikely(next->debugreg7)) { loaddebug(next, 0); loaddebug(next, 1); loaddebug(next, 2); @@ -490,19 +494,15 @@ void set_personality_64bit(void) asmlinkage long sys_fork(struct pt_regs regs) { - struct task_struct *p; - p = do_fork(SIGCHLD, regs.rsp, &regs, 0, NULL, NULL); - return IS_ERR(p) ? PTR_ERR(p) : p->pid; + return do_fork(SIGCHLD, regs.rsp, &regs, 0, NULL, NULL); } asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void *parent_tid, void *child_tid, struct pt_regs regs) { - struct task_struct *p; if (!newsp) newsp = regs.rsp; - p = do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0, + return do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0, parent_tid, child_tid); - return IS_ERR(p) ? PTR_ERR(p) : p->pid; } /* @@ -517,10 +517,8 @@ asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void * */ asmlinkage long sys_vfork(struct pt_regs regs) { - struct task_struct *p; - p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0, NULL, NULL); - return IS_ERR(p) ? 
PTR_ERR(p) : p->pid; } /* diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index 20a66a609ecb..8aad386340dc 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -178,11 +178,11 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno) } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) { struct task_struct *child; - struct user * dummy = NULL; long i, ret; + unsigned ui; /* This lock_kernel fixes a subtle race with suid exec */ lock_kernel(); @@ -240,18 +240,35 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) unsigned long tmp; ret = -EIO; - if ((addr & 7) || addr < 0 || + if ((addr & 7) || addr > sizeof(struct user) - 7) break; - tmp = 0; /* Default return condition */ - if(addr < sizeof(struct user_regs_struct)) + switch (addr) { + case 0 ... sizeof(struct user_regs_struct): tmp = getreg(child, addr); - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 3; - tmp = child->thread.debugreg[addr]; + break; + case offsetof(struct user, u_debugreg[0]): + tmp = child->thread.debugreg0; + break; + case offsetof(struct user, u_debugreg[1]): + tmp = child->thread.debugreg1; + break; + case offsetof(struct user, u_debugreg[2]): + tmp = child->thread.debugreg2; + break; + case offsetof(struct user, u_debugreg[3]): + tmp = child->thread.debugreg3; + break; + case offsetof(struct user, u_debugreg[6]): + tmp = child->thread.debugreg6; + break; + case offsetof(struct user, u_debugreg[7]): + tmp = child->thread.debugreg7; + break; + default: + tmp = 0; + break; } ret = put_user(tmp,(unsigned long *) data); break; @@ -268,47 +285,53 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; 
- if ((addr & 7) || addr < 0 || + if ((addr & 7) || addr > sizeof(struct user) - 7) break; - if (addr < sizeof(struct user_regs_struct)) { + switch (addr) { + case 0 ... sizeof(struct user_regs_struct): ret = putreg(child, addr, data); break; - } - /* We need to be very careful here. We implicitly - want to modify a portion of the task_struct, and we - have to be selective about what portions we allow someone - to modify. */ - - ret = -EIO; - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - - if(addr == (long) &dummy->u_debugreg[4]) break; - if(addr == (long) &dummy->u_debugreg[5]) break; - if(addr < (long) &dummy->u_debugreg[4] && - ((unsigned long) data) >= TASK_SIZE-3) break; - - if (addr == (long) &dummy->u_debugreg[6]) { + /* Disallows to set a breakpoint into the vsyscall */ + case offsetof(struct user, u_debugreg[0]): + if (data >= TASK_SIZE-7) break; + child->thread.debugreg0 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[1]): + if (data >= TASK_SIZE-7) break; + child->thread.debugreg1 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[2]): + if (data >= TASK_SIZE-7) break; + child->thread.debugreg2 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[3]): + if (data >= TASK_SIZE-7) break; + child->thread.debugreg3 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[6]): if (data >> 32) - goto out_tsk; - } - - if(addr == (long) &dummy->u_debugreg[7]) { + break; + child->thread.debugreg6 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[7]): data &= ~DR_CONTROL_RESERVED; for(i=0; i<4; i++) if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - goto out_tsk; - } - - addr -= (long) &dummy->u_debugreg; - addr = addr >> 3; - child->thread.debugreg[addr] = data; + break; + if (i == 4) { + child->thread.debugreg7 = data; ret = 0; } break; - + } + break; case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: 
{ /* restart after signal. */ long tmp; @@ -408,8 +431,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } - for ( i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long) ) { - __put_user(getreg(child, i),(unsigned long *) data); + for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { + __put_user(getreg(child, ui),(unsigned long *) data); data += sizeof(long); } ret = 0; @@ -422,9 +445,9 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } - for ( i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long) ) { + for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { __get_user(tmp, (unsigned long *) data); - putreg(child, i, tmp); + putreg(child, ui, tmp); data += sizeof(long); } ret = 0; diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index d7dad77963e8..f8e41fcb2580 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c @@ -9,7 +9,9 @@ #include <asm/kdebug.h> #include <asm/delay.h> #include <asm/hw_irq.h> - +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> /* * Power off function, if any @@ -17,35 +19,37 @@ void (*pm_power_off)(void); static long no_idt[3]; -static int reboot_mode; - -#ifdef CONFIG_SMP -int reboot_smp = 0; -static int reboot_cpu = -1; -#endif - +static enum { + BOOT_BIOS = 'b', + BOOT_TRIPLE = 't', + BOOT_KBD = 'k' +} reboot_type = BOOT_KBD; +static int reboot_mode = 0; + +/* reboot=b[ios] | t[riple] | k[bd] [, [w]arm | [c]old] + bios Use the CPU reboto vector for warm reset + warm Don't set the cold reboot flag + cold Set the cold reboto flag + triple Force a triple fault (init) + kbd Use the keyboard controller. 
cold reset (default) + */ static int __init reboot_setup(char *str) { - while(1) { + for (;;) { switch (*str) { - case 'w': /* "warm" reboot (no memory testing etc) */ + case 'w': reboot_mode = 0x1234; break; - case 'c': /* "cold" reboot (with memory testing etc) */ - reboot_mode = 0x0; + + case 'c': + reboot_mode = 0; break; -#ifdef CONFIG_SMP - case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ - reboot_smp = 1; - if (isdigit(str[1])) - sscanf(str+1, "%d", &reboot_cpu); - else if (!strncmp(str,"smp",3)) - sscanf(str+3, "%d", &reboot_cpu); - /* we will leave sorting out the final value - when we are ready to reboot, since we might not - have set up boot_cpu_id or smp_num_cpu */ + + case 't': + case 'b': + case 'k': + reboot_type = *str; break; -#endif } if((str = strchr(str,',')) != NULL) str++; @@ -57,6 +61,56 @@ static int __init reboot_setup(char *str) __setup("reboot=", reboot_setup); +/* overwrites random kernel memory. Should not be kernel .text */ +#define WARMBOOT_TRAMP 0x1000UL + +static void reboot_warm(void) +{ + extern unsigned char warm_reboot[], warm_reboot_end[]; + printk("warm reboot\n"); + + local_irq_disable(); + + /* restore identity mapping */ + init_level4_pgt[0] = __pml4(__pa(level3_ident_pgt) | 7); + __flush_tlb_all(); + + /* Move the trampoline to low memory */ + memcpy(__va(WARMBOOT_TRAMP), warm_reboot, warm_reboot_end - warm_reboot); + + /* Start it in compatibility mode. */ + asm volatile( " pushq $0\n" /* ss */ + " pushq $0x2000\n" /* rsp */ + " pushfq\n" /* eflags */ + " pushq %[cs]\n" + " pushq %[target]\n" + " iretq" :: + [cs] "i" (__KERNEL_COMPAT32_CS), + [target] "b" (WARMBOOT_TRAMP)); +} + +#ifdef CONFIG_SMP +static void smp_halt(void) +{ + int cpuid = safe_smp_processor_id(); + + /* Only run this on the boot processor */ + if (cpuid != boot_cpu_id) { + static int first_entry = 1; + if (first_entry) { + first_entry = 0; + smp_call_function((void *)machine_restart, NULL, 1, 0); + } else { + /* AP reentering. 
just halt */ + for(;;) + asm volatile("hlt"); + } + + } + smp_send_stop(); +} +#endif + static inline void kb_wait(void) { int i; @@ -68,48 +122,24 @@ static inline void kb_wait(void) void machine_restart(char * __unused) { -#ifdef CONFIG_SMP - int cpuid; - - cpuid = GET_APIC_ID(apic_read(APIC_ID)); - - if (reboot_smp) { - - /* check to see if reboot_cpu is valid - if its not, default to the BSP */ - if ((reboot_cpu == -1) || - (reboot_cpu > (NR_CPUS -1)) || - !(phys_cpu_present_map & (1<<cpuid))) - reboot_cpu = boot_cpu_id; - - reboot_smp = 0; /* use this as a flag to only go through this once*/ - /* re-run this function on the other CPUs - it will fall though this section since we have - cleared reboot_smp, and do the reboot if it is the - correct CPU, otherwise it halts. */ - if (reboot_cpu != cpuid) - smp_call_function((void *)machine_restart , NULL, 1, 0); - } + int i; - /* if reboot_cpu is still -1, then we want a tradional reboot, - and if we are not running on the reboot_cpu,, halt */ - if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) { - for (;;) - __asm__ __volatile__ ("hlt"); - } - /* - * Stop all CPUs and turn off local APICs and the IO-APIC, so - * other OSs see a clean IRQ state. - */ - smp_send_stop(); - disable_IO_APIC(); +#if CONFIG_SMP + smp_halt(); #endif + + disable_IO_APIC(); - /* rebooting needs to touch the page at absolute addr 0 */ + /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; + for (;;) { - int i; - /* First fondle with the keyboard controller. */ + /* Could also try the reset bit in the Hammer NB */ + switch (reboot_type) { + case BOOT_BIOS: + reboot_warm(); + + case BOOT_KBD: for (i=0; i<100; i++) { kb_wait(); udelay(50); @@ -117,11 +147,13 @@ void machine_restart(char * __unused) udelay(50); } - /* Could do reset through the northbridge of Hammer here. */ - - /* That didn't work - force a triple fault.. 
*/ + case BOOT_TRIPLE: __asm__ __volatile__("lidt %0": :"m" (no_idt)); __asm__ __volatile__("int3"); + + reboot_type = BOOT_KBD; + break; + } } } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 3f0a3e66aff6..2143816411de 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -201,6 +201,11 @@ static __init void parse_cmdline_early (char ** cmdline_p) if (!memcmp(from, "mem=", 4)) parse_memopt(from+4, &from); +#ifdef CONFIG_DISCONTIGMEM + if (!memcmp(from, "numa=", 5)) + numa_setup(from+5); +#endif + #ifdef CONFIG_GART_IOMMU if (!memcmp(from,"iommu=",6)) { iommu_setup(from+6); @@ -236,8 +241,6 @@ static void __init contig_initmem_init(void) void __init setup_arch(char **cmdline_p) { - int i; - Dprintk("setup_arch\n"); ROOT_DEV = ORIG_ROOT_DEV; @@ -367,9 +370,12 @@ void __init setup_arch(char **cmdline_p) request_resource(&iomem_resource, &vram_resource); + { + unsigned i; /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, standard_io_resources+i); + } pci_mem_start = IOMAP_START; @@ -694,7 +700,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "power management:"); { - int i; + unsigned i; for (i = 0; i < 32; i++) if (c->x86_power & (1 << i)) { if (i < ARRAY_SIZE(x86_power_flags)) diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 02d8ab2a7d3a..6b71af1d1060 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -23,6 +23,7 @@ #include <asm/percpu.h> #include <asm/mtrr.h> #include <asm/proto.h> +#include <asm/mman.h> char x86_boot_params[2048] __initdata = {0,}; @@ -40,22 +41,79 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned; unsigned long __supported_pte_mask = ~0UL; -static int do_not_nx = 1; - +static int do_not_nx __initdata = 0; +unsigned long vm_stack_flags = 
__VM_STACK_FLAGS; +unsigned long vm_stack_flags32 = __VM_STACK_FLAGS; +unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS; +unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS; +unsigned long vm_force_exec32 = PROT_EXEC; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable +off Disable +noforce (default) Don't enable by default for heap/stack/data, + but allow PROT_EXEC to be effective + +*/ static int __init nonx_setup(char *str) { - if (!strncmp(str,"off",3)) { - __supported_pte_mask &= ~_PAGE_NX; - do_not_nx = 1; - } else if (!strncmp(str, "on",3)) { - do_not_nx = 0; + if (!strncmp(str, "on",3)) { __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + vm_data_default_flags &= ~VM_EXEC; + vm_stack_flags &= ~VM_EXEC; + } else if (!strncmp(str, "noforce",7) || !strncmp(str,"off",3)) { + do_not_nx = (str[0] == 'o'); + if (do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; + vm_data_default_flags |= VM_EXEC; + vm_stack_flags |= VM_EXEC; } return 1; } __setup("noexec=", nonx_setup); +/* noexec32=opt{,opt} + +Control the no exec default for 32bit processes. Can be also overwritten +per executable using ELF header flags (e.g. needed for the X server) +Requires noexec=on or noexec=noforce to be effective. + +Valid options: + all,on Heap,stack,data is non executable. + off (default) Heap,stack,data is executable + stack Stack is non executable, heap/data is. 
+ force Don't imply PROT_EXEC for PROT_READ + compat (default) Imply PROT_EXEC for PROT_READ + +*/ + static int __init nonx32_setup(char *str) + { + char *s; + while ((s = strsep(&str, ",")) != NULL) { + if (!strcmp(s, "all") || !strcmp(s,"on")) { + vm_data_default_flags32 &= ~VM_EXEC; + vm_stack_flags32 &= ~VM_EXEC; + } else if (!strcmp(s, "off")) { + vm_data_default_flags32 |= VM_EXEC; + vm_stack_flags32 |= VM_EXEC; + } else if (!strcmp(s, "stack")) { + vm_data_default_flags32 |= VM_EXEC; + vm_stack_flags32 &= ~VM_EXEC; + } else if (!strcmp(s, "force")) { + vm_force_exec32 = 0; + } else if (!strcmp(s, "compat")) { + vm_force_exec32 = PROT_EXEC; + } + } + return 1; +} + +__setup("noexec32=", nonx32_setup); + #ifndef __GENERIC_PER_CPU unsigned long __per_cpu_offset[NR_CPUS]; diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index c40b2b293eb2..2bd0172ef482 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -371,7 +371,7 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, regs->rax = regs->orig_rax; regs->rip -= 2; } - if (regs->rax == -ERESTART_RESTARTBLOCK){ + if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK){ regs->rax = __NR_restart_syscall; regs->rip -= 2; } @@ -434,8 +434,8 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * have been cleared if the watchpoint triggered * inside the kernel. */ - if (current->thread.debugreg[7]) - asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg[7])); + if (current->thread.debugreg7) + asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg7)); /* Whee! Actually deliver the signal. */ handle_signal(signr, &info, oldset, regs); @@ -446,9 +446,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) /* Did we come from a system call? 
*/ if (regs->orig_rax >= 0) { /* Restart the system call - no handlers present */ - if (regs->rax == -ERESTARTNOHAND || - regs->rax == -ERESTARTSYS || - regs->rax == -ERESTARTNOINTR) { + long res = regs->rax; + if (res == -ERESTARTNOHAND || + res == -ERESTARTSYS || + res == -ERESTARTNOINTR) { regs->rax = regs->orig_rax; regs->rip -= 2; } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 3a4881757981..d554126c5e64 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -42,6 +42,7 @@ #include <linux/smp_lock.h> #include <linux/irq.h> #include <linux/bootmem.h> +#include <linux/thread_info.h> #include <linux/delay.h> #include <linux/mc146818rtc.h> @@ -123,7 +124,7 @@ static void __init synchronize_tsc_bp (void) unsigned long long t0; unsigned long long sum, avg; long long delta; - unsigned long one_usec; + long one_usec; int buggy = 0; extern unsigned cpu_khz; @@ -339,7 +340,7 @@ extern int cpu_idle(void); /* * Activate a secondary processor. */ -int __init start_secondary(void *unused) +void __init start_secondary(void) { /* * Dont put anything before smp_callin(), SMP @@ -380,29 +381,7 @@ int __init start_secondary(void *unused) set_bit(smp_processor_id(), &cpu_online_map); wmb(); - return cpu_idle(); -} - -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __init initialize_secondary(void) -{ - struct task_struct *me = stack_current(); - - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. 
- */ - asm volatile( - "movq %0,%%rsp\n\t" - "jmp *%1" - : - :"r" (me->thread.rsp),"r" (me->thread.rip)); + cpu_idle(); } extern volatile unsigned long init_rsp; @@ -412,16 +391,16 @@ static struct task_struct * __init fork_by_hand(void) { struct pt_regs regs; /* - * don't care about the rip and regs settings since + * don't care about the eip and regs settings since * we'll never reschedule the forked task. */ - return do_fork(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL); + return copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL); } #if APIC_DEBUG static inline void inquire_remote_apic(int apicid) { - int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; int timeout, status; @@ -596,6 +575,7 @@ static void __init do_boot_cpu (int apicid) idle = fork_by_hand(); if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); + wake_up_forked_process(idle); /* * We remove it from the pidhash and the runqueue @@ -603,22 +583,19 @@ static void __init do_boot_cpu (int apicid) */ init_idle(idle,cpu); - idle->thread.rip = (unsigned long)start_secondary; -// idle->thread.rsp = (unsigned long)idle->thread_info + THREAD_SIZE - 512; - unhash_process(idle); cpu_pda[cpu].pcurrent = idle; - /* start_eip had better be page-aligned! 
*/ start_rip = setup_trampoline(); - init_rsp = (unsigned long)idle->thread_info + PAGE_SIZE + 1024; + init_rsp = idle->thread.rsp; init_tss[cpu].rsp0 = init_rsp; - initial_code = initialize_secondary; + initial_code = start_secondary; + clear_ti_thread_flag(idle->thread_info, TIF_FORK); - printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx rsp2 %lx\n", cpu, apicid, - start_rip, idle->thread.rsp, init_rsp); + printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, + start_rip, init_rsp); /* * This grunge runs the startup process for @@ -676,7 +653,7 @@ static void __init do_boot_cpu (int apicid) if (test_bit(cpu, &cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("OK.\n"); - printk("KERN_INFO CPU%d: ", cpu); + printk(KERN_INFO "CPU%d: ", cpu); print_cpu_info(&cpu_data[cpu]); Dprintk("CPU has booted.\n"); } else { @@ -708,7 +685,7 @@ unsigned long cache_decay_ticks; static void smp_tune_scheduling (void) { - unsigned long cachesize; /* kB */ + int cachesize; /* kB */ unsigned long bandwidth = 1000; /* MB/s */ /* * Rough estimation for SMP scheduling, this is the number of @@ -753,7 +730,7 @@ static void smp_tune_scheduling (void) static void __init smp_boot_cpus(unsigned int max_cpus) { - int apicid, cpu; + unsigned apicid, cpu; /* * Setup boot CPU information diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index b33cc16a8294..0b4be5fb894b 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c @@ -117,5 +117,5 @@ asmlinkage long sys_uname(struct new_utsname * name) asmlinkage long wrap_sys_shmat(int shmid, char *shmaddr, int shmflg) { unsigned long raddr; - return sys_shmat(shmid,shmaddr,shmflg,&raddr) ?: raddr; + return sys_shmat(shmid,shmaddr,shmflg,&raddr) ?: (long)raddr; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 1d332418a56f..b920ae8e3192 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ 
-584,12 +584,12 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code) /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg[7]) { + if (!tsk->thread.debugreg7) { goto clear_dr7; } } - tsk->thread.debugreg[6] = condition; + tsk->thread.debugreg6 = condition; /* Mask out spurious TF errors due to lazy TF clearing */ if (condition & DR_STEP) { diff --git a/arch/x86_64/kernel/vsyscall.S b/arch/x86_64/kernel/vsyscall.S new file mode 100644 index 000000000000..3bd9a27c7eed --- /dev/null +++ b/arch/x86_64/kernel/vsyscall.S @@ -0,0 +1,169 @@ +/* + * Code for the vsyscall page. This version uses the syscall instruction. + */ + +#include <asm/ia32_unistd.h> +#include <asm/offset.h> + + .text + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function +__kernel_vsyscall: +.LSTART_vsyscall: + push %ebp +.Lpush_ebp: + movl %ecx, %ebp + syscall + popl %ebp +.Lpop_ebp: + ret +.LEND_vsyscall: + .size __kernel_vsyscall,.-.LSTART_vsyscall + + .balign 32 + .globl __kernel_sigreturn + .type __kernel_sigreturn,@function +__kernel_sigreturn: +.LSTART_sigreturn: + popl %eax + movl $__NR_ia32_sigreturn, %eax + syscall +.LEND_sigreturn: + .size __kernel_sigreturn,.-.LSTART_sigreturn + + .balign 32 + .globl __kernel_rt_sigreturn + .type __kernel_rt_sigreturn,@function +__kernel_rt_sigreturn: +.LSTART_rt_sigreturn: + movl $__NR_ia32_rt_sigreturn, %eax + syscall +.LEND_rt_sigreturn: + .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + + .section .eh_frame,"a",@progbits +.LSTARTFRAME: + .long .LENDCIE-.LSTARTCIE +.LSTARTCIE: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. 
*/ + .byte 0x0c /* DW_CFA_def_cfa */ + .uleb128 4 + .uleb128 4 + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .uleb128 1 + .align 4 +.LENDCIE: + + .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ +.LSTARTFDE1: + .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ + .long .LSTART_vsyscall-. /* PC-relative start address */ + .long .LEND_vsyscall-.LSTART_vsyscall + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We have to record all changes of the stack pointer. */ + .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .uleb128 8 + .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ + .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ + .byte 0xc5 /* DW_CFA_restore %ebp */ + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .uleb128 4 + .align 4 +.LENDFDE1: + + .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */ +.LSTARTFDE2: + .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */ + /* HACK: The dwarf2 unwind routines will subtract 1 from the + return address to get an address in the middle of the + presumed call instruction. Since we didn't get here via + a call, we need to include the nop before the real start + to make up for it. */ + .long .LSTART_sigreturn-1-. /* PC-relative start address */ + .long .LEND_sigreturn-.LSTART_sigreturn+1 + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + complicated by the fact that the "CFA" is always assumed to + be the value of the stack pointer in the caller. This means + that we must define the CFA of this body of code to be the + saved value of the stack pointer in the sigcontext. Which + also means that there is no fixed relation to the other + saved registers, which means that we must use DW_CFA_expression + to compute their addresses. It also means that when we + adjust the stack with the popl, we have to do it all over again. 
*/ + +#define do_cfa_expr(offset) \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ + .byte 0x06; /* DW_OP_deref */ \ +1: + +#define do_expr(regno, offset) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ +1: + + do_cfa_expr(IA32_SIGCONTEXT_esp+4) + do_expr(0, IA32_SIGCONTEXT_eax+4) + do_expr(1, IA32_SIGCONTEXT_ecx+4) + do_expr(2, IA32_SIGCONTEXT_edx+4) + do_expr(3, IA32_SIGCONTEXT_ebx+4) + do_expr(5, IA32_SIGCONTEXT_ebp+4) + do_expr(6, IA32_SIGCONTEXT_esi+4) + do_expr(7, IA32_SIGCONTEXT_edi+4) + do_expr(8, IA32_SIGCONTEXT_eip+4) + + .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ + + do_cfa_expr(IA32_SIGCONTEXT_esp) + do_expr(0, IA32_SIGCONTEXT_eax) + do_expr(1, IA32_SIGCONTEXT_ecx) + do_expr(2, IA32_SIGCONTEXT_edx) + do_expr(3, IA32_SIGCONTEXT_ebx) + do_expr(5, IA32_SIGCONTEXT_ebp) + do_expr(6, IA32_SIGCONTEXT_esi) + do_expr(7, IA32_SIGCONTEXT_edi) + do_expr(8, IA32_SIGCONTEXT_eip) + + .align 4 +.LENDFDE2: + + .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */ +.LSTARTFDE3: + .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */ + /* HACK: See above wrt unwind library assumptions. */ + .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ + .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 + .uleb128 0 /* Augmentation */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + slightly less complicated than the above, since we don't + modify the stack pointer in the process. 
*/ + + do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp) + do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax) + do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx) + do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx) + do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx) + do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp) + do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi) + do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi) + do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip) + + .align 4 +.LENDFDE3: diff --git a/arch/x86_64/kernel/warmreboot.S b/arch/x86_64/kernel/warmreboot.S new file mode 100644 index 000000000000..9bc2905e683e --- /dev/null +++ b/arch/x86_64/kernel/warmreboot.S @@ -0,0 +1,83 @@ +/* + * Switch back to real mode and call the BIOS reboot vector. + * This is a trampoline copied around in process.c + * Written 2003 by Andi Kleen, SuSE Labs. + */ + +#include <asm/msr.h> + +#define R(x) x-warm_reboot(%ebx) +#define R64(x) x-warm_reboot(%rbx) + + /* running in identity mapping and in the first 64k of memory + and in compatibility mode. This must be position independent */ + + /* Follows 14.7 "Leaving Long Mode" in the AMD x86-64 manual, volume 2 + and 8.9.2 "Switching Back to Real-Address Mode" in the Intel IA32 + manual, volume 2 */ + + /* ebx: self pointer to warm_reboot */ + + .globl warm_reboot +warm_reboot: + addl %ebx, R64(real_mode_desc) /* relocate tables */ + addl %ebx,2+R64(warm_gdt_desc) + + movq %cr0,%rax + btr $31,%rax + movq %rax,%cr0 /* disable paging */ + jmp 1f /* flush prefetch queue */ + + .code32 +1: movl $MSR_EFER,%ecx + rdmsr + andl $~((1<<_EFER_LME)|(1<<_EFER_SCE)|(1<<_EFER_NX)),%eax + wrmsr /* disable long mode in EFER */ + + xorl %eax,%eax + movl %eax,%cr3 /* flush tlb */ + + /* Running protected mode without paging now */ + + wbinvd /* flush caches. Needed? 
*/ + + lidt R(warm_idt_desc) + lgdt R(warm_gdt_desc) + + movl $0x10,%ecx /* load segment registers with real mode settings */ + movl %ecx,%ds + movl %ecx,%es + movl %ecx,%fs + movl %ecx,%gs + movl %ecx,%ss + + lea R(real_mode_desc),%eax + ljmp *(%eax) + + .code16: +real_mode: + xorl %eax,%eax + movl %eax,%cr0 + + /* some people claim $0xf000,0xfff0 is better. Use what 32bit linux uses. */ + /* code as bytes because gas has problems with it */ + .byte 0xea,0xf0,0xff,0x00,0xf0 /* ljmp 0xf000:0xfff0 */ + +real_mode_desc: + .long real_mode - warm_reboot + .short 8 +warm_gdt_desc: + .short 8*3 + .long warm_gdt - warm_reboot +warm_gdt: + .quad 0 + .quad 0x00009a000000ffff /* 16-bit real-mode 64k code at 0x00000000 */ + .quad 0x000092000100ffff /* 16-bit real-mode 64k data at 0x00000100 */ + +warm_idt_desc: + .short 0x3ff + .long 0 + + .globl warm_reboot_end +warm_reboot_end: + diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 5ce2431b672e..2701a00ff7d5 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -207,3 +207,12 @@ EXPORT_SYMBOL(init_level4_pgt); extern unsigned long __supported_pte_mask; EXPORT_SYMBOL(__supported_pte_mask); + +#ifdef CONFIG_DISCONTIGMEM +EXPORT_SYMBOL(memnode_shift); +EXPORT_SYMBOL(memnodemap); +EXPORT_SYMBOL(node_data); +EXPORT_SYMBOL(fake_node); +#endif + +EXPORT_SYMBOL(clear_page); diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S index 2bada6c3e513..2514345e095e 100644 --- a/arch/x86_64/lib/copy_user.S +++ b/arch/x86_64/lib/copy_user.S @@ -6,16 +6,13 @@ #define FIX_ALIGNMENT 1 -#define movnti movq /* write to cache for now */ -#define prefetch prefetcht2 - #include <asm/current.h> #include <asm/offset.h> #include <asm/thread_info.h> /* Standard copy_to_user with segment limit checking */ .globl copy_to_user - .p2align + .p2align 4 copy_to_user: GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -27,7 +24,7 @@ copy_to_user: /* Standard copy_from_user with segment 
limit checking */ .globl copy_from_user - .p2align + .p2align 4 copy_from_user: GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -58,23 +55,23 @@ bad_to_user: * rdx count * * Output: - * eax uncopied bytes or 0 if successful. + * eax uncopied bytes or 0 if successfull. */ .globl copy_user_generic + .p2align 4 copy_user_generic: /* Put the first cacheline into cache. This should handle the small movements in ioctls etc., but not penalize the bigger filesystem data copies too much. */ pushq %rbx - prefetch (%rsi) xorl %eax,%eax /*zero for the exception handler */ #ifdef FIX_ALIGNMENT /* check for bad alignment of destination */ movl %edi,%ecx andl $7,%ecx - jnz bad_alignment -after_bad_alignment: + jnz .Lbad_alignment +.Lafter_bad_alignment: #endif movq %rdx,%rcx @@ -82,133 +79,133 @@ after_bad_alignment: movl $64,%ebx shrq $6,%rdx decq %rdx - js handle_tail - jz loop_no_prefetch - -loop: - prefetch 64(%rsi) + js .Lhandle_tail -loop_no_prefetch: -s1: movq (%rsi),%r11 -s2: movq 1*8(%rsi),%r8 -s3: movq 2*8(%rsi),%r9 -s4: movq 3*8(%rsi),%r10 -d1: movnti %r11,(%rdi) -d2: movnti %r8,1*8(%rdi) -d3: movnti %r9,2*8(%rdi) -d4: movnti %r10,3*8(%rdi) + .p2align 4 +.Lloop: +.Ls1: movq (%rsi),%r11 +.Ls2: movq 1*8(%rsi),%r8 +.Ls3: movq 2*8(%rsi),%r9 +.Ls4: movq 3*8(%rsi),%r10 +.Ld1: movq %r11,(%rdi) +.Ld2: movq %r8,1*8(%rdi) +.Ld3: movq %r9,2*8(%rdi) +.Ld4: movq %r10,3*8(%rdi) -s5: movq 4*8(%rsi),%r11 -s6: movq 5*8(%rsi),%r8 -s7: movq 6*8(%rsi),%r9 -s8: movq 7*8(%rsi),%r10 -d5: movnti %r11,4*8(%rdi) -d6: movnti %r8,5*8(%rdi) -d7: movnti %r9,6*8(%rdi) -d8: movnti %r10,7*8(%rdi) - - addq %rbx,%rsi - addq %rbx,%rdi +.Ls5: movq 4*8(%rsi),%r11 +.Ls6: movq 5*8(%rsi),%r8 +.Ls7: movq 6*8(%rsi),%r9 +.Ls8: movq 7*8(%rsi),%r10 +.Ld5: movq %r11,4*8(%rdi) +.Ld6: movq %r8,5*8(%rdi) +.Ld7: movq %r9,6*8(%rdi) +.Ld8: movq %r10,7*8(%rdi) decq %rdx - jz loop_no_prefetch - jns loop -handle_tail: + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + + jns .Lloop + + .p2align 4 +.Lhandle_tail: movl %ecx,%edx andl 
$63,%ecx shrl $3,%ecx - jz handle_7 + jz .Lhandle_7 movl $8,%ebx -loop_8: -s9: movq (%rsi),%r8 -d9: movq %r8,(%rdi) - addq %rbx,%rdi - addq %rbx,%rsi + .p2align 4 +.Lloop_8: +.Ls9: movq (%rsi),%r8 +.Ld9: movq %r8,(%rdi) decl %ecx - jnz loop_8 + leaq 8(%rdi),%rdi + leaq 8(%rsi),%rsi + jnz .Lloop_8 -handle_7: +.Lhandle_7: movl %edx,%ecx andl $7,%ecx - jz ende -loop_1: -s10: movb (%rsi),%bl -d10: movb %bl,(%rdi) + jz .Lende + .p2align 4 +.Lloop_1: +.Ls10: movb (%rsi),%bl +.Ld10: movb %bl,(%rdi) incq %rdi incq %rsi decl %ecx - jnz loop_1 + jnz .Lloop_1 -ende: - sfence +.Lende: popq %rbx ret #ifdef FIX_ALIGNMENT /* align destination */ -bad_alignment: + .p2align 4 +.Lbad_alignment: movl $8,%r9d subl %ecx,%r9d movl %r9d,%ecx subq %r9,%rdx - jz small_align - js small_align -align_1: -s11: movb (%rsi),%bl -d11: movb %bl,(%rdi) + jz .Lsmall_align + js .Lsmall_align +.Lalign_1: +.Ls11: movb (%rsi),%bl +.Ld11: movb %bl,(%rdi) incq %rsi incq %rdi decl %ecx - jnz align_1 - jmp after_bad_alignment -small_align: + jnz .Lalign_1 + jmp .Lafter_bad_alignment +.Lsmall_align: addq %r9,%rdx - jmp handle_7 + jmp .Lhandle_7 #endif /* table sorted by exception address */ .section __ex_table,"a" .align 8 - .quad s1,s1e - .quad s2,s2e - .quad s3,s3e - .quad s4,s4e - .quad d1,s1e - .quad d2,s2e - .quad d3,s3e - .quad d4,s4e - .quad s5,s5e - .quad s6,s6e - .quad s7,s7e - .quad s8,s8e - .quad d5,s5e - .quad d6,s6e - .quad d7,s7e - .quad d8,s8e - .quad s9,e_quad - .quad d9,e_quad - .quad s10,e_byte - .quad d10,e_byte + .quad .Ls1,.Ls1e + .quad .Ls2,.Ls2e + .quad .Ls3,.Ls3e + .quad .Ls4,.Ls4e + .quad .Ld1,.Ls1e + .quad .Ld2,.Ls2e + .quad .Ld3,.Ls3e + .quad .Ld4,.Ls4e + .quad .Ls5,.Ls5e + .quad .Ls6,.Ls6e + .quad .Ls7,.Ls7e + .quad .Ls8,.Ls8e + .quad .Ld5,.Ls5e + .quad .Ld6,.Ls6e + .quad .Ld7,.Ls7e + .quad .Ld8,.Ls8e + .quad .Ls9,.Le_quad + .quad .Ld9,.Le_quad + .quad .Ls10,.Le_byte + .quad .Ld10,.Le_byte #ifdef FIX_ALIGNMENT - .quad s11,e_byte - .quad d11,e_byte + .quad .Ls11,.Le_byte + .quad 
.Ld11,.Le_byte #endif - .quad e5,e_zero + .quad .Le5,.Le_zero .previous /* compute 64-offset for main loop. 8 bytes accuracy with error on the pessimistic side. this is gross. it would be better to fix the interface. */ /* eax: zero, ebx: 64 */ -s1e: addl $8,%eax -s2e: addl $8,%eax -s3e: addl $8,%eax -s4e: addl $8,%eax -s5e: addl $8,%eax -s6e: addl $8,%eax -s7e: addl $8,%eax -s8e: addl $8,%eax +.Ls1e: addl $8,%eax +.Ls2e: addl $8,%eax +.Ls3e: addl $8,%eax +.Ls4e: addl $8,%eax +.Ls5e: addl $8,%eax +.Ls6e: addl $8,%eax +.Ls7e: addl $8,%eax +.Ls8e: addl $8,%eax addq %rbx,%rdi /* +64 */ subq %rax,%rdi /* correct destination with computed offset */ @@ -216,22 +213,22 @@ s8e: addl $8,%eax addq %rax,%rdx /* add offset to loopcnt */ andl $63,%ecx /* remaining bytes */ addq %rcx,%rdx /* add them */ - jmp zero_rest + jmp .Lzero_rest /* exception on quad word loop in tail handling */ /* ecx: loopcnt/8, %edx: length, rdi: correct */ -e_quad: +.Le_quad: shll $3,%ecx andl $7,%edx addl %ecx,%edx /* edx: bytes to zero, rdi: dest, eax:zero */ -zero_rest: +.Lzero_rest: movq %rdx,%rcx -e_byte: +.Le_byte: xorl %eax,%eax -e5: rep +.Le5: rep stosb /* when there is another exception while zeroing the rest just return */ -e_zero: +.Le_zero: movq %rdx,%rax - jmp ende + jmp .Lende diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 49119f0d18cc..14938935dd92 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -226,17 +226,18 @@ bad_area: bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { #ifdef CONFIG_IA32_EMULATION /* 32bit vsyscall. map on demand. 
*/ if (test_thread_flag(TIF_IA32) && - address >= 0xffffe000 && address < 0xffffefff-7) { + address >= 0xffffe000 && address < 0xffffe000 + PAGE_SIZE) { if (map_syscall32(mm, address) < 0) goto out_of_memory2; return; } #endif + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { printk(KERN_INFO "%s[%d] segfault at rip:%lx rsp:%lx adr:%lx err:%lx\n", tsk->comm, tsk->pid, regs->rip, regs->rsp, address, diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 5ee4ffecb5ba..cd589916e0ac 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -39,7 +39,7 @@ #include <asm/proto.h> #include <asm/smp.h> -#define Dprintk(x...) printk(x) +#define Dprintk(x...) struct mmu_gather mmu_gathers[NR_CPUS]; @@ -105,7 +105,7 @@ static void set_pte_phys(unsigned long vaddr, pml4_t *level4; pgd_t *pgd; pmd_t *pmd; - pte_t *pte; + pte_t *pte, new_pte; Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys); @@ -132,11 +132,13 @@ static void set_pte_phys(unsigned long vaddr, return; } } + new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); + pte = pte_offset_kernel(pmd, vaddr); - /* CHECKME: */ - if (pte_val(*pte)) + if (!pte_none(*pte) && + pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) pte_ERROR(*pte); - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot)); + set_pte(pte, new_pte); /* * It's enough to flush this one mapping. @@ -340,6 +342,35 @@ void __init paging_init(void) } #endif +/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches + from the CPU leading to inconsistent cache lines. address and size + must be aligned to 2MB boundaries. + Does nothing when the mapping doesn't exist. 
*/ +void __init clear_kernel_mapping(unsigned long address, unsigned long size) +{ + unsigned long end = address + size; + + BUG_ON(address & ~LARGE_PAGE_MASK); + BUG_ON(size & ~LARGE_PAGE_MASK); + + for (; address < end; address += LARGE_PAGE_SIZE) { + pgd_t *pgd = pgd_offset_k(address); + if (!pgd || pgd_none(*pgd)) + continue; + pmd_t *pmd = pmd_offset(pgd, address); + if (!pmd || pmd_none(*pmd)) + continue; + if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { + /* Could handle this, but it should not happen currently. */ + printk(KERN_ERR + "clear_kernel_mapping: mapping has been split. will leak memory\n"); + pmd_ERROR(*pmd); + } + set_pmd(pmd, __pmd(0)); + } + __flush_tlb_all(); +} + static inline int page_is_ram (unsigned long pagenr) { int i; diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index 43f9cbdcecef..04d716c22552 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -87,10 +87,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) if (limit > end_pfn_map << PAGE_SHIFT) limit = end_pfn_map << PAGE_SHIFT; - if (limit <= base) { - printk(KERN_INFO "Node %d beyond memory map\n", nodeid); + if (limit <= base) continue; - } base >>= 16; base <<= 24; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index b117644e24cc..0150d11586a7 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -15,7 +15,7 @@ #include <asm/dma.h> #include <asm/numa.h> -#define Dprintk(x...) printk(x) +#define Dprintk(x...) 
struct pglist_data *node_data[MAXNODE]; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; @@ -104,8 +104,11 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); - if (nodeid + 1 > numnodes) + if (nodeid + 1 > numnodes) { numnodes = nodeid + 1; + printk(KERN_INFO + "setup_node_bootmem: enlarging numnodes to %d\n", numnodes); + } nodes_present |= (1UL << nodeid); } @@ -121,7 +124,7 @@ void __init setup_node_zones(int nodeid) start_pfn = node_start_pfn(nodeid); end_pfn = node_end_pfn(nodeid); - printk("setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); + printk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); /* All nodes > 0 have a zero length zone DMA */ dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 9043424126bc..cec7a5bfc19a 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -63,29 +63,53 @@ static void flush_kernel_map(void *address) __flush_tlb_one(address); } + +static inline void flush_map(unsigned long address) +{ + on_each_cpu(flush_kernel_map, (void *)address, 1, 1); +} + +struct deferred_page { + struct deferred_page *next; + struct page *fpage; + unsigned long address; +}; +static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ + +static inline void save_page(unsigned long address, struct page *fpage) +{ + struct deferred_page *df; + df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); + if (!df) { + flush_map(address); + __free_page(fpage); + } else { + df->next = df_list; + df->fpage = fpage; + df->address = address; + df_list = df; + } +} + /* * No more special protections in this 2/4MB area - revert to a * large page again. 
*/ -static inline void revert_page(struct page *kpte_page, unsigned long address) +static void revert_page(struct page *kpte_page, unsigned long address) { pgd_t *pgd; pmd_t *pmd; pte_t large_pte; pgd = pgd_offset_k(address); - if (!pgd) BUG(); pmd = pmd_offset(pgd, address); - if (!pmd) BUG(); - if ((pmd_val(*pmd) & _PAGE_GLOBAL) == 0) BUG(); - + BUG_ON(pmd_val(*pmd) & _PAGE_PSE); large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, PAGE_KERNEL_LARGE); set_pte((pte_t *)pmd, large_pte); } static int -__change_page_attr(unsigned long address, struct page *page, pgprot_t prot, - struct page **oldpage) +__change_page_attr(unsigned long address, struct page *page, pgprot_t prot) { pte_t *kpte; struct page *kpte_page; @@ -107,6 +131,7 @@ __change_page_attr(unsigned long address, struct page *page, pgprot_t prot, struct page *split = split_large_page(address, prot); if (!split) return -ENOMEM; + atomic_inc(&kpte_page->count); set_pte(kpte,mk_pte(split, PAGE_KERNEL)); } } else if ((kpte_flags & _PAGE_PSE) == 0) { @@ -115,39 +140,12 @@ __change_page_attr(unsigned long address, struct page *page, pgprot_t prot, } if (atomic_read(&kpte_page->count) == 1) { - *oldpage = kpte_page; + save_page(address, kpte_page); revert_page(kpte_page, address); } return 0; } -static inline void flush_map(unsigned long address) -{ - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); -} - -struct deferred_page { - struct deferred_page *next; - struct page *fpage; - unsigned long address; -}; -static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ - -static inline void save_page(unsigned long address, struct page *fpage) -{ - struct deferred_page *df; - df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); - if (!df) { - flush_map(address); - __free_page(fpage); - } else { - df->next = df_list; - df->fpage = fpage; - df->address = address; - df_list = df; - } -} - /* * Change the page attributes of an page in the linear mapping. 
* @@ -164,24 +162,19 @@ static inline void save_page(unsigned long address, struct page *fpage) int change_page_attr(struct page *page, int numpages, pgprot_t prot) { int err = 0; - struct page *fpage, *fpage2; int i; down_write(&init_mm.mmap_sem); - for (i = 0; i < numpages; i++, page++) { + for (i = 0; i < numpages; !err && i++, page++) { unsigned long address = (unsigned long)page_address(page); - fpage = NULL; - err = __change_page_attr(address, page, prot, &fpage); + err = __change_page_attr(address, page, prot); + if (err) + break; /* Handle kernel mapping too which aliases part of the lowmem */ - if (!err && page_to_phys(page) < KERNEL_TEXT_SIZE) { + if (page_to_phys(page) < KERNEL_TEXT_SIZE) { unsigned long addr2 = __START_KERNEL_map + page_to_phys(page); - fpage2 = NULL; - err = __change_page_attr(addr2, page, prot, &fpage2); - if (fpage2) - save_page(addr2, fpage2); + err = __change_page_attr(addr2, page, prot); } - if (fpage) - save_page(address, fpage); } up_write(&init_mm.mmap_sem); return err; diff --git a/arch/x86_64/pci/irq.c b/arch/x86_64/pci/irq.c index baf6d641d453..3e14e7e717af 100644 --- a/arch/x86_64/pci/irq.c +++ b/arch/x86_64/pci/irq.c @@ -378,8 +378,9 @@ static struct irq_info *pirq_get_info(struct pci_dev *dev) return NULL; } -static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs) { + return IRQ_NONE; } static int pcibios_lookup_irq(struct pci_dev *dev, int assign) diff --git a/arch/x86_64/vmlinux.lds.S b/arch/x86_64/vmlinux.lds.S index 6bd383f57b87..2ad9f0e9f90d 100644 --- a/arch/x86_64/vmlinux.lds.S +++ b/arch/x86_64/vmlinux.lds.S @@ -127,7 +127,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exit.data) - *(.exit.text) + /* *(.exit.text) */ *(.exitcall.exit) *(.eh_frame) } diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index 3ecb6223d932..af5a35229cff 100644 --- a/drivers/char/i8k.c +++ 
b/drivers/char/i8k.c @@ -757,7 +757,7 @@ int __init i8k_init(void) return -ENOENT; } proc_i8k->proc_fops = &i8k_fops; - SET_MODULE_OWNER(proc_i8k); + proc_i8k->owner = THIS_MODULE; printk(KERN_INFO "Dell laptop SMM driver v%s Massimo Dal Zotto (dz@debian.org)\n", diff --git a/drivers/isdn/act2000/module.c b/drivers/isdn/act2000/module.c index 130d82dd1f93..44c43be3828d 100644 --- a/drivers/isdn/act2000/module.c +++ b/drivers/isdn/act2000/module.c @@ -587,7 +587,7 @@ act2000_alloccard(int bus, int port, int irq, char *id) INIT_WORK(&card->rcv_tq, (void *) (void *) actcapi_dispatch, card); INIT_WORK(&card->poll_tq, (void *) (void *) act2000_receive, card); init_timer(&card->ptimer); - SET_MODULE_OWNER(&card->interface); + card->interface.owner = THIS_MODULE; card->interface.channels = ACT2000_BCH; card->interface.maxbufsize = 4000; card->interface.command = if_command; diff --git a/drivers/isdn/eicon/eicon_mod.c b/drivers/isdn/eicon/eicon_mod.c index 170cc8eb0c81..8c1174bc36cb 100644 --- a/drivers/isdn/eicon/eicon_mod.c +++ b/drivers/isdn/eicon/eicon_mod.c @@ -834,7 +834,7 @@ eicon_alloccard(int Type, int membase, int irq, char *id, int card_id) tasklet_init(&card->snd_tq, eicon_transmit, (unsigned long)card); tasklet_init(&card->rcv_tq, eicon_rcv_dispatch, (unsigned long)card); tasklet_init(&card->ack_tq, eicon_ack_dispatch, (unsigned long)card); - SET_MODULE_OWNER(&card->interface); + card->interface.owner = THIS_MODULE; card->interface.maxbufsize = 4000; card->interface.command = if_command; card->interface.writebuf_skb = if_sendbuf; diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c index ed5a202f655a..80af7f3ba924 100644 --- a/drivers/isdn/hardware/avm/b1isa.c +++ b/drivers/isdn/hardware/avm/b1isa.c @@ -118,7 +118,7 @@ static int __init b1isa_probe(struct pci_dev *pdev) cinfo->capi_ctrl.procinfo = b1isa_procinfo; cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - 
SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c index 30c3cdec4ede..14e5658234de 100644 --- a/drivers/isdn/hardware/avm/b1pci.c +++ b/drivers/isdn/hardware/avm/b1pci.c @@ -111,7 +111,7 @@ static int b1pci_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.procinfo = b1pci_procinfo; cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { @@ -249,7 +249,7 @@ static int b1pciv4_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.procinfo = b1pciv4_procinfo; cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c index 048568c66b2d..8120237ef685 100644 --- a/drivers/isdn/hardware/avm/b1pcmcia.c +++ b/drivers/isdn/hardware/avm/b1pcmcia.c @@ -105,7 +105,7 @@ static int b1pcmcia_add_card(unsigned int port, unsigned irq, cinfo->capi_ctrl.procinfo = b1pcmcia_procinfo; cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index cefb36d8f148..d86c87dff18e 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -1166,7 +1166,7 @@ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, cinfo->capi_ctrl.procinfo = c4_procinfo; 
cinfo->capi_ctrl.ctr_read_proc = c4_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c index 28caef344d01..c89e192a83cc 100644 --- a/drivers/isdn/hardware/avm/t1isa.c +++ b/drivers/isdn/hardware/avm/t1isa.c @@ -411,7 +411,7 @@ static int __init t1isa_probe(struct pci_dev *pdev, int cardnr) cinfo->capi_ctrl.procinfo = t1isa_procinfo; cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c index c72cbb9faea9..0cb3091e5774 100644 --- a/drivers/isdn/hardware/avm/t1pci.c +++ b/drivers/isdn/hardware/avm/t1pci.c @@ -119,7 +119,7 @@ static int t1pci_add_card(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.procinfo = t1pci_procinfo; cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc; strcpy(cinfo->capi_ctrl.name, card->name); - SET_MODULE_OWNER(&cinfo->capi_ctrl); + cinfo->capi_ctrl.owner = THIS_MODULE; retval = attach_capi_ctr(&cinfo->capi_ctrl); if (retval) { diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c index 58ec63792e9d..f26fd00ddc6c 100644 --- a/drivers/isdn/hardware/eicon/capimain.c +++ b/drivers/isdn/hardware/eicon/capimain.c @@ -149,7 +149,7 @@ void diva_os_set_controller_struct(struct capi_ctr *ctrl) ctrl->load_firmware = 0; ctrl->reset_ctr = 0; ctrl->ctr_read_proc = diva_ctl_read_proc; - SET_MODULE_OWNER(ctrl); + ctrl->owner = THIS_MODULE; } /* diff --git a/drivers/isdn/hardware/eicon/i4lididrv.c b/drivers/isdn/hardware/eicon/i4lididrv.c index 17558487227d..e045842df617 100644 --- a/drivers/isdn/hardware/eicon/i4lididrv.c +++ 
b/drivers/isdn/hardware/eicon/i4lididrv.c @@ -913,7 +913,7 @@ eicon_alloccard(DESCRIPTOR *d) skb_queue_head_init(&card->sackq); skb_queue_head_init(&card->statq); card->statq_entries = 0; - SET_MODULE_OWNER(&card->interface); + card->interface.owner = THIS_MODULE; card->interface.maxbufsize = 4000; card->interface.command = if_command; card->interface.writebuf_skb = if_sendbuf; diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index d22229fb61db..e5c5514da009 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -889,7 +889,7 @@ static int __devinit checkcard(int cardnr, char *id, int *busy_flag) "HiSax: Card Type %d out of range\n", card->typ); goto outf_cs; } - SET_MODULE_OWNER(&cs->iif); + cs->iif.owner = THIS_MODULE; strcpy(cs->iif.id, id); cs->iif.channels = 2; cs->iif.maxbufsize = MAX_DATA_SIZE; diff --git a/drivers/isdn/hisax/hisax_fcclassic.c b/drivers/isdn/hisax/hisax_fcclassic.c index f349ffd99772..805d908e3486 100644 --- a/drivers/isdn/hisax/hisax_fcclassic.c +++ b/drivers/isdn/hisax/hisax_fcclassic.c @@ -293,7 +293,7 @@ new_adapter(struct pci_dev *pdev) memset(adapter, 0, sizeof(struct fritz_adapter)); - SET_MODULE_OWNER(&adapter->isac.hisax_d_if); + adapter->isac.hisax_d_if.owner = THIS_MODULE; adapter->isac.hisax_d_if.ifc.priv = &adapter->isac; adapter->isac.hisax_d_if.ifc.l2l1 = isac_d_l2l1; diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.c b/drivers/isdn/hisax/hisax_fcpcipnp.c index 79c26786a62f..54daa4539c78 100644 --- a/drivers/isdn/hisax/hisax_fcpcipnp.c +++ b/drivers/isdn/hisax/hisax_fcpcipnp.c @@ -827,7 +827,7 @@ new_adapter(struct pci_dev *pdev) memset(adapter, 0, sizeof(struct fritz_adapter)); - SET_MODULE_OWNER(&adapter->isac.hisax_d_if); + adapter->isac.hisax_d_if.owner = THIS_MODULE; adapter->isac.hisax_d_if.ifc.priv = &adapter->isac; adapter->isac.hisax_d_if.ifc.l2l1 = isac_d_l2l1; diff --git a/drivers/isdn/hisax/hisax_hfcpci.c b/drivers/isdn/hisax/hisax_hfcpci.c index ed6b298203d7..8537978b9328 
100644 --- a/drivers/isdn/hisax/hisax_hfcpci.c +++ b/drivers/isdn/hisax/hisax_hfcpci.c @@ -1484,7 +1484,7 @@ new_adapter(struct pci_dev *pdev) memset(adapter, 0, sizeof(struct hfcpci_adapter)); - SET_MODULE_OWNER(&adapter->d_if); + adapter->d_if.owner = THIS_MODULE; adapter->d_if.ifc.priv = adapter; adapter->d_if.ifc.l2l1 = hfcpci_d_l2l1; diff --git a/drivers/isdn/hisax/st5481_init.c b/drivers/isdn/hisax/st5481_init.c index e3329417b55d..63d14e9e0c24 100644 --- a/drivers/isdn/hisax/st5481_init.c +++ b/drivers/isdn/hisax/st5481_init.c @@ -80,7 +80,7 @@ static int probe_st5481(struct usb_interface *intf, adapter->number_of_leds = number_of_leds; adapter->usb_dev = dev; - SET_MODULE_OWNER(&adapter->hisax_d_if); + adapter->hisax_d_if.owner = THIS_MODULE; adapter->hisax_d_if.ifc.priv = adapter; adapter->hisax_d_if.ifc.l2l1 = st5481_d_l2l1; diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index 122927a5544b..81a9dce35d12 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -778,7 +778,7 @@ hycapi_capi_create(hysdn_card *card) ctrl->procinfo = hycapi_procinfo; ctrl->ctr_read_proc = hycapi_read_proc; strcpy(ctrl->name, cinfo->cardname); - SET_MODULE_OWNER(ctrl); + ctrl->owner = THIS_MODULE; retval = attach_capi_ctr(ctrl); if (retval) { diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c index 751d9628ff43..6dd3ccad8f3a 100644 --- a/drivers/isdn/icn/icn.c +++ b/drivers/isdn/icn/icn.c @@ -1545,7 +1545,7 @@ icn_initcard(int port, char *id) } memset((char *) card, 0, sizeof(icn_card)); card->port = port; - SET_MODULE_OWNER(&card->interface); + card->interface.owner = THIS_MODULE; card->interface.hl_hdrlen = 1; card->interface.channels = ICN_BCH; card->interface.maxbufsize = 4000; diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index c31b71a324aa..504c5f746104 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -1450,7 +1450,7 @@ isdnloop_initcard(char 
*id) return (isdnloop_card *) 0; } memset((char *) card, 0, sizeof(isdnloop_card)); - SET_MODULE_OWNER(&card->interface); + card->interface.owner = THIS_MODULE; card->interface.channels = ISDNLOOP_BCH; card->interface.hl_hdrlen = 1; /* scratch area for storing ack flag*/ card->interface.maxbufsize = 4000; diff --git a/drivers/isdn/pcbit/drv.c b/drivers/isdn/pcbit/drv.c index 79fe0b3e0d6c..3b90480e3319 100644 --- a/drivers/isdn/pcbit/drv.c +++ b/drivers/isdn/pcbit/drv.c @@ -171,7 +171,7 @@ int pcbit_init_dev(int board, int mem_base, int irq) dev->dev_if = dev_if; - SET_MODULE_OWNER(dev_if); + dev_if->owner = THIS_MODULE; dev_if->channels = 2; diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index cb6a8543054b..01954e019ac1 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -287,7 +287,7 @@ static int __init sc_init(void) } memset(interface, 0, sizeof(isdn_if)); - SET_MODULE_OWNER(interface); + interface->owner = THIS_MODULE; interface->hl_hdrlen = 0; interface->channels = channels; interface->maxbufsize = BUFFER_SIZE; diff --git a/drivers/isdn/tpam/tpam_main.c b/drivers/isdn/tpam/tpam_main.c index 8bbd1dc05064..8462f483d924 100644 --- a/drivers/isdn/tpam/tpam_main.c +++ b/drivers/isdn/tpam/tpam_main.c @@ -132,7 +132,7 @@ static int __devinit tpam_probe(struct pci_dev *dev, const struct pci_device_id copy_to_pam_dword(card, (void *)0x01840070, 0x00000010); /* fill the ISDN link layer structure */ - SET_MODULE_OWNER(&card->interface); + card->interface.owner = THIS_MODULE; card->interface.channels = TPAM_NBCHANNEL; card->interface.maxbufsize = TPAM_MAXBUFSIZE; card->interface.features = diff --git a/drivers/macintosh/apm_emu.c b/drivers/macintosh/apm_emu.c index b52f0a0a4b89..5358d31869f0 100644 --- a/drivers/macintosh/apm_emu.c +++ b/drivers/macintosh/apm_emu.c @@ -524,7 +524,7 @@ static int __init apm_emu_init(void) apm_proc = create_proc_info_entry("apm", 0, NULL, apm_emu_get_info); if (apm_proc) - SET_MODULE_OWNER(apm_proc); + 
apm_proc->owner = THIS_MODULE; misc_register(&apm_device); diff --git a/drivers/net/wan/comx-hw-munich.c b/drivers/net/wan/comx-hw-munich.c index 5cfb63b29774..24d62f203b49 100644 --- a/drivers/net/wan/comx-hw-munich.c +++ b/drivers/net/wan/comx-hw-munich.c @@ -858,7 +858,7 @@ static int munich_probe(void) board->linecode = SLICECOM_LINECODE_DEFAULT; board->clock_source = SLICECOM_CLOCK_SOURCE_DEFAULT; board->loopback = SLICECOM_LOOPBACK_DEFAULT; - SET_MODULE_OWNER(board); + board->owner = THIS_MODULE; } else { diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 94d5986262ca..a66e1a729689 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -82,7 +82,7 @@ #include <linux/errno.h> /* return codes */ #include <linux/if_arp.h> /* ARPHRD_HWX25 */ #include <linux/kernel.h> /* printk(), and other useful stuff */ -#include <linux/module.h> /* SET_MODULE_OWNER */ +#include <linux/module.h> #include <linux/string.h> /* inline memset(), etc. */ #include <linux/slab.h> /* kmalloc(), kfree() */ #include <linux/stddef.h> /* offsetof(), etc. */ diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 8f10c58c46f4..40ef07674dd2 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -672,6 +672,7 @@ void usb_set_maxpacket(struct usb_device *dev) { int i, b; + /* NOTE: affects all endpoints _except_ ep0 */ for (i=0; i<dev->actconfig->desc.bNumInterfaces; i++) { struct usb_interface *ifp = dev->actconfig->interface + i; struct usb_host_interface *as = ifp->altsetting + ifp->act_altsetting; @@ -862,6 +863,7 @@ int usb_set_interface(struct usb_device *dev, int interface, int alternate) usb_settoggle (dev, ep, out, 0); (out ? 
dev->epmaxpacketout : dev->epmaxpacketin) [ep] = iface_as->endpoint [i].desc.wMaxPacketSize; + usb_endpoint_running (dev, ep, out); } return 0; @@ -916,7 +918,7 @@ int usb_set_configuration(struct usb_device *dev, int configuration) /* if it's already configured, clear out old state first. */ if (dev->state != USB_STATE_ADDRESS && disable) { - for (i = 0; i < 15; i++) { + for (i = 1 /* skip ep0 */; i < 15; i++) { disable (dev, i); disable (dev, USB_DIR_IN | i); } diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index bf56940f730f..783d6ad92847 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -1107,7 +1107,7 @@ eth_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) } /* respond with data transfer before status phase? */ - if (value > 0) { + if (value >= 0) { req->length = value; value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC); if (value < 0) { diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index e22166b19b24..267da0d49798 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -393,7 +393,7 @@ net2280_free_request (struct usb_ep *_ep, struct usb_request *_req) struct net2280_request *req; ep = container_of (_ep, struct net2280_ep, ep); - if (!ep || !_req) + if (!_ep || !_req) return; req = container_of (_req, struct net2280_request, req); @@ -442,7 +442,7 @@ net2280_alloc_buffer ( struct net2280_ep *ep; ep = container_of (_ep, struct net2280_ep, ep); - if (!ep || (!ep->desc && ep->num != 0)) + if (!_ep || (!ep->desc && ep->num != 0)) return 0; *dma = DMA_ADDR_INVALID; @@ -561,16 +561,12 @@ static void out_flush (struct net2280_ep *ep) writel ((1 << FIFO_FLUSH), statp); mb (); tmp = readl (statp); - if (tmp & (1 << DATA_OUT_PING_TOKEN_INTERRUPT)) { + if (tmp & (1 << DATA_OUT_PING_TOKEN_INTERRUPT) + /* high speed did bulk NYET; fifo isn't filling */ + && ep->dev->gadget.speed == USB_SPEED_FULL) { unsigned usec; - if (ep->dev->gadget.speed == 
USB_SPEED_HIGH) { - if (ep->ep.maxpacket <= 512) - usec = 10; /* 512 byte bulk */ - else - usec = 21; /* 1024 byte interrupt */ - } else - usec = 50; /* 64 byte bulk/interrupt */ + usec = 50; /* 64 byte bulk/interrupt */ handshake (statp, (1 << USB_OUT_PING_NAK_SENT), (1 << USB_OUT_PING_NAK_SENT), usec); /* NAK done; now CLEAR_NAK_OUT_PACKETS is safe */ @@ -614,15 +610,13 @@ read_fifo (struct net2280_ep *ep, struct net2280_request *req) count = readl (&regs->ep_avail); tmp = req->req.length - req->req.actual; if (count > tmp) { - unsigned over = tmp % ep->ep.maxpacket; - - /* FIXME handle this consistently between PIO and DMA */ - if (over) { + /* as with DMA, data overflow gets flushed */ + if ((tmp % ep->ep.maxpacket) != 0) { ERROR (ep->dev, - "%s out fifo %d bytes, over %d extra %d\n", - ep->ep.name, count, over, count - tmp); + "%s out fifo %d bytes, expected %d\n", + ep->ep.name, count, tmp); req->req.status = -EOVERFLOW; - tmp -= over; + cleanup = 1; } count = tmp; } @@ -670,10 +664,12 @@ fill_dma_desc (struct net2280_ep *ep, struct net2280_request *req, int valid) /* don't let DMA continue after a short OUT packet, * so overruns can't affect the next transfer. + * in case of overruns on max-size packets, we can't + * stop the fifo from filling but we can flush it. */ if (ep->is_in) dmacount |= (1 << DMA_DIRECTION); - else if ((dmacount % ep->ep.maxpacket) != 0) + else dmacount |= (1 << END_OF_CHAIN); req->valid = valid; @@ -897,8 +893,12 @@ net2280_queue (struct usb_ep *_ep, struct usb_request *_req, int gfp_flags) start_dma (ep, req); else { /* maybe there's no control data, just status ack */ - if (ep->num == 0 && _req->length == 0) + if (ep->num == 0 && _req->length == 0) { + allow_status (ep); + done (ep, req, 0); + VDEBUG (dev, "%s status ack\n", ep->ep.name); goto done; + } /* PIO ... stuff the fifo, or unblock it.
*/ if (ep->is_in) @@ -948,10 +948,9 @@ net2280_queue (struct usb_ep *_ep, struct usb_request *_req, int gfp_flags) } /* else the irq handler advances the queue. */ - if (req) { -done: + if (req) list_add_tail (&req->queue, &ep->queue); - } +done: spin_unlock_irqrestore (&dev->lock, flags); /* pci writes may still be posted */ @@ -992,6 +991,8 @@ static void scan_dma_completions (struct net2280_ep *ep) /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" * packets, including overruns, even when the transfer was * exactly the length requested (dmacount now zero). + * FIXME there's an overrun case here too, where we expect + * a short packet but receive a max length one (won't NAK). */ if (!ep->is_in && (req->req.length % ep->ep.maxpacket) != 0) { req->dma_done = 1; @@ -1186,7 +1187,8 @@ net2280_set_halt (struct usb_ep *_ep, int value) return -EINVAL; if (!ep->dev->driver || ep->dev->gadget.speed == USB_SPEED_UNKNOWN) return -ESHUTDOWN; - if ((ep->desc->bmAttributes & 0x03) == USB_ENDPOINT_XFER_ISOC) + if (ep->desc /* not ep0 */ && (ep->desc->bmAttributes & 0x03) + == USB_ENDPOINT_XFER_ISOC) return -EINVAL; VDEBUG (ep->dev, "%s %s halt\n", _ep->name, value ? 
"set" : "clear"); @@ -1712,7 +1714,7 @@ static void usb_reinit (struct net2280 *dev) static void ep0_start (struct net2280 *dev) { - writel ( (1 << SET_EP_HIDE_STATUS_PHASE) + writel ( (1 << CLEAR_EP_HIDE_STATUS_PHASE) | (1 << CLEAR_NAK_OUT_PACKETS) | (1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE) , &dev->epregs [0].ep_rsp); @@ -1916,22 +1918,27 @@ static void handle_ep_small (struct net2280_ep *ep) if (ep->is_in) { /* status; stop NAKing */ if (t & (1 << DATA_OUT_PING_TOKEN_INTERRUPT)) { - if (ep->dev->protocol_stall) + if (ep->dev->protocol_stall) { + ep->stopped = 1; set_halt (ep); + } mode = 2; - /* reply to extra IN tokens with a zlp */ + /* reply to extra IN data tokens with a zlp */ } else if (t & (1 << DATA_IN_TOKEN_INTERRUPT)) { if (ep->dev->protocol_stall) { + ep->stopped = 1; set_halt (ep); mode = 2; - } else if (!req) + } else if (!req && ep->stopped) write_fifo (ep, 0); } } else { /* status; stop NAKing */ if (t & (1 << DATA_IN_TOKEN_INTERRUPT)) { - if (ep->dev->protocol_stall) + if (ep->dev->protocol_stall) { + ep->stopped = 1; set_halt (ep); + } mode = 2; /* an extra OUT token is an error */ } else if (((t & (1 << DATA_OUT_PING_TOKEN_INTERRUPT)) @@ -2031,6 +2038,10 @@ static void handle_ep_small (struct net2280_ep *ep) /* maybe advance queue to next request */ if (ep->num == 0) { + /* FIXME need mechanism (request flag?) 
so control OUT + * can decide to stall ep0 after that done() returns, + * from non-irq context + */ allow_status (ep); req = 0; } else { @@ -2171,6 +2182,7 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) struct net2280_ep *e; u16 status; + /* hw handles device and interface status */ if (u.r.bRequestType != (USB_DIR_IN|USB_RECIP_ENDPOINT)) goto delegate; if ((e = get_ep_by_addr (dev, u.r.wIndex)) == 0 @@ -2188,12 +2200,14 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) set_fifo_bytecount (ep, u.r.wLength); writel (status, &dev->epregs [0].ep_data); allow_status (ep); + VDEBUG (dev, "%s stat %02x\n", ep->ep.name, status); goto next_endpoints; } break; case USB_REQ_CLEAR_FEATURE: { struct net2280_ep *e; + /* hw handles device features */ if (u.r.bRequestType != USB_RECIP_ENDPOINT) goto delegate; if (u.r.wIndex != 0 /* HALT feature */ @@ -2202,11 +2216,15 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) if ((e = get_ep_by_addr (dev, u.r.wIndex)) == 0) goto do_stall; clear_halt (e); + allow_status (ep); + VDEBUG (dev, "%s clear halt\n", ep->ep.name); + goto next_endpoints; } break; case USB_REQ_SET_FEATURE: { struct net2280_ep *e; + /* hw handles device features */ if (u.r.bRequestType != USB_RECIP_ENDPOINT) goto delegate; if (u.r.wIndex != 0 /* HALT feature */ @@ -2215,6 +2233,9 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) if ((e = get_ep_by_addr (dev, u.r.wIndex)) == 0) goto do_stall; set_halt (e); + allow_status (ep); + VDEBUG (dev, "%s set halt\n", ep->ep.name); + goto next_endpoints; } break; default: @@ -2235,23 +2256,12 @@ do_stall: VDEBUG (dev, "req %02x.%02x protocol STALL; stat %d\n", u.r.bRequestType, u.r.bRequest, tmp); dev->protocol_stall = 1; - - /* when there's no data, queueing a response is optional */ - } else if (list_empty (&ep->queue)) { - if (u.r.wLength == 0) { - /* done() not possible/requested */ - allow_status (ep); - } else { - DEBUG (dev, "req %02x.%02x v%04x " - "gadget 
error, len %d, stat %d\n", - u.r.bRequestType, u.r.bRequest, - le16_to_cpu (u.r.wValue), - u.r.wLength, tmp); - dev->protocol_stall = 1; - } } - /* some in/out token irq should follow; maybe stall then. */ + /* some in/out token irq should follow; maybe stall then. + * driver must queue a request (even zlp) or halt ep0 + * before the host times out. + */ } next_endpoints: diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h index 7e25111a8721..7b9ba5c29529 100644 --- a/drivers/usb/gadget/net2280.h +++ b/drivers/usb/gadget/net2280.h @@ -437,6 +437,8 @@ struct net2280_ep_regs { /* [11.9] */ /*-------------------------------------------------------------------------*/ +#ifdef __KERNEL__ + /* indexed registers [11.10] are accessed indirectly * caller must own the device lock. */ @@ -457,6 +459,9 @@ set_idx_reg (struct net2280_regs *regs, u32 index, u32 value) /* posted, may not be visible yet */ } +#endif /* __KERNEL__ */ + + #define REG_DIAG 0x0 #define RETRY_COUNTER 16 #define FORCE_PCI_SERR 11 @@ -471,6 +476,8 @@ set_idx_reg (struct net2280_regs *regs, u32 index, u32 value) #define REG_CHIPREV 0x03 /* in bcd */ #define REG_HS_NAK_RATE 0x0a /* NAK per N uframes */ +#ifdef __KERNEL__ + /* ep a-f highspeed and fullspeed maxpacket, addresses * computed from ep->num */ @@ -519,6 +526,7 @@ static inline void allow_status (struct net2280_ep *ep) writel ( (1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE) | (1 << CLEAR_NAK_OUT_PACKETS_MODE) , &ep->regs->ep_rsp); + ep->stopped = 1; } static inline void set_halt (struct net2280_ep *ep) @@ -707,3 +715,4 @@ static inline void stop_out_naking (struct net2280_ep *ep) writel ((1 << CLEAR_NAK_OUT_PACKETS), &ep->regs->ep_rsp); } +#endif /* __KERNEL__ */ diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index a9b111f2df49..bd9445d9ff6d 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -653,7 +653,7 @@ static void source_sink_complete (struct usb_ep *ep, struct usb_request *req) /* 
this endpoint is normally active while we're configured */ case -ECONNRESET: /* request dequeued */ case -ESHUTDOWN: /* disconnect from host */ - DEBUG (dev, "%s gone (%d), %d/%d\n", ep->name, status, + VDEBUG (dev, "%s gone (%d), %d/%d\n", ep->name, status, req->actual, req->length); free_ep_req (ep, req); return; @@ -1035,9 +1035,6 @@ zero_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (ctrl->bRequestType != 0) break; spin_lock (&dev->lock); - /* change hardware configuration! - * no response queued, just zero status == success - */ value = zero_set_config (dev, ctrl->wValue, GFP_ATOMIC); spin_unlock (&dev->lock); break; @@ -1092,7 +1089,7 @@ zero_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) } /* respond with data transfer before status phase? */ - if (value > 0) { + if (value >= 0) { req->length = value; value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC); if (value < 0) { diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 5497a4dc32a3..c5efae80777f 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -61,7 +61,7 @@ /* * Version Information */ -#define DRIVER_VERSION "v2.0" +#define DRIVER_VERSION "v2.1" #define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, Randy Dunlap, Georg Acher, Deti Fliegl, Thomas Sailer, Roman Weissgaerber" #define DRIVER_DESC "USB Universal Host Controller Interface driver" @@ -91,9 +91,7 @@ static int uhci_get_current_frame_number(struct uhci_hcd *uhci); static int uhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb); static void uhci_unlink_generic(struct uhci_hcd *uhci, struct urb *urb); -static int ports_active(struct uhci_hcd *uhci); -static void suspend_hc(struct uhci_hcd *uhci); -static void wakeup_hc(struct uhci_hcd *uhci); +static void hc_state_transitions(struct uhci_hcd *uhci); /* If a transfer is still active after this much time, turn off FSBR */ #define IDLE_TIMEOUT (HZ / 20) /* 50 ms */ @@ -1757,9 
+1755,8 @@ static void stall_callback(unsigned long ptr) uhci->skel_term_qh->link = UHCI_PTR_TERM; } - /* enter global suspend if nothing connected */ - if (!uhci->is_suspended && !ports_active(uhci)) - suspend_hc(uhci); + /* Poll for and perform state transitions */ + hc_state_transitions(uhci); init_stall_timer(hcd); } @@ -1884,14 +1881,14 @@ static void uhci_irq(struct usb_hcd *hcd, struct pt_regs *regs) err("%x: host system error, PCI problems?", io_addr); if (status & USBSTS_HCPE) err("%x: host controller process error. something bad happened", io_addr); - if ((status & USBSTS_HCH) && !uhci->is_suspended) { + if ((status & USBSTS_HCH) && uhci->state > 0) { err("%x: host controller halted. very bad", io_addr); /* FIXME: Reset the controller, fix the offending TD */ } } if (status & USBSTS_RD) - wakeup_hc(uhci); + uhci->resume_detect = 1; uhci_free_pending_qhs(uhci); @@ -1922,10 +1919,18 @@ static void reset_hc(struct uhci_hcd *uhci) unsigned int io_addr = uhci->io_addr; /* Global reset for 50ms */ + uhci->state = UHCI_RESET; outw(USBCMD_GRESET, io_addr + USBCMD); - wait_ms(50); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout((HZ*50+999) / 1000); + set_current_state(TASK_RUNNING); outw(0, io_addr + USBCMD); - wait_ms(10); + + /* Another 10ms delay */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout((HZ*10+999) / 1000); + set_current_state(TASK_RUNNING); + uhci->resume_detect = 0; } static void suspend_hc(struct uhci_hcd *uhci) @@ -1933,34 +1938,49 @@ static void suspend_hc(struct uhci_hcd *uhci) unsigned int io_addr = uhci->io_addr; dbg("%x: suspend_hc", io_addr); - - uhci->is_suspended = 1; - smp_wmb(); - + uhci->state = UHCI_SUSPENDED; + uhci->resume_detect = 0; outw(USBCMD_EGSM, io_addr + USBCMD); } static void wakeup_hc(struct uhci_hcd *uhci) { unsigned int io_addr = uhci->io_addr; - unsigned int status; - dbg("%x: wakeup_hc", io_addr); + switch (uhci->state) { + case UHCI_SUSPENDED: /* Start the resume */ + dbg("%x: wakeup_hc", 
io_addr); - /* Global resume for 20ms */ - outw(USBCMD_FGR | USBCMD_EGSM, io_addr + USBCMD); - wait_ms(20); - outw(0, io_addr + USBCMD); - - /* wait for EOP to be sent */ - status = inw(io_addr + USBCMD); - while (status & USBCMD_FGR) - status = inw(io_addr + USBCMD); + /* Global resume for >= 20ms */ + outw(USBCMD_FGR | USBCMD_EGSM, io_addr + USBCMD); + uhci->state = UHCI_RESUMING_1; + uhci->state_end = jiffies + (20*HZ+999) / 1000; + break; - uhci->is_suspended = 0; + case UHCI_RESUMING_1: /* End global resume */ + uhci->state = UHCI_RESUMING_2; + outw(0, io_addr + USBCMD); + /* Falls through */ - /* Run and mark it configured with a 64-byte max packet */ - outw(USBCMD_RS | USBCMD_CF | USBCMD_MAXP, io_addr + USBCMD); + case UHCI_RESUMING_2: /* Wait for EOP to be sent */ + if (inw(io_addr + USBCMD) & USBCMD_FGR) + break; + + /* Run for at least 1 second, and + * mark it configured with a 64-byte max packet */ + uhci->state = UHCI_RUNNING_GRACE; + uhci->state_end = jiffies + HZ; + outw(USBCMD_RS | USBCMD_CF | USBCMD_MAXP, + io_addr + USBCMD); + break; + + case UHCI_RUNNING_GRACE: /* Now allowed to suspend */ + uhci->state = UHCI_RUNNING; + break; + + default: + break; + } } static int ports_active(struct uhci_hcd *uhci) @@ -1975,6 +1995,73 @@ static int ports_active(struct uhci_hcd *uhci) return connection; } +static int suspend_allowed(struct uhci_hcd *uhci) +{ + unsigned int io_addr = uhci->io_addr; + int i; + + if (!uhci->hcd.pdev || + uhci->hcd.pdev->vendor != PCI_VENDOR_ID_INTEL || + uhci->hcd.pdev->device != PCI_DEVICE_ID_INTEL_82371AB_2) + return 1; + + /* This is a 82371AB/EB/MB USB controller which has a bug that + * causes false resume indications if any port has an + * over current condition. To prevent problems, we will not + * allow a global suspend if any ports are OC. + * + * Some motherboards using the 82371AB/EB/MB (but not the USB portion) + * appear to hardwire the over current inputs active to disable + * the USB ports. 
+ */ + + /* check for over current condition on any port */ + for (i = 0; i < uhci->rh_numports; i++) { + if (inw(io_addr + USBPORTSC1 + i * 2) & USBPORTSC_OC) + return 0; + } + + return 1; +} + +static void hc_state_transitions(struct uhci_hcd *uhci) +{ + switch (uhci->state) { + case UHCI_RUNNING: + + /* global suspend if nothing connected for 1 second */ + if (!ports_active(uhci) && suspend_allowed(uhci)) { + uhci->state = UHCI_SUSPENDING_GRACE; + uhci->state_end = jiffies + HZ; + } + break; + + case UHCI_SUSPENDING_GRACE: + if (ports_active(uhci)) + uhci->state = UHCI_RUNNING; + else if (time_after_eq(jiffies, uhci->state_end)) + suspend_hc(uhci); + break; + + case UHCI_SUSPENDED: + + /* wakeup if requested by a device */ + if (uhci->resume_detect) + wakeup_hc(uhci); + break; + + case UHCI_RESUMING_1: + case UHCI_RESUMING_2: + case UHCI_RUNNING_GRACE: + if (time_after_eq(jiffies, uhci->state_end)) + wakeup_hc(uhci); + break; + + default: + break; + } +} + static void start_hc(struct uhci_hcd *uhci) { unsigned int io_addr = uhci->io_addr; @@ -2003,6 +2090,8 @@ static void start_hc(struct uhci_hcd *uhci) outl(uhci->fl->dma_handle, io_addr + USBFLBASEADD); /* Run and mark it configured with a 64-byte max packet */ + uhci->state = UHCI_RUNNING_GRACE; + uhci->state_end = jiffies + HZ; outw(USBCMD_RS | USBCMD_CF | USBCMD_MAXP, io_addr + USBCMD); uhci->hcd.state = USB_STATE_READY; @@ -2101,8 +2190,6 @@ static int __devinit uhci_start(struct usb_hcd *hcd) uhci->fsbr = 0; uhci->fsbrtimeout = 0; - uhci->is_suspended = 0; - spin_lock_init(&uhci->qh_remove_list_lock); INIT_LIST_HEAD(&uhci->qh_remove_list); @@ -2335,7 +2422,11 @@ static int uhci_suspend(struct usb_hcd *hcd, u32 state) { struct uhci_hcd *uhci = hcd_to_uhci(hcd); - suspend_hc(uhci); + /* Don't try to suspend broken motherboards, reset instead */ + if (suspend_allowed(uhci)) + suspend_hc(uhci); + else + reset_hc(uhci); return 0; } @@ -2345,8 +2436,13 @@ static int uhci_resume(struct usb_hcd *hcd) 
pci_set_master(uhci->hcd.pdev); - reset_hc(uhci); - start_hc(uhci); + if (uhci->state == UHCI_SUSPENDED) + uhci->resume_detect = 1; + else { + reset_hc(uhci); + start_hc(uhci); + } + uhci->hcd.state = USB_STATE_READY; return 0; } #endif diff --git a/drivers/usb/host/uhci-hcd.h b/drivers/usb/host/uhci-hcd.h index 877f475c3ef4..0024d239a57f 100644 --- a/drivers/usb/host/uhci-hcd.h +++ b/drivers/usb/host/uhci-hcd.h @@ -53,6 +53,7 @@ #define USBPORTSC_RD 0x0040 /* Resume Detect */ #define USBPORTSC_LSDA 0x0100 /* Low Speed Device Attached */ #define USBPORTSC_PR 0x0200 /* Port Reset */ +#define USBPORTSC_OC 0x0400 /* Over Current condition */ #define USBPORTSC_SUSP 0x1000 /* Suspend */ /* Legacy support register */ @@ -282,6 +283,29 @@ static inline int __interval_to_skel(int interval) return 0; /* int128 for 128-255 ms (Max.) */ } +/* + * Device states for the host controller. + * + * To prevent "bouncing" in the presence of electrical noise, + * we insist on a 1-second "grace" period, before switching to + * the RUNNING or SUSPENDED states, during which the state is + * not allowed to change. + * + * The resume process is divided into substates in order to avoid + * potentially length delays during the timer handler. + * + * States in which the host controller is halted must have values <= 0. 
+ */ +enum uhci_state { + UHCI_RESET, + UHCI_RUNNING_GRACE, /* Before RUNNING */ + UHCI_RUNNING, /* The normal state */ + UHCI_SUSPENDING_GRACE, /* Before SUSPENDED */ + UHCI_SUSPENDED = -10, /* When no devices are attached */ + UHCI_RESUMING_1, + UHCI_RESUMING_2 +}; + #define hcd_to_uhci(hcd_ptr) container_of(hcd_ptr, struct uhci_hcd, hcd) /* @@ -313,7 +337,10 @@ struct uhci_hcd { struct uhci_frame_list *fl; /* P: uhci->frame_list_lock */ int fsbr; /* Full speed bandwidth reclamation */ unsigned long fsbrtimeout; /* FSBR delay */ - int is_suspended; + + enum uhci_state state; /* FIXME: needs a spinlock */ + unsigned long state_end; /* Time of next transition */ + int resume_detect; /* Need a Global Resume */ /* Main list of URB's currently controlled by this HC */ spinlock_t urb_list_lock; diff --git a/drivers/usb/misc/speedtch.c b/drivers/usb/misc/speedtch.c index f4844267ddc8..c4925ad5b696 100644 --- a/drivers/usb/misc/speedtch.c +++ b/drivers/usb/misc/speedtch.c @@ -1,7 +1,8 @@ /****************************************************************************** - * speedtouch.c -- Alcatel SpeedTouch USB xDSL modem driver. + * speedtouch.c - Alcatel SpeedTouch USB xDSL modem driver * * Copyright (C) 2001, Alcatel + * Copyright (C) 2003, Duncan Sands * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -40,7 +41,6 @@ * udsl_usb_send_data_context->urb to a pointer and adding code * to alloc and free it * - remove_wait_queue() added to udsl_atm_processqueue_thread() - * - Duncan Sands (duncan.sands@wanadoo.fr) is the new maintainer * * 1.5: - fixed memory leak when atmsar_decode_aal5 returned NULL. 
* (reported by stephen.robinson@zen.co.uk) @@ -97,48 +97,65 @@ static int udsl_print_packet (const unsigned char *data, int len); #define DRIVER_DESC "Alcatel SpeedTouch USB driver" #define DRIVER_VERSION "1.6" +static const char udsl_driver_name [] = "speedtch"; + #define SPEEDTOUCH_VENDORID 0x06b9 #define SPEEDTOUCH_PRODUCTID 0x4061 -#define UDSL_NUMBER_RCV_URBS 1 -#define UDSL_NUMBER_SND_URBS 1 -#define UDSL_NUMBER_SND_BUFS (2*UDSL_NUMBER_SND_URBS) -#define UDSL_RCV_BUFFER_SIZE (1*64) /* ATM cells */ -#define UDSL_SND_BUFFER_SIZE (1*64) /* ATM cells */ -/* max should be (1500 IP mtu + 2 ppp bytes + 32 * 5 cellheader overhead) for - * PPPoA and (1500 + 14 + 32*5 cellheader overhead) for PPPoE */ -#define UDSL_MAX_AAL5_MRU 2048 - -#define UDSL_IOCTL_START 1 -#define UDSL_IOCTL_STOP 2 +#define UDSL_NUM_RCV_URBS 1 +#define UDSL_NUM_SND_URBS 1 +#define UDSL_NUM_RCV_BUFS (2*UDSL_NUM_RCV_URBS) +#define UDSL_NUM_SND_BUFS (2*UDSL_NUM_SND_URBS) +#define UDSL_RCV_BUF_SIZE 32 /* ATM cells */ +#define UDSL_SND_BUF_SIZE 64 /* ATM cells */ -/* endpoint declarations */ +#define UDSL_IOCTL_LINE_UP 1 +#define UDSL_IOCTL_LINE_DOWN 2 #define UDSL_ENDPOINT_DATA_OUT 0x07 #define UDSL_ENDPOINT_DATA_IN 0x87 #define ATM_CELL_HEADER (ATM_CELL_SIZE - ATM_CELL_PAYLOAD) +#define UDSL_NUM_CELLS(x) (((x) + ATM_AAL5_TRAILER + ATM_CELL_PAYLOAD - 1) / ATM_CELL_PAYLOAD) -#define hex2int(c) ( (c >= '0')&&(c <= '9') ? (c - '0') : ((c & 0xf)+9) ) - -/* usb_device_id struct */ +#define hex2int(c) ( (c >= '0') && (c <= '9') ? 
(c - '0') : ((c & 0xf) + 9) ) static struct usb_device_id udsl_usb_ids [] = { { USB_DEVICE (SPEEDTOUCH_VENDORID, SPEEDTOUCH_PRODUCTID) }, - { } /* Terminating entry */ + { } }; MODULE_DEVICE_TABLE (usb, udsl_usb_ids); -/* context declarations */ +/* receive */ + +struct udsl_receive_buffer { + struct list_head list; + unsigned char *base; + unsigned int filled_cells; +}; struct udsl_receiver { struct list_head list; - struct sk_buff *skb; + struct udsl_receive_buffer *buffer; struct urb *urb; struct udsl_instance_data *instance; }; +struct udsl_vcc_data { + /* vpi/vci lookup */ + struct list_head list; + short vpi; + int vci; + struct atm_vcc *vcc; + + /* raw cell reassembly */ + struct sk_buff *skb; + unsigned int max_pdu; +}; + +/* send */ + struct udsl_send_buffer { struct list_head list; unsigned char *base; @@ -157,73 +174,55 @@ struct udsl_control { struct atm_skb_data atm_data; unsigned int num_cells; unsigned int num_entire; - unsigned char cell_header [ATM_CELL_HEADER]; unsigned int pdu_padding; + unsigned char cell_header [ATM_CELL_HEADER]; unsigned char aal5_trailer [ATM_AAL5_TRAILER]; }; #define UDSL_SKB(x) ((struct udsl_control *)(x)->cb) -struct udsl_vcc_data { - /* vpi/vci lookup */ - struct list_head list; - short vpi; - int vci; - struct atm_vcc *vcc; - - /* raw cell reassembly */ - unsigned short mtu; - struct sk_buff *reasBuffer; -}; - -/* - * UDSL main driver data - */ +/* main driver data */ struct udsl_instance_data { struct semaphore serialize; - /* usb device part */ + /* USB device part */ struct usb_device *usb_dev; char description [64]; int firmware_loaded; - /* atm device part */ + /* ATM device part */ struct atm_dev *atm_dev; struct list_head vcc_list; - /* receiving */ - struct udsl_receiver all_receivers [UDSL_NUMBER_RCV_URBS]; + /* receive */ + struct udsl_receiver receivers [UDSL_NUM_RCV_URBS]; + struct udsl_receive_buffer receive_buffers [UDSL_NUM_RCV_BUFS]; - spinlock_t spare_receivers_lock; + spinlock_t receive_lock; struct 
list_head spare_receivers; - - spinlock_t completed_receivers_lock; - struct list_head completed_receivers; + struct list_head filled_receive_buffers; struct tasklet_struct receive_tasklet; + struct list_head spare_receive_buffers; - /* sending */ - struct udsl_sender all_senders [UDSL_NUMBER_SND_URBS]; - struct udsl_send_buffer all_buffers [UDSL_NUMBER_SND_BUFS]; + /* send */ + struct udsl_sender senders [UDSL_NUM_SND_URBS]; + struct udsl_send_buffer send_buffers [UDSL_NUM_SND_BUFS]; struct sk_buff_head sndqueue; spinlock_t send_lock; struct list_head spare_senders; - struct list_head spare_buffers; + struct list_head spare_send_buffers; struct tasklet_struct send_tasklet; struct sk_buff *current_skb; /* being emptied */ struct udsl_send_buffer *current_buffer; /* being filled */ - struct list_head filled_buffers; + struct list_head filled_send_buffers; }; -static const char udsl_driver_name [] = "speedtch"; - -/* - * atm driver prototypes and structures - */ +/* ATM */ static void udsl_atm_dev_close (struct atm_dev *dev); static int udsl_atm_open (struct atm_vcc *vcc, short vpi, int vci); @@ -239,17 +238,17 @@ static struct atmdev_ops udsl_atm_devops = { .ioctl = udsl_atm_ioctl, .send = udsl_atm_send, .proc_read = udsl_atm_proc_read, + .owner = THIS_MODULE, }; -/* - * usb driver prototypes and structures - */ -static int udsl_usb_probe (struct usb_interface *intf, - const struct usb_device_id *id); +/* USB */ + +static int udsl_usb_probe (struct usb_interface *intf, const struct usb_device_id *id); static void udsl_usb_disconnect (struct usb_interface *intf); static int udsl_usb_ioctl (struct usb_interface *intf, unsigned int code, void *user_data); static struct usb_driver udsl_usb_driver = { + .owner = THIS_MODULE, .name = udsl_driver_name, .probe = udsl_usb_probe, .disconnect = udsl_usb_disconnect, @@ -272,133 +271,110 @@ static inline struct udsl_vcc_data *udsl_find_vcc (struct udsl_instance_data *in return NULL; } -static struct sk_buff *udsl_decode_rawcell 
(struct udsl_instance_data *instance, struct sk_buff *skb, struct udsl_vcc_data **ctx) +static void udsl_extract_cells (struct udsl_instance_data *instance, unsigned char *source, unsigned int howmany) { - if (!instance || !skb || !ctx) - return NULL; - if (!skb->data || !skb->tail) - return NULL; + struct udsl_vcc_data *cached_vcc = NULL; + struct atm_vcc *vcc; + struct sk_buff *skb; + struct udsl_vcc_data *vcc_data; + int cached_vci = 0; + unsigned int i; + unsigned int length; + unsigned int pdu_length; + int pti; + int vci; + short cached_vpi = 0; + short vpi; - while (skb->len) { - unsigned char *cell = skb->data; - unsigned char *cell_payload; - struct udsl_vcc_data *vcc; - short vpi; - int vci; + for (i = 0; i < howmany; i++, source += ATM_CELL_SIZE) { + vpi = ((source [0] & 0x0f) << 4) | (source [1] >> 4); + vci = ((source [1] & 0x0f) << 12) | (source [2] << 4) | (source [3] >> 4); + pti = (source [3] & 0x2) != 0; - vpi = ((cell[0] & 0x0f) << 4) | (cell[1] >> 4); - vci = ((cell[1] & 0x0f) << 12) | (cell[2] << 4) | (cell[3] >> 4); + vdbg ("udsl_extract_cells: vpi %hd, vci %d, pti %d", vpi, vci, pti); - vdbg ("udsl_decode_rawcell (0x%p, 0x%p, 0x%p) called", instance, skb, ctx); - vdbg ("udsl_decode_rawcell skb->data %p, skb->tail %p", skb->data, skb->tail); + if (cached_vcc && (vci == cached_vci) && (vpi == cached_vpi)) + vcc_data = cached_vcc; + else if ((vcc_data = udsl_find_vcc (instance, vpi, vci))) { + cached_vcc = vcc_data; + cached_vpi = vpi; + cached_vci = vci; + } else { + dbg ("udsl_extract_cells: unknown vpi/vci (%hd/%d)!", vpi, vci); + continue; + } - /* here should the header CRC check be... 
*/ + vcc = vcc_data->vcc; - if (!(vcc = udsl_find_vcc (instance, vpi, vci))) { - dbg ("udsl_decode_rawcell: no vcc found for packet on vpi %d, vci %d", vpi, vci); - __skb_pull (skb, min (skb->len, (unsigned) 53)); - } else { - vdbg ("udsl_decode_rawcell found vcc %p for packet on vpi %d, vci %d", vcc, vpi, vci); - - if (skb->len >= 53) { - cell_payload = cell + 5; - - if (!vcc->reasBuffer) - vcc->reasBuffer = dev_alloc_skb (vcc->mtu); - - /* if alloc fails, we just drop the cell. it is possible that we can still - * receive cells on other vcc's - */ - if (vcc->reasBuffer) { - /* if (buffer overrun) discard received cells until now */ - if ((vcc->reasBuffer->len) > (vcc->mtu - 48)) - skb_trim (vcc->reasBuffer, 0); - - /* copy data */ - memcpy (vcc->reasBuffer->tail, cell_payload, 48); - skb_put (vcc->reasBuffer, 48); - - /* check for end of buffer */ - if (cell[3] & 0x2) { - struct sk_buff *tmp; - - /* the aal5 buffer ends here, cut the buffer. */ - /* buffer will always have at least one whole cell, so */ - /* don't need to check return from skb_pull */ - skb_pull (skb, 53); - *ctx = vcc; - tmp = vcc->reasBuffer; - vcc->reasBuffer = NULL; - - vdbg ("udsl_decode_rawcell returns ATM_AAL5 pdu 0x%p with length %d", tmp, tmp->len); - return tmp; - } - } - /* flush the cell */ - /* buffer will always contain at least one whole cell, so don't */ - /* need to check return value from skb_pull */ - skb_pull (skb, 53); - } else { - /* If data is corrupt and skb doesn't hold a whole cell, flush the lot */ - __skb_pull (skb, skb->len); - return NULL; - } + if (!vcc_data->skb && !(vcc_data->skb = dev_alloc_skb (vcc_data->max_pdu))) { + dbg ("udsl_extract_cells: no memory for skb (vcc: 0x%p)!", vcc); + if (pti) + atomic_inc (&vcc->stats->rx_err); + continue; } - } - return NULL; -} + skb = vcc_data->skb; -static struct sk_buff *udsl_decode_aal5 (struct udsl_vcc_data *ctx, struct sk_buff *skb) -{ - uint crc = 0xffffffff; - uint length, pdu_crc, pdu_length; + if (skb->len + 
ATM_CELL_PAYLOAD > vcc_data->max_pdu) { + dbg ("udsl_extract_cells: buffer overrun (max_pdu: %u, skb->len %u, vcc: 0x%p)", vcc_data->max_pdu, skb->len, vcc); + /* discard cells already received */ + skb_trim (skb, 0); + BUG_ON (vcc_data->max_pdu < ATM_CELL_PAYLOAD); + } - vdbg ("udsl_decode_aal5 (0x%p, 0x%p) called", ctx, skb); + memcpy (skb->tail, source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD); + __skb_put (skb, ATM_CELL_PAYLOAD); - if (skb->len && (skb->len % 48)) - return NULL; + if (pti) { + length = (source [ATM_CELL_SIZE - 6] << 8) + source [ATM_CELL_SIZE - 5]; - length = (skb->tail[-6] << 8) + skb->tail[-5]; - pdu_crc = - (skb->tail[-4] << 24) + (skb->tail[-3] << 16) + (skb->tail[-2] << 8) + skb->tail[-1]; - pdu_length = ((length + 47 + 8) / 48) * 48; + /* guard against overflow */ + if (length > ATM_MAX_AAL5_PDU) { + dbg ("udsl_extract_cells: bogus length %u (vcc: 0x%p)", length, vcc); + goto drop; + } - vdbg ("udsl_decode_aal5: skb->len = %d, length = %d, pdu_crc = 0x%x, pdu_length = %d", skb->len, length, pdu_crc, pdu_length); + pdu_length = UDSL_NUM_CELLS (length) * ATM_CELL_PAYLOAD; - /* is skb long enough ? */ - if (skb->len < pdu_length) { - atomic_inc (&ctx->vcc->stats->rx_err); - return NULL; - } + if (skb->len < pdu_length) { + dbg ("udsl_extract_cells: bogus pdu_length %u (skb->len: %u, vcc: 0x%p)", pdu_length, skb->len, vcc); + goto drop; + } - /* is skb too long ? */ - if (skb->len > pdu_length) { - dbg ("udsl_decode_aal5: Warning: readjusting illegal size %d -> %d", skb->len, pdu_length); - /* buffer is too long. we can try to recover - * if we discard the first part of the skb. 
- * the crc will decide whether this was ok - */ - skb_pull (skb, skb->len - pdu_length); - } + if (crc32_be (~0, skb->tail - pdu_length, pdu_length) != 0xc704dd7b) { + dbg ("udsl_extract_cells: packet failed crc check (vcc: 0x%p)", vcc); + goto drop; + } - crc = ~crc32_be (crc, skb->data, pdu_length - 4); + if (!atm_charge (vcc, skb->truesize)) { + dbg ("udsl_extract_cells: failed atm_charge (skb->truesize: %u)", skb->truesize); + goto drop_no_stats; /* atm_charge increments rx_drop */ + } - /* check crc */ - if (pdu_crc != crc) { - dbg ("udsl_decode_aal5: crc check failed!"); - atomic_inc (&ctx->vcc->stats->rx_err); - return NULL; - } + /* now that we are sure to send the skb, it is ok to change skb->data */ + if (skb->len > pdu_length) + skb_pull (skb, skb->len - pdu_length); /* discard initial junk */ - /* pdu is ok */ - skb_trim (skb, length); + skb_trim (skb, length); /* drop zero padding and trailer */ - /* update stats */ - atomic_inc (&ctx->vcc->stats->rx); + atomic_inc (&vcc->stats->rx); - vdbg ("udsl_decode_aal5 returns pdu 0x%p with length %d", skb, skb->len); - return skb; + PACKETDEBUG (skb->data, skb->len); + + vdbg ("udsl_extract_cells: sending skb 0x%p, skb->len %u, skb->truesize %u", skb, skb->len, skb->truesize); + + vcc->push (vcc, skb); + + vcc_data->skb = NULL; + + continue; + +drop: + atomic_inc (&vcc->stats->rx_err); +drop_no_stats: + skb_trim (skb, 0); + } + } } @@ -406,7 +382,7 @@ static struct sk_buff *udsl_decode_aal5 (struct udsl_vcc_data *ctx, struct sk_bu ** encode ** *************/ -static const unsigned char zeros[ATM_CELL_PAYLOAD]; +static const unsigned char zeros [ATM_CELL_PAYLOAD]; static void udsl_groom_skb (struct atm_vcc *vcc, struct sk_buff *skb) { @@ -421,7 +397,7 @@ static void udsl_groom_skb (struct atm_vcc *vcc, struct sk_buff *skb) ctrl->cell_header [3] = vcc->vci << 4; ctrl->cell_header [4] = 0xec; - ctrl->num_cells = (skb->len + ATM_AAL5_TRAILER + ATM_CELL_PAYLOAD - 1) / ATM_CELL_PAYLOAD; + ctrl->num_cells = 
UDSL_NUM_CELLS (skb->len); ctrl->num_entire = skb->len / ATM_CELL_PAYLOAD; zero_padding = ctrl->num_cells * ATM_CELL_PAYLOAD - skb->len - ATM_AAL5_TRAILER; @@ -490,8 +466,7 @@ static unsigned int udsl_write_cells (unsigned int howmany, struct sk_buff *skb, memset (target, 0, ATM_CELL_PAYLOAD - ATM_AAL5_TRAILER); target += ATM_CELL_PAYLOAD - ATM_AAL5_TRAILER; - if (--ctrl->num_cells) - BUG(); + BUG_ON (--ctrl->num_cells); } memcpy (target, ctrl->aal5_trailer, ATM_AAL5_TRAILER); @@ -511,145 +486,89 @@ out: static void udsl_complete_receive (struct urb *urb, struct pt_regs *regs) { + struct udsl_receive_buffer *buf; struct udsl_instance_data *instance; struct udsl_receiver *rcv; unsigned long flags; - if (!urb || !(rcv = urb->context) || !(instance = rcv->instance)) { + if (!urb || !(rcv = urb->context)) { dbg ("udsl_complete_receive: bad urb!"); return; } - vdbg ("udsl_complete_receive entered (urb 0x%p, status %d)", urb, urb->status); + instance = rcv->instance; + buf = rcv->buffer; + + buf->filled_cells = urb->actual_length / ATM_CELL_SIZE; + + vdbg ("udsl_complete_receive: urb 0x%p, status %d, actual_length %d, filled_cells %u, rcv 0x%p, buf 0x%p", urb, urb->status, urb->actual_length, buf->filled_cells, rcv, buf); + + BUG_ON (buf->filled_cells > UDSL_RCV_BUF_SIZE); /* may not be in_interrupt() */ - spin_lock_irqsave (&instance->completed_receivers_lock, flags); - list_add_tail (&rcv->list, &instance->completed_receivers); - tasklet_schedule (&instance->receive_tasklet); - spin_unlock_irqrestore (&instance->completed_receivers_lock, flags); + spin_lock_irqsave (&instance->receive_lock, flags); + list_add (&rcv->list, &instance->spare_receivers); + list_add_tail (&buf->list, &instance->filled_receive_buffers); + if (likely (!urb->status)) + tasklet_schedule (&instance->receive_tasklet); + spin_unlock_irqrestore (&instance->receive_lock, flags); } static void udsl_process_receive (unsigned long data) { + struct udsl_receive_buffer *buf; struct udsl_instance_data 
*instance = (struct udsl_instance_data *) data; struct udsl_receiver *rcv; - unsigned char *data_start; - struct sk_buff *skb; - struct urb *urb; - struct udsl_vcc_data *atmsar_vcc = NULL; - struct sk_buff *new = NULL, *tmp = NULL; int err; - vdbg ("udsl_process_receive entered"); - - spin_lock_irq (&instance->completed_receivers_lock); - while (!list_empty (&instance->completed_receivers)) { - rcv = list_entry (instance->completed_receivers.next, struct udsl_receiver, list); - list_del (&rcv->list); - spin_unlock_irq (&instance->completed_receivers_lock); - - urb = rcv->urb; - vdbg ("udsl_process_receive: got packet %p with length %d and status %d", urb, urb->actual_length, urb->status); - - switch (urb->status) { - case 0: - vdbg ("udsl_process_receive: processing urb with rcv %p, urb %p, skb %p", rcv, urb, rcv->skb); - - /* update the skb structure */ - skb = rcv->skb; - skb_trim (skb, 0); - skb_put (skb, urb->actual_length); - data_start = skb->data; - - vdbg ("skb->len = %d", skb->len); - PACKETDEBUG (skb->data, skb->len); - - while ((new = udsl_decode_rawcell (instance, skb, &atmsar_vcc))) { - vdbg ("(after cell processing)skb->len = %d", new->len); - - tmp = new; - new = udsl_decode_aal5 (atmsar_vcc, new); - - /* we can't send NULL skbs upstream, the ATM layer would try to close the vcc... 
*/ - if (new) { - vdbg ("(after aal5 decap) skb->len = %d", new->len); - if (new->len && atm_charge (atmsar_vcc->vcc, new->truesize)) { - PACKETDEBUG (new->data, new->len); - atmsar_vcc->vcc->push (atmsar_vcc->vcc, new); - } else { - dbg - ("dropping incoming packet : vcc->sk->rcvbuf = %d, skb->true_size = %d", - atmsar_vcc->vcc->sk->rcvbuf, new->truesize); - dev_kfree_skb (new); - } - } else { - dbg ("udsl_decode_aal5 returned NULL!"); - dev_kfree_skb (tmp); - } - } - - /* restore skb */ - skb_push (skb, skb->data - data_start); - - usb_fill_bulk_urb (urb, - instance->usb_dev, - usb_rcvbulkpipe (instance->usb_dev, UDSL_ENDPOINT_DATA_IN), - (unsigned char *) rcv->skb->data, - UDSL_RCV_BUFFER_SIZE * ATM_CELL_SIZE, - udsl_complete_receive, - rcv); - if (!(err = usb_submit_urb (urb, GFP_ATOMIC))) - break; - dbg ("udsl_process_receive: submission failed (%d)", err); - /* fall through */ - default: /* error or urb unlinked */ - vdbg ("udsl_process_receive: adding to spare_receivers"); - spin_lock_irq (&instance->spare_receivers_lock); - list_add (&rcv->list, &instance->spare_receivers); - spin_unlock_irq (&instance->spare_receivers_lock); +made_progress: + while (!list_empty (&instance->spare_receive_buffers)) { + spin_lock_irq (&instance->receive_lock); + if (list_empty (&instance->spare_receivers)) { + spin_unlock_irq (&instance->receive_lock); break; - } /* switch */ - - spin_lock_irq (&instance->completed_receivers_lock); - } /* while */ - spin_unlock_irq (&instance->completed_receivers_lock); - vdbg ("udsl_process_receive successful"); -} - -static void udsl_fire_receivers (struct udsl_instance_data *instance) -{ - struct list_head receivers, *pos, *n; - - INIT_LIST_HEAD (&receivers); - - down (&instance->serialize); - - spin_lock_irq (&instance->spare_receivers_lock); - list_splice_init (&instance->spare_receivers, &receivers); - spin_unlock_irq (&instance->spare_receivers_lock); + } + rcv = list_entry (instance->spare_receivers.next, struct udsl_receiver, list); 
+ list_del (&rcv->list); + spin_unlock_irq (&instance->receive_lock); - list_for_each_safe (pos, n, &receivers) { - struct udsl_receiver *rcv = list_entry (pos, struct udsl_receiver, list); + buf = list_entry (instance->spare_receive_buffers.next, struct udsl_receive_buffer, list); + list_del (&buf->list); - dbg ("udsl_fire_receivers: firing urb %p", rcv->urb); + rcv->buffer = buf; usb_fill_bulk_urb (rcv->urb, instance->usb_dev, usb_rcvbulkpipe (instance->usb_dev, UDSL_ENDPOINT_DATA_IN), - (unsigned char *) rcv->skb->data, - UDSL_RCV_BUFFER_SIZE * ATM_CELL_SIZE, + buf->base, + UDSL_RCV_BUF_SIZE * ATM_CELL_SIZE, udsl_complete_receive, rcv); - if (usb_submit_urb (rcv->urb, GFP_KERNEL) < 0) { - dbg ("udsl_fire_receivers: submit failed!"); - spin_lock_irq (&instance->spare_receivers_lock); - list_move (pos, &instance->spare_receivers); - spin_unlock_irq (&instance->spare_receivers_lock); + vdbg ("udsl_process_receive: sending urb 0x%p, rcv 0x%p, buf 0x%p", rcv->urb, rcv, buf); + + if ((err = usb_submit_urb(rcv->urb, GFP_ATOMIC)) < 0) { + dbg ("udsl_process_receive: urb submission failed (%d)!", err); + list_add (&buf->list, &instance->spare_receive_buffers); + spin_lock_irq (&instance->receive_lock); + list_add (&rcv->list, &instance->spare_receivers); + spin_unlock_irq (&instance->receive_lock); + break; } } - up (&instance->serialize); + spin_lock_irq (&instance->receive_lock); + if (list_empty (&instance->filled_receive_buffers)) { + spin_unlock_irq (&instance->receive_lock); + return; /* done - no more buffers */ + } + buf = list_entry (instance->filled_receive_buffers.next, struct udsl_receive_buffer, list); + list_del (&buf->list); + spin_unlock_irq (&instance->receive_lock); + vdbg ("udsl_process_receive: processing buf 0x%p", buf); + udsl_extract_cells (instance, buf->base, buf->filled_cells); + list_add (&buf->list, &instance->spare_receive_buffers); + goto made_progress; } @@ -673,7 +592,7 @@ static void udsl_complete_send (struct urb *urb, struct pt_regs 
*regs) /* may not be in_interrupt() */ spin_lock_irqsave (&instance->send_lock, flags); list_add (&snd->list, &instance->spare_senders); - list_add (&snd->buffer->list, &instance->spare_buffers); + list_add (&snd->buffer->list, &instance->spare_send_buffers); tasklet_schedule (&instance->send_tasklet); spin_unlock_irqrestore (&instance->send_lock, flags); } @@ -681,17 +600,17 @@ static void udsl_complete_send (struct urb *urb, struct pt_regs *regs) static void udsl_process_send (unsigned long data) { struct udsl_send_buffer *buf; - int err; struct udsl_instance_data *instance = (struct udsl_instance_data *) data; - unsigned int num_written; struct sk_buff *skb; struct udsl_sender *snd; + int err; + unsigned int num_written; made_progress: spin_lock_irq (&instance->send_lock); while (!list_empty (&instance->spare_senders)) { - if (!list_empty (&instance->filled_buffers)) { - buf = list_entry (instance->filled_buffers.next, struct udsl_send_buffer, list); + if (!list_empty (&instance->filled_send_buffers)) { + buf = list_entry (instance->filled_send_buffers.next, struct udsl_send_buffer, list); list_del (&buf->list); } else if ((buf = instance->current_buffer)) { instance->current_buffer = NULL; @@ -707,7 +626,7 @@ made_progress: instance->usb_dev, usb_sndbulkpipe (instance->usb_dev, UDSL_ENDPOINT_DATA_OUT), buf->base, - (UDSL_SND_BUFFER_SIZE - buf->free_cells) * ATM_CELL_SIZE, + (UDSL_SND_BUF_SIZE - buf->free_cells) * ATM_CELL_SIZE, udsl_complete_send, snd); @@ -718,33 +637,32 @@ made_progress: spin_lock_irq (&instance->send_lock); list_add (&snd->list, &instance->spare_senders); spin_unlock_irq (&instance->send_lock); - list_add (&buf->list, &instance->filled_buffers); - return; + list_add (&buf->list, &instance->filled_send_buffers); + return; /* bail out */ } spin_lock_irq (&instance->send_lock); } /* while */ spin_unlock_irq (&instance->send_lock); - if (!instance->current_skb && !(instance->current_skb = skb_dequeue (&instance->sndqueue))) { + if 
(!instance->current_skb && !(instance->current_skb = skb_dequeue (&instance->sndqueue))) return; /* done - no more skbs */ - } skb = instance->current_skb; if (!(buf = instance->current_buffer)) { spin_lock_irq (&instance->send_lock); - if (list_empty (&instance->spare_buffers)) { + if (list_empty (&instance->spare_send_buffers)) { instance->current_buffer = NULL; spin_unlock_irq (&instance->send_lock); return; /* done - no more buffers */ } - buf = list_entry (instance->spare_buffers.next, struct udsl_send_buffer, list); + buf = list_entry (instance->spare_send_buffers.next, struct udsl_send_buffer, list); list_del (&buf->list); spin_unlock_irq (&instance->send_lock); buf->free_start = buf->base; - buf->free_cells = UDSL_SND_BUFFER_SIZE; + buf->free_cells = UDSL_SND_BUF_SIZE; instance->current_buffer = buf; } @@ -754,7 +672,7 @@ made_progress: vdbg ("udsl_process_send: wrote %u cells from skb 0x%p to buffer 0x%p", num_written, skb, buf); if (!(buf->free_cells -= num_written)) { - list_add_tail (&buf->list, &instance->filled_buffers); + list_add_tail (&buf->list, &instance->filled_send_buffers); instance->current_buffer = NULL; } @@ -766,7 +684,7 @@ made_progress: if (vcc->pop) vcc->pop (vcc, skb); else - kfree_skb (skb); + dev_kfree_skb (skb); instance->current_skb = NULL; atomic_inc (&vcc->stats->tx); @@ -788,7 +706,7 @@ static void udsl_cancel_send (struct udsl_instance_data *instance, struct atm_vc if (vcc->pop) vcc->pop (vcc, skb); else - kfree_skb (skb); + dev_kfree_skb (skb); } spin_unlock_irq (&instance->sndqueue.lock); @@ -799,7 +717,7 @@ static void udsl_cancel_send (struct udsl_instance_data *instance, struct atm_vc if (vcc->pop) vcc->pop (vcc, skb); else - kfree_skb (skb); + dev_kfree_skb (skb); } tasklet_enable (&instance->send_tasklet); dbg ("udsl_cancel_send done"); @@ -851,6 +769,7 @@ static void udsl_atm_dev_close (struct atm_dev *dev) dbg ("udsl_atm_dev_close: queue has %u elements", instance->sndqueue.qlen); + tasklet_kill 
(&instance->receive_tasklet); tasklet_kill (&instance->send_tasklet); kfree (instance); dev->dev_data = NULL; @@ -871,8 +790,8 @@ static int udsl_atm_proc_read (struct atm_dev *atm_dev, loff_t *pos, char *page) if (!left--) return sprintf (page, "MAC: %02x:%02x:%02x:%02x:%02x:%02x\n", - atm_dev->esi[0], atm_dev->esi[1], atm_dev->esi[2], - atm_dev->esi[3], atm_dev->esi[4], atm_dev->esi[5]); + atm_dev->esi [0], atm_dev->esi [1], atm_dev->esi [2], + atm_dev->esi [3], atm_dev->esi [4], atm_dev->esi [5]); if (!left--) return sprintf (page, "AAL5: tx %d ( %d err ), rx %d ( %d err, %d drop )\n", @@ -925,7 +844,7 @@ static int udsl_atm_open (struct atm_vcc *vcc, short vpi, int vci) return -EINVAL; /* only support AAL5 */ - if (vcc->qos.aal != ATM_AAL5) + if ((vcc->qos.aal != ATM_AAL5) || (vcc->qos.rxtp.max_sdu < 0) || (vcc->qos.rxtp.max_sdu > ATM_MAX_AAL5_PDU)) return -EINVAL; if (!instance->firmware_loaded) { @@ -949,7 +868,7 @@ static int udsl_atm_open (struct atm_vcc *vcc, short vpi, int vci) new->vcc = vcc; new->vpi = vpi; new->vci = vci; - new->mtu = UDSL_MAX_AAL5_MRU; + new->max_pdu = max (1, UDSL_NUM_CELLS (vcc->qos.rxtp.max_sdu)) * ATM_CELL_PAYLOAD; vcc->dev_data = new; vcc->vpi = vpi; @@ -965,7 +884,7 @@ static int udsl_atm_open (struct atm_vcc *vcc, short vpi, int vci) up (&instance->serialize); - udsl_fire_receivers (instance); + tasklet_schedule (&instance->receive_tasklet); dbg ("udsl_atm_open: allocated vcc data 0x%p (max_pdu: %u)", new, new->max_pdu); @@ -994,9 +913,9 @@ static void udsl_atm_close (struct atm_vcc *vcc) list_del (&vcc_data->list); tasklet_enable (&instance->receive_tasklet); - if (vcc_data->reasBuffer) - kfree_skb (vcc_data->reasBuffer); - vcc_data->reasBuffer = NULL; + if (vcc_data->skb) + dev_kfree_skb (vcc_data->skb); + vcc_data->skb = NULL; kfree (vcc_data); vcc->dev_data = NULL; @@ -1041,7 +960,9 @@ static int udsl_set_alternate (struct udsl_instance_data *instance) instance->firmware_loaded = 1; } up (&instance->serialize); - 
udsl_fire_receivers (instance); + + tasklet_schedule (&instance->receive_tasklet); + return 0; } @@ -1057,10 +978,10 @@ static int udsl_usb_ioctl (struct usb_interface *intf, unsigned int code, void * } switch (code) { - case UDSL_IOCTL_START: + case UDSL_IOCTL_LINE_UP: instance->atm_dev->signal = ATM_PHY_SIG_FOUND; return udsl_set_alternate (instance); - case UDSL_IOCTL_STOP: + case UDSL_IOCTL_LINE_DOWN: instance->atm_dev->signal = ATM_PHY_SIG_LOST; return 0; default: @@ -1101,31 +1022,25 @@ static int udsl_usb_probe (struct usb_interface *intf, const struct usb_device_i INIT_LIST_HEAD (&instance->vcc_list); - spin_lock_init (&instance->spare_receivers_lock); + spin_lock_init (&instance->receive_lock); INIT_LIST_HEAD (&instance->spare_receivers); - - spin_lock_init (&instance->completed_receivers_lock); - INIT_LIST_HEAD (&instance->completed_receivers); + INIT_LIST_HEAD (&instance->filled_receive_buffers); tasklet_init (&instance->receive_tasklet, udsl_process_receive, (unsigned long) instance); + INIT_LIST_HEAD (&instance->spare_receive_buffers); skb_queue_head_init (&instance->sndqueue); spin_lock_init (&instance->send_lock); INIT_LIST_HEAD (&instance->spare_senders); - INIT_LIST_HEAD (&instance->spare_buffers); + INIT_LIST_HEAD (&instance->spare_send_buffers); tasklet_init (&instance->send_tasklet, udsl_process_send, (unsigned long) instance); - INIT_LIST_HEAD (&instance->filled_buffers); + INIT_LIST_HEAD (&instance->filled_send_buffers); /* receive init */ - for (i = 0; i < UDSL_NUMBER_RCV_URBS; i++) { - struct udsl_receiver *rcv = &(instance->all_receivers[i]); - - if (!(rcv->skb = dev_alloc_skb (UDSL_RCV_BUFFER_SIZE * ATM_CELL_SIZE))) { - dbg ("udsl_usb_probe: no memory for skb %d!", i); - goto fail; - } + for (i = 0; i < UDSL_NUM_RCV_URBS; i++) { + struct udsl_receiver *rcv = &(instance->receivers [i]); if (!(rcv->urb = usb_alloc_urb (0, GFP_KERNEL))) { dbg ("udsl_usb_probe: no memory for receive urb %d!", i); @@ -1135,13 +1050,22 @@ static int 
udsl_usb_probe (struct usb_interface *intf, const struct usb_device_i rcv->instance = instance; list_add (&rcv->list, &instance->spare_receivers); + } + + for (i = 0; i < UDSL_NUM_RCV_BUFS; i++) { + struct udsl_receive_buffer *buf = &(instance->receive_buffers [i]); + + if (!(buf->base = kmalloc (UDSL_RCV_BUF_SIZE * ATM_CELL_SIZE, GFP_KERNEL))) { + dbg ("udsl_usb_probe: no memory for receive buffer %d!", i); + goto fail; + } - dbg ("udsl_usb_probe: skb->truesize = %d (asked for %d)", rcv->skb->truesize, UDSL_RCV_BUF_SIZE * ATM_CELL_SIZE); + list_add (&buf->list, &instance->spare_receive_buffers); } /* send init */ - for (i = 0; i < UDSL_NUMBER_SND_URBS; i++) { - struct udsl_sender *snd = &(instance->all_senders[i]); + for (i = 0; i < UDSL_NUM_SND_URBS; i++) { + struct udsl_sender *snd = &(instance->senders [i]); if (!(snd->urb = usb_alloc_urb (0, GFP_KERNEL))) { dbg ("udsl_usb_probe: no memory for send urb %d!", i); @@ -1153,18 +1077,18 @@ static int udsl_usb_probe (struct usb_interface *intf, const struct usb_device_i list_add (&snd->list, &instance->spare_senders); } - for (i = 0; i < UDSL_NUMBER_SND_BUFS; i++) { - struct udsl_send_buffer *buf = &(instance->all_buffers[i]); + for (i = 0; i < UDSL_NUM_SND_BUFS; i++) { + struct udsl_send_buffer *buf = &(instance->send_buffers [i]); - if (!(buf->base = kmalloc (UDSL_SND_BUFFER_SIZE * ATM_CELL_SIZE, GFP_KERNEL))) { + if (!(buf->base = kmalloc (UDSL_SND_BUF_SIZE * ATM_CELL_SIZE, GFP_KERNEL))) { dbg ("udsl_usb_probe: no memory for send buffer %d!", i); goto fail; } - list_add (&buf->list, &instance->spare_buffers); + list_add (&buf->list, &instance->spare_send_buffers); } - /* atm init */ + /* ATM init */ if (!(instance->atm_dev = atm_dev_register (udsl_driver_name, &udsl_atm_devops, -1, 0))) { dbg ("udsl_usb_probe: failed to register ATM device!"); goto fail; @@ -1174,14 +1098,14 @@ static int udsl_usb_probe (struct usb_interface *intf, const struct usb_device_i instance->atm_dev->ci_range.vci_bits = ATM_CI_MAX; 
instance->atm_dev->signal = ATM_PHY_SIG_UNKNOWN; - /* tmp init atm device, set to 128kbit */ + /* temp init ATM device, set to 128kbit */ instance->atm_dev->link_rate = 128 * 1000 / 424; /* set MAC address, it is stored in the serial number */ memset (instance->atm_dev->esi, 0, sizeof (instance->atm_dev->esi)); if (usb_string (dev, dev->descriptor.iSerialNumber, mac_str, sizeof (mac_str)) == 12) for (i = 0; i < 6; i++) - instance->atm_dev->esi[i] = (hex2int (mac_str[i * 2]) * 16) + (hex2int (mac_str[i * 2 + 1])); + instance->atm_dev->esi [i] = (hex2int (mac_str [i * 2]) * 16) + (hex2int (mac_str [i * 2 + 1])); /* device description */ buf = instance->description; @@ -1215,20 +1139,17 @@ finish: return 0; fail: - for (i = 0; i < UDSL_NUMBER_SND_BUFS; i++) - kfree (instance->all_buffers[i].base); + for (i = 0; i < UDSL_NUM_SND_BUFS; i++) + kfree (instance->send_buffers [i].base); - for (i = 0; i < UDSL_NUMBER_SND_URBS; i++) - usb_free_urb (instance->all_senders[i].urb); + for (i = 0; i < UDSL_NUM_SND_URBS; i++) + usb_free_urb (instance->senders [i].urb); - for (i = 0; i < UDSL_NUMBER_RCV_URBS; i++) { - struct udsl_receiver *rcv = &(instance->all_receivers[i]); + for (i = 0; i < UDSL_NUM_RCV_BUFS; i++) + kfree (instance->receive_buffers [i].base); - usb_free_urb (rcv->urb); - - if (rcv->skb) - kfree_skb (rcv->skb); - } + for (i = 0; i < UDSL_NUM_RCV_URBS; i++) + usb_free_urb (instance->receivers [i].urb); kfree (instance); @@ -1239,7 +1160,7 @@ static void udsl_usb_disconnect (struct usb_interface *intf) { struct udsl_instance_data *instance = usb_get_intfdata (intf); struct list_head *pos; - unsigned int count = 0; + unsigned int count; int result, i; dbg ("udsl_usb_disconnect entered"); @@ -1251,38 +1172,25 @@ static void udsl_usb_disconnect (struct usb_interface *intf) return; } - tasklet_disable (&instance->receive_tasklet); - /* receive finalize */ - down (&instance->serialize); /* vs udsl_fire_receivers */ - /* no need to take the spinlock */ - list_for_each 
(pos, &instance->spare_receivers) - if (++count > UDSL_NUMBER_RCV_URBS) - panic (__FILE__ ": memory corruption detected at line %d!\n", __LINE__); - INIT_LIST_HEAD (&instance->spare_receivers); - up (&instance->serialize); - - dbg ("udsl_usb_disconnect: flushed %u spare receivers", count); - - count = UDSL_NUMBER_RCV_URBS - count; + tasklet_disable (&instance->receive_tasklet); - for (i = 0; i < UDSL_NUMBER_RCV_URBS; i++) - if ((result = usb_unlink_urb (instance->all_receivers[i].urb)) < 0) + for (i = 0; i < UDSL_NUM_RCV_URBS; i++) + if ((result = usb_unlink_urb (instance->receivers [i].urb)) < 0) dbg ("udsl_usb_disconnect: usb_unlink_urb on receive urb %d returned %d", i, result); /* wait for completion handlers to finish */ do { - unsigned int completed = 0; - - spin_lock_irq (&instance->completed_receivers_lock); - list_for_each (pos, &instance->completed_receivers) - if (++completed > count) + count = 0; + spin_lock_irq (&instance->receive_lock); + list_for_each (pos, &instance->spare_receivers) + if (++count > UDSL_NUM_RCV_URBS) panic (__FILE__ ": memory corruption detected at line %d!\n", __LINE__); - spin_unlock_irq (&instance->completed_receivers_lock); + spin_unlock_irq (&instance->receive_lock); - dbg ("udsl_usb_disconnect: found %u completed receivers", completed); + dbg ("udsl_usb_disconnect: found %u spare receivers", count); - if (completed == count) + if (count == UDSL_NUM_RCV_URBS) break; set_current_state (TASK_RUNNING); @@ -1290,37 +1198,36 @@ static void udsl_usb_disconnect (struct usb_interface *intf) } while (1); /* no need to take the spinlock */ - INIT_LIST_HEAD (&instance->completed_receivers); + INIT_LIST_HEAD (&instance->filled_receive_buffers); + INIT_LIST_HEAD (&instance->spare_receive_buffers); tasklet_enable (&instance->receive_tasklet); - tasklet_kill (&instance->receive_tasklet); - for (i = 0; i < UDSL_NUMBER_RCV_URBS; i++) { - struct udsl_receiver *rcv = &(instance->all_receivers[i]); + for (i = 0; i < UDSL_NUM_RCV_URBS; i++) + 
usb_free_urb (instance->receivers [i].urb); - usb_free_urb (rcv->urb); - kfree_skb (rcv->skb); - } + for (i = 0; i < UDSL_NUM_RCV_BUFS; i++) + kfree (instance->receive_buffers [i].base); /* send finalize */ tasklet_disable (&instance->send_tasklet); - for (i = 0; i < UDSL_NUMBER_SND_URBS; i++) - if ((result = usb_unlink_urb (instance->all_senders[i].urb)) < 0) + for (i = 0; i < UDSL_NUM_SND_URBS; i++) + if ((result = usb_unlink_urb (instance->senders [i].urb)) < 0) dbg ("udsl_usb_disconnect: usb_unlink_urb on send urb %d returned %d", i, result); /* wait for completion handlers to finish */ do { count = 0; - spin_lock (&instance->send_lock); + spin_lock_irq (&instance->send_lock); list_for_each (pos, &instance->spare_senders) - if (++count > UDSL_NUMBER_SND_URBS) + if (++count > UDSL_NUM_SND_URBS) panic (__FILE__ ": memory corruption detected at line %d!\n", __LINE__); - spin_unlock (&instance->send_lock); + spin_unlock_irq (&instance->send_lock); dbg ("udsl_usb_disconnect: found %u spare senders", count); - if (count == UDSL_NUMBER_SND_URBS) + if (count == UDSL_NUM_SND_URBS) break; set_current_state (TASK_RUNNING); @@ -1329,22 +1236,22 @@ static void udsl_usb_disconnect (struct usb_interface *intf) /* no need to take the spinlock */ INIT_LIST_HEAD (&instance->spare_senders); - INIT_LIST_HEAD (&instance->spare_buffers); + INIT_LIST_HEAD (&instance->spare_send_buffers); instance->current_buffer = NULL; tasklet_enable (&instance->send_tasklet); - for (i = 0; i < UDSL_NUMBER_SND_URBS; i++) - usb_free_urb (instance->all_senders[i].urb); + for (i = 0; i < UDSL_NUM_SND_URBS; i++) + usb_free_urb (instance->senders [i].urb); - for (i = 0; i < UDSL_NUMBER_SND_BUFS; i++) - kfree (instance->all_buffers[i].base); + for (i = 0; i < UDSL_NUM_SND_BUFS; i++) + kfree (instance->send_buffers [i].base); wmb (); instance->usb_dev = NULL; - /* atm finalize */ - shutdown_atm_dev (instance->atm_dev); /* frees instance */ + /* ATM finalize */ + shutdown_atm_dev (instance->atm_dev); /* 
frees instance, kills tasklets */ } @@ -1392,10 +1299,10 @@ static int udsl_print_packet (const unsigned char *data, int len) int i = 0, j = 0; for (i = 0; i < len;) { - buffer[0] = '\0'; + buffer [0] = '\0'; sprintf (buffer, "%.3d :", i); for (j = 0; (j < 16) && (i < len); j++, i++) { - sprintf (buffer, "%s %2.2x", buffer, data[i]); + sprintf (buffer, "%s %2.2x", buffer, data [i]); } dbg ("%s", buffer); } diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index cd49b91b3040..7bcce5bd025a 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -988,7 +988,7 @@ int usb_stor_Bulk_transport(Scsi_Cmnd *srb, struct us_data *us) US_DEBUGP("Bulk status Sig 0x%x T 0x%x R %d Stat 0x%x\n", le32_to_cpu(bcs.Signature), bcs.Tag, bcs.Residue, bcs.Status); - if (bcs.Signature != cpu_to_le32(US_BULK_CS_SIGN) || + if ((bcs.Signature != cpu_to_le32(US_BULK_CS_SIGN) && bcs.Signature != cpu_to_le32(US_BULK_CS_OLYMPUS_SIGN)) || bcs.Tag != bcb.Tag || bcs.Status > US_BULK_STAT_PHASE) { US_DEBUGP("Bulk logical error\n"); diff --git a/drivers/usb/storage/transport.h b/drivers/usb/storage/transport.h index 325389b4433f..f8b3740ef709 100644 --- a/drivers/usb/storage/transport.h +++ b/drivers/usb/storage/transport.h @@ -105,6 +105,8 @@ struct bulk_cs_wrap { #define US_BULK_CS_WRAP_LEN 13 #define US_BULK_CS_SIGN 0x53425355 /* spells out 'USBS' */ +/* This is for Olympus Camedia digital cameras */ +#define US_BULK_CS_OLYMPUS_SIGN 0x55425355 /* spells out 'USBU' */ #define US_BULK_STAT_OK 0 #define US_BULK_STAT_FAIL 1 #define US_BULK_STAT_PHASE 2 diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h index bce8e2252907..a07746b1dc07 100644 --- a/include/asm-x86_64/apic.h +++ b/include/asm-x86_64/apic.h @@ -9,7 +9,7 @@ #ifdef CONFIG_X86_LOCAL_APIC -#define APIC_DEBUG 1 +#define APIC_DEBUG 0 #if APIC_DEBUG #define Dprintk(x...) 
printk(x) diff --git a/include/asm-x86_64/calling.h b/include/asm-x86_64/calling.h index 8bbf1971fd7b..a3a04b806744 100644 --- a/include/asm-x86_64/calling.h +++ b/include/asm-x86_64/calling.h @@ -84,8 +84,9 @@ movq \offset+72(%rsp),%rax .endm +#define REST_SKIP 6*8 .macro SAVE_REST - subq $6*8,%rsp + subq $REST_SKIP,%rsp movq %rbx,5*8(%rsp) movq %rbp,4*8(%rsp) movq %r12,3*8(%rsp) @@ -94,7 +95,6 @@ movq %r15,(%rsp) .endm -#define REST_SKIP 6*8 .macro RESTORE_REST movq (%rsp),%r15 movq 1*8(%rsp),%r14 diff --git a/include/asm-x86_64/compat.h b/include/asm-x86_64/compat.h index b73ae6b2a85f..63064e779369 100644 --- a/include/asm-x86_64/compat.h +++ b/include/asm-x86_64/compat.h @@ -1,9 +1,11 @@ #ifndef _ASM_X86_64_COMPAT_H #define _ASM_X86_64_COMPAT_H + /* * Architecture specific compatibility types */ #include <linux/types.h> +#include <linux/sched.h> #define COMPAT_USER_HZ 100 diff --git a/include/asm-x86_64/debugreg.h b/include/asm-x86_64/debugreg.h index 2c4fe65e69ac..bd1aab1d8c4a 100644 --- a/include/asm-x86_64/debugreg.h +++ b/include/asm-x86_64/debugreg.h @@ -58,7 +58,7 @@ We can slow the instruction pipeline for instructions coming via the gdt or the ldt if we want to. 
I am not sure why this is an advantage */ -#define DR_CONTROL_RESERVED (0xFFFFFFFFFC00) /* Reserved by Intel */ +#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index 9b447dd69663..ad24c50d4967 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h @@ -50,7 +50,7 @@ extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); extern void e820_print_map(char *who); -extern int e820_mapped(unsigned long start, unsigned long end, int type); +extern int e820_mapped(unsigned long start, unsigned long end, unsigned type); extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); diff --git a/include/asm-x86_64/floppy.h b/include/asm-x86_64/floppy.h index 738395d6d31c..f68f972f3685 100644 --- a/include/asm-x86_64/floppy.h +++ b/include/asm-x86_64/floppy.h @@ -64,7 +64,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) #endif if(!doing_pdma) { floppy_interrupt(irq, dev_id, regs); - return; + return IRQ_HANDLED; } #ifdef TRACE_FLPY_INT @@ -96,7 +96,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) calls++; #endif if(st == 0x20) - return; + return IRQ_HANDLED; if(!(st & 0x20)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h index e5bc31f19001..b9ecf53519a2 100644 --- a/include/asm-x86_64/i387.h +++ b/include/asm-x86_64/i387.h @@ -17,6 +17,7 @@ #include <asm/sigcontext.h> #include <asm/user.h> #include <asm/thread_info.h> +#include <asm/uaccess.h> extern void fpu_init(void); extern void init_fpu(struct task_struct *child); @@ -91,6 +92,8 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 
".previous" : [err] "=r" (err) : [fx] "r" (fx), "0" (0)); + if (unlikely(err)) + init_fpu(current); return err; } @@ -109,6 +112,8 @@ static inline int save_i387_checking(struct i387_fxsave_struct *fx) ".previous" : [err] "=r" (err) : [fx] "r" (fx), "0" (0)); + if (unlikely(err)) + __clear_user(fx, sizeof(struct i387_fxsave_struct)); return err; } diff --git a/include/asm-x86_64/ia32.h b/include/asm-x86_64/ia32.h index a102c0ed785d..d558fc04fcdc 100644 --- a/include/asm-x86_64/ia32.h +++ b/include/asm-x86_64/ia32.h @@ -133,6 +133,25 @@ typedef struct siginfo32 { } _sifields; } siginfo_t32; +struct sigframe32 +{ + u32 pretcode; + int sig; + struct sigcontext_ia32 sc; + struct _fpstate_ia32 fpstate; + unsigned int extramask[_COMPAT_NSIG_WORDS-1]; +}; + +struct rt_sigframe32 +{ + u32 pretcode; + int sig; + u32 pinfo; + u32 puc; + struct siginfo32 info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; +}; struct ustat32 { __u32 f_tfree; diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h index e6614c5ebb1f..7d80bcbc798e 100644 --- a/include/asm-x86_64/io.h +++ b/include/asm-x86_64/io.h @@ -120,6 +120,7 @@ __OUTS(l) #define __io_virt(x) ((void *)(x)) #endif +#ifndef __i386__ /* * Change virtual addresses to physical addresses and vv. * These are pretty trivial @@ -133,6 +134,7 @@ extern inline void * phys_to_virt(unsigned long address) { return __va(address); } +#endif /* * Change "struct page" to physical address. 
@@ -259,6 +261,7 @@ out: return retval; } +#ifndef __i386__ /** * isa_check_signature - find BIOS signatures * @io_addr: mmio address to check @@ -288,6 +291,7 @@ static inline int isa_check_signature(unsigned long io_addr, out: return retval; } +#endif /* Nothing to do */ diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h index 16c5cea93ab7..7415660ffb69 100644 --- a/include/asm-x86_64/irq.h +++ b/include/asm-x86_64/irq.h @@ -32,4 +32,8 @@ extern void disable_irq(unsigned int); extern void disable_irq_nosync(unsigned int); extern void enable_irq(unsigned int); +#ifdef CONFIG_X86_LOCAL_APIC +#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ +#endif + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index a0399da03099..e4198d618b26 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -58,9 +58,13 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) -#define __level4(x) ((level4_t) { (x) } ) +#define __pml4(x) ((pml4_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) +extern unsigned long vm_stack_flags, vm_stack_flags32; +extern unsigned long vm_data_default_flags, vm_data_default_flags32; +extern unsigned long vm_force_exec32; + #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ @@ -119,9 +123,19 @@ extern __inline__ int get_order(unsigned long size) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ +#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS \ + (test_thread_flag(TIF_IA32) ? 
vm_data_default_flags32 : \ + vm_data_default_flags) + +#define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) + + #endif /* __KERNEL__ */ #endif /* _X86_64_PAGE_H */ diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index d94c514a06ab..4fd91a20ab84 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -76,8 +76,8 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, * Once the device is given the dma address, the device owns this memory * until either pci_unmap_single or pci_dma_sync_single is performed. */ -extern dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction, int flush); +extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, + int direction); void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr, @@ -126,8 +126,8 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, #else -static inline dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction, int flush) +static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, + size_t size, int direction) { dma_addr_t addr; @@ -214,12 +214,6 @@ extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); -static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction) -{ - return __pci_map_single(hwdev,ptr,size,direction,1); -} - #define pci_unmap_page pci_unmap_single /* Return whether the given PCI device DMA address mask can diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index da8b4f570d24..dbb63c9047d7 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h @@ -46,11 +46,11 @@ extern void __bad_pda_field(void); #define pda_to_op(op,field,val) do { \ switch (sizeof_field(struct x8664_pda, field)) { \ case 2: \ -asm 
volatile(op "w %0,%%gs:%c1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ +asm volatile(op "w %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ case 4: \ -asm volatile(op "l %0,%%gs:%c1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ +asm volatile(op "l %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ case 8: \ -asm volatile(op "q %0,%%gs:%c1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ +asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ default: __bad_pda_field(); \ } \ } while (0) @@ -63,11 +63,11 @@ asm volatile(op "q %0,%%gs:%c1"::"r" (val),"i"(pda_offset(field)):"memory"); bre typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \ switch (sizeof_field(struct x8664_pda, field)) { \ case 2: \ -asm volatile(op "w %%gs:%c1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ +asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ case 4: \ -asm volatile(op "l %%gs:%c1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ +asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ case 8: \ -asm volatile(op "q %%gs:%c1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ +asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ default: __bad_pda_field(); \ } \ ret__; }) diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 7e6a4b577bae..dc105425b961 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -28,6 +28,7 @@ extern unsigned long __supported_pte_mask; #define swapper_pg_dir NULL extern void paging_init(void); +extern void clear_kernel_mapping(unsigned long addr, unsigned long size); extern unsigned long pgkern_mask; @@ -165,11 +166,11 @@ static inline void set_pml4(pml4_t *dst, pml4_t val) #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | 
_PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY PAGE_COPY_NOEXEC #define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) - #define __PAGE_KERNEL \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) #define __PAGE_KERNEL_EXECUTABLE \ @@ -181,7 +182,7 @@ static inline void set_pml4(pml4_t *dst, pml4_t val) #define __PAGE_KERNEL_VSYSCALL \ (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define __PAGE_KERNEL_LARGE \ - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PSE | _PAGE_NX) + (__PAGE_KERNEL | _PAGE_PSE) #define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) @@ -241,6 +242,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) * The following only work if pte_present() is true. * Undefined behaviour if not.. 
*/ +static inline int pte_user(pte_t pte) { return pte_val(pte) & _PAGE_USER; } extern inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; } extern inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_USER; } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 7021042a4444..c1fbae233c35 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -218,14 +218,18 @@ struct tss_struct { struct thread_struct { unsigned long rsp0; - unsigned long rip; unsigned long rsp; unsigned long userrsp; /* Copy from PDA */ unsigned long fs; unsigned long gs; unsigned short es, ds, fsindex, gsindex; /* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ + unsigned long debugreg0; + unsigned long debugreg1; + unsigned long debugreg2; + unsigned long debugreg3; + unsigned long debugreg6; + unsigned long debugreg7; /* fault info */ unsigned long cr2, trap_no, error_code; /* floating point info */ @@ -304,17 +308,30 @@ extern inline void sync_core(void) #define cpu_has_fpu 1 -#if 0 -/* disabled for now to work around opteron errata #91. Also gcc 3.2 - doesn't like this in some cases. */ +/* Some early Opteron versions incorrectly fault on prefetch (errata #91). + If this happens just jump back. 
*/ #define ARCH_HAS_PREFETCH -#define prefetch(x) __builtin_prefetch((x),0,1) -#endif +static inline void prefetch(void *x) +{ + asm volatile("2: prefetchnta %0\n1:\t" + ".section __ex_table,\"a\"\n\t" + " .align 8\n\t" + " .quad 2b,1b\n\t" + ".previous" :: "m" (*(unsigned long *)x)); +} #define ARCH_HAS_PREFETCHW +static inline void prefetchw(void *x) +{ + asm volatile("2: prefetchw %0\n1:\t" + ".section __ex_table,\"a\"\n\t" + " .align 8\n\t" + " .quad 2b,1b\n\t" + ".previous" :: "m" (*(unsigned long *)x)); +} + #define ARCH_HAS_SPINLOCK_PREFETCH -#define prefetchw(x) __builtin_prefetch((x),1,1) #define spin_lock_prefetch(x) prefetchw(x) #define cpu_relax() rep_nop() diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h index d8a494f1cd69..848141b81158 100644 --- a/include/asm-x86_64/segment.h +++ b/include/asm-x86_64/segment.h @@ -18,6 +18,7 @@ #define __USER_CS 0x33 /* 6*8+3 */ #define __USER32_DS __USER_DS #define __KERNEL16_CS (GDT_ENTRY_KERNELCS16 * 8) +#define __KERNEL_COMPAT32_CS 0x8 #define GDT_ENTRY_TLS 1 #define GDT_ENTRY_TSS 8 /* needs two entries */ diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h index cc193889eefa..6ac4f0315185 100644 --- a/include/asm-x86_64/suspend.h +++ b/include/asm-x86_64/suspend.h @@ -54,13 +54,6 @@ extern unsigned long saved_ebx; extern unsigned long saved_esi; extern unsigned long saved_edi; -static inline void acpi_save_register_state(unsigned long return_point) -{ - /* FIXME: This is probably no longer correct: we need to save all caller-saved registers */ -} - -#define acpi_restore_register_state() do {} while (0) - /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); extern int acpi_save_state_disk(void); diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index a392e47b5fa7..e9efe11d03e7 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -16,9 +16,14 @@ #define __STR(x) #x #define STR(x) __STR(x) 
-#define __PUSH(x) "pushq %%" __STR(x) "\n\t" -#define __POP(x) "popq %%" __STR(x) "\n\t" +#define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" +#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" +#ifdef CONFIG_X86_REMOTE_DEBUG + +/* full frame for the debug stub */ +/* Should be replaced with a dwarf2 cie/fde description, then gdb could + figure it out all by itself. */ struct save_context_frame { unsigned long rbp; unsigned long rbx; @@ -34,46 +39,64 @@ struct save_context_frame { unsigned long r12; unsigned long rdi; unsigned long rsi; + unsigned long flags; }; -/* frame pointer must be last for get_wchan */ -/* It would be more efficient to let the compiler clobber most of these registers. - Clobbering all is not possible because that lets reload freak out. Even just - clobbering six generates wrong code with gcc 3.1 for me so do it this way for now. - rbp needs to be always explicitly saved because gcc cannot clobber the - frame pointer and the scheduler is compiled with frame pointers. 
-AK */ #define SAVE_CONTEXT \ - __PUSH(rsi) __PUSH(rdi) \ - __PUSH(r12) __PUSH(r13) __PUSH(r14) __PUSH(r15) \ - __PUSH(rdx) __PUSH(rcx) __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) \ - __PUSH(rbx) __PUSH(rbp) + "pushfq\n\t" \ + "subq $14*8,%%rsp\n\t" \ + __SAVE(rbx, 12) __SAVE(rdi, 1) \ + __SAVE(rdx, 6) __SAVE(rcx, 7) \ + __SAVE(r8, 8) __SAVE(r9, 9) \ + __SAVE(r12, 2) __SAVE(r13, 3) \ + __SAVE(r14, 4) __SAVE(r15, 5) \ + __SAVE(r10, 10) __SAVE(r11, 11) \ + __SAVE(rsi, 0) __SAVE(rbp, 13) \ + + #define RESTORE_CONTEXT \ - __POP(rbp) __POP(rbx) \ - __POP(r11) __POP(r10) __POP(r9) __POP(r8) __POP(rcx) __POP(rdx) \ - __POP(r15) __POP(r14) __POP(r13) __POP(r12) \ - __POP(rdi) __POP(rsi) + __RESTORE(rbx, 12) __RESTORE(rdi, 1) \ + __RESTORE(rdx, 6) __RESTORE(rcx, 7) \ + __RESTORE(r12, 2) __RESTORE(r13, 3) \ + __RESTORE(r14, 4) __RESTORE(r15, 5) \ + __RESTORE(r10, 10) __RESTORE(r11, 11) \ + __RESTORE(r8, 8) __RESTORE(r9, 9) \ + __RESTORE(rbp, 13) __RESTORE(rsi, 0) \ + "addq $14*8,%%rsp\n\t" \ + "popfq\n\t" + +#define __EXTRA_CLOBBER -/* RED-PEN: pipeline stall on ret because it is not predicted */ -/* RED-PEN: the register saving could be optimized */ +#else /* frame pointer must be last for get_wchan */ +#define SAVE_CONTEXT "pushfq ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popfq\n\t" + +#define __EXTRA_CLOBBER \ + ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" +#endif #define switch_to(prev,next,last) \ asm volatile(SAVE_CONTEXT \ - "movq %%rsp,%[prevrsp]\n\t" \ - "movq %[nextrsp],%%rsp\n\t" \ - "movq $thread_return,%[prevrip]\n\t" \ - "pushq %[nextrip]\n\t" \ - "jmp __switch_to\n\t" \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + "btr 
%[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ - :[prevrsp] "=m" (prev->thread.rsp), \ - [prevrip] "=m" (prev->thread.rip), \ - "=a" (last) \ - :[nextrsp] "m" (next->thread.rsp), \ - [nextrip] "m" (next->thread.rip), \ - [next] "S" (next), [prev] "D" (prev) \ - :"memory") + : "=a" (last) \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)),\ + [tif_fork] "i" (TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + : "memory", "cc" __EXTRA_CLOBBER) extern void load_gs_index(unsigned); @@ -88,14 +111,14 @@ extern void load_gs_index(unsigned); "2:\n" \ ".section .fixup,\"ax\"\n" \ "3:\t" \ - "pushq $0 ; popq %% " #seg "\n\t" \ + "movl %1,%%" #seg "\n\t" \ "jmp 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n\t" \ ".align 8\n\t" \ ".quad 1b,3b\n" \ ".previous" \ - : :"r" (value)) + : :"r" (value), "r" (0)) #define set_debug(value,register) \ __asm__("movq %0,%%db" #register \ @@ -298,4 +321,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, void disable_hlt(void); void enable_hlt(void); +#define HAVE_EAT_KEY +void eat_key(void); + #endif diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h index 1bdf0dd9914c..a4bbd2239afa 100644 --- a/include/asm-x86_64/thread_info.h +++ b/include/asm-x86_64/thread_info.h @@ -68,7 +68,7 @@ static inline struct thread_info *current_thread_info(void) static inline struct thread_info *stack_thread_info(void) { struct thread_info *ti; - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~8191UL)); + __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~(THREAD_SIZE - 1))); return ti; } @@ -104,6 +104,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_IRET 5 /* force IRET */ #define TIF_POLLING_NRFLAG 16 /* true if 
poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) @@ -113,6 +114,7 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_IRET (1<<TIF_IRET) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_IA32 (1<<TIF_IA32) +#define _TIF_FORK (1<<TIF_FORK) #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ #define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ diff --git a/include/asm-x86_64/user.h b/include/asm-x86_64/user.h index 4d263733a864..12785c649ac5 100644 --- a/include/asm-x86_64/user.h +++ b/include/asm-x86_64/user.h @@ -52,13 +52,13 @@ struct user_i387_struct { unsigned short swd; unsigned short twd; /* Note this is not the same as the 32bit/x87/FSAVE twd */ unsigned short fop; - u64 rip; - u64 rdp; - u32 mxcsr; - u32 mxcsr_mask; - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ - u32 padding[24]; + __u64 rip; + __u64 rdp; + __u32 mxcsr; + __u32 mxcsr_mask; + __u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + __u32 padding[24]; }; /* diff --git a/include/asm-x86_64/vsyscall32.h b/include/asm-x86_64/vsyscall32.h new file mode 100644 index 000000000000..36aa57397b95 --- /dev/null +++ b/include/asm-x86_64/vsyscall32.h @@ -0,0 +1,13 @@ +#ifndef _ASM_VSYSCALL32_H +#define _ASM_VSYSCALL32_H 1 + +/* Values need to match arch/x86_64/ia32/vsyscall.lds */ + +#define VSYSCALL32_BASE 0xffffe000UL +#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE) + +#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) +#define VSYSCALL32_SIGRETURN ((void *)VSYSCALL32_BASE + 0x500) +#define VSYSCALL32_RTSIGRETURN ((void *)VSYSCALL32_BASE + 0x600) /* own slot; must not alias VSYSCALL32_SIGRETURN */ 
+ +#endif diff --git a/include/net/flow.h b/include/net/flow.h index f09d5763ecae..c10122c4c699 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -7,6 +7,8 @@ #ifndef _NET_FLOW_H #define _NET_FLOW_H +#include <linux/in6.h> + struct flowi { int oif; int iif; @@ -21,8 +23,8 @@ struct flowi { } ip4_u; struct { - struct in6_addr * daddr; - struct in6_addr * saddr; + struct in6_addr daddr; + struct in6_addr saddr; __u32 flowlabel; } ip6_u; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bf0e5052f824..76fdecebd5f6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -334,8 +334,7 @@ extern void ip6_flush_pending_frames(struct sock *sk); extern int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, - struct flowi *fl, - struct in6_addr **saddr); + struct flowi *fl); /* * skb processing functions diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4278f9490551..1a00ffc2e363 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -315,14 +315,14 @@ static inline u32 __flow_hash4(struct flowi *fl) static inline u32 __flow_hash6(struct flowi *fl) { - u32 hash = fl->fl6_src->s6_addr32[2] ^ - fl->fl6_src->s6_addr32[3] ^ + u32 hash = fl->fl6_src.s6_addr32[2] ^ + fl->fl6_src.s6_addr32[3] ^ fl->fl_ip_sport; hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4); - hash ^= fl->fl6_dst->s6_addr32[2] ^ - fl->fl6_dst->s6_addr32[3] ^ + hash ^= fl->fl6_dst.s6_addr32[2] ^ + fl->fl6_dst.s6_addr32[3] ^ fl->fl_ip_dport; hash ^= (hash >> 10); hash ^= (hash >> 20); @@ -471,8 +471,8 @@ __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) static inline int __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) { - return addr_match(fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(fl->fl6_src, &sel->saddr, sel->prefixlen_s) && + return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && !((fl->fl_ip_dport^sel->dport)&sel->dport_mask) && 
!((fl->fl_ip_sport^sel->sport)&sel->sport_mask) && (fl->proto == sel->proto || !sel->proto) && @@ -654,7 +654,7 @@ xfrm_address_t *xfrm_flowi_daddr(struct flowi *fl, unsigned short family) case AF_INET: return (xfrm_address_t *)&fl->fl4_dst; case AF_INET6: - return (xfrm_address_t *)fl->fl6_dst; + return (xfrm_address_t *)&fl->fl6_dst; } return NULL; } @@ -666,7 +666,7 @@ xfrm_address_t *xfrm_flowi_saddr(struct flowi *fl, unsigned short family) case AF_INET: return (xfrm_address_t *)&fl->fl4_src; case AF_INET6: - return (xfrm_address_t *)fl->fl6_src; + return (xfrm_address_t *)&fl->fl6_src; } return NULL; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index a768b9189930..2a3ca6779f83 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -125,13 +125,6 @@ config IP_ROUTE_VERBOSE handled by the klogd daemon which is responsible for kernel messages ("man klogd"). -config IP_ROUTE_LARGE_TABLES - bool "IP: large routing tables" - depends on IP_ADVANCED_ROUTER - help - If you have routing zones that grow to more than about 64 entries, - you may want to say Y here to speed up the routing process. 
- config IP_PNP bool "IP: kernel level autoconfiguration" depends on INET diff --git a/net/ipv4/esp.c b/net/ipv4/esp.c index 302f78fe5aa9..9c51f48905a1 100644 --- a/net/ipv4/esp.c +++ b/net/ipv4/esp.c @@ -578,7 +578,7 @@ int __init esp4_init(void) decap_data_too_small(); } - SET_MODULE_OWNER(&esp_type); + esp_type.owner = THIS_MODULE; if (xfrm_register_type(&esp_type, AF_INET) < 0) { printk(KERN_INFO "ip esp init: can't add xfrm type\n"); return -EAGAIN; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index d6ceba8beb16..94cf6e7fdb11 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -89,7 +89,7 @@ struct fn_zone int fz_nent; /* Number of entries */ int fz_divisor; /* Hash divisor */ - u32 fz_hashmask; /* (1<<fz_divisor) - 1 */ + u32 fz_hashmask; /* (fz_divisor - 1) */ #define FZ_HASHMASK(fz) ((fz)->fz_hashmask) int fz_order; /* Zone order */ @@ -149,9 +149,30 @@ static __inline__ int fn_key_leq(fn_key_t a, fn_key_t b) static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED; -#define FZ_MAX_DIVISOR 1024 +#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct fib_node *)) -#ifdef CONFIG_IP_ROUTE_LARGE_TABLES +static unsigned long size_to_order(unsigned long size) +{ + unsigned long order; + + for (order = 0; order < MAX_ORDER; order++) { + if ((PAGE_SIZE << order) >= size) + break; + } + return order; +} + +static struct fib_node **fz_hash_alloc(int divisor) +{ + unsigned long size = divisor * sizeof(struct fib_node *); + + if (divisor <= 1024) { + return kmalloc(size, GFP_KERNEL); + } else { + return (struct fib_node **) + __get_free_pages(GFP_KERNEL, size_to_order(size)); + } +} /* The fib hash lock must be held when this is called. 
*/ static __inline__ void fn_rebuild_zone(struct fn_zone *fz, @@ -174,6 +195,15 @@ static __inline__ void fn_rebuild_zone(struct fn_zone *fz, } } +static void fz_hash_free(struct fib_node **hash, int divisor) +{ + if (divisor <= 1024) + kfree(hash); + else + free_pages((unsigned long) hash, + size_to_order(divisor * sizeof(struct fib_node *))); +} + static void fn_rehash_zone(struct fn_zone *fz) { struct fib_node **ht, **old_ht; @@ -185,24 +215,30 @@ static void fn_rehash_zone(struct fn_zone *fz) switch (old_divisor) { case 16: new_divisor = 256; - new_hashmask = 0xFF; break; case 256: new_divisor = 1024; - new_hashmask = 0x3FF; break; default: - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; + if ((old_divisor << 1) > FZ_MAX_DIVISOR) { + printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); + return; + } + new_divisor = (old_divisor << 1); + break; } + + new_hashmask = (new_divisor - 1); + #if RT_CACHE_DEBUG >= 2 printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); #endif - ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL); + ht = fz_hash_alloc(new_divisor); if (ht) { memset(ht, 0, new_divisor*sizeof(struct fib_node*)); + write_lock_bh(&fib_hash_lock); old_ht = fz->fz_hash; fz->fz_hash = ht; @@ -210,10 +246,10 @@ static void fn_rehash_zone(struct fn_zone *fz) fz->fz_divisor = new_divisor; fn_rebuild_zone(fz, old_ht, old_divisor); write_unlock_bh(&fib_hash_lock); - kfree(old_ht); + + fz_hash_free(old_ht, old_divisor); } } -#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */ static void fn_free_node(struct fib_node * f) { @@ -233,12 +269,11 @@ fn_new_zone(struct fn_hash *table, int z) memset(fz, 0, sizeof(struct fn_zone)); if (z) { fz->fz_divisor = 16; - fz->fz_hashmask = 0xF; } else { fz->fz_divisor = 1; - fz->fz_hashmask = 0; } - fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); + fz->fz_hashmask = (fz->fz_divisor - 1); + fz->fz_hash = fz_hash_alloc(fz->fz_divisor); if 
(!fz->fz_hash) { kfree(fz); return NULL; @@ -467,12 +502,10 @@ rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0); if ((fi = fib_create_info(r, rta, n, &err)) == NULL) return err; -#ifdef CONFIG_IP_ROUTE_LARGE_TABLES - if (fz->fz_nent > (fz->fz_divisor<<2) && + if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && (z==32 || (1<<z) > fz->fz_divisor)) fn_rehash_zone(fz); -#endif fp = fz_chain_p(key, fz); diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index cf4e74aeb368..55809ad7c1f6 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -400,7 +400,7 @@ static struct inet_protocol ipcomp4_protocol = { static int __init ipcomp4_init(void) { - SET_MODULE_OWNER(&ipcomp_type); + ipcomp_type.owner = THIS_MODULE; if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) { printk(KERN_INFO "ipcomp init: can't add xfrm type\n"); return -EAGAIN; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 050441e25259..635b438058f5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1391,8 +1391,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) sk = v; sk = sk->next; - if (sk) - goto out; + + for (; sk; sk = sk->next) { + if (sk->family == AF_INET) + goto out; + } state = seq->private; if (++state->bucket >= UDP_HTABLE_SIZE) diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index fcf0c78f823e..3ad091ecd58f 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -229,7 +229,7 @@ static struct inet_protocol ipip_protocol = { static int __init ipip_init(void) { - SET_MODULE_OWNER(&ipip_type); + ipip_type.owner = THIS_MODULE; if (xfrm_register_type(&ipip_type, AF_INET) < 0) { printk(KERN_INFO "ipip init: can't add xfrm type\n"); return -EAGAIN; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6bfe8f8babec..25821a76d765 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -77,11 +77,13 @@ MODULE_LICENSE("GPL"); extern int raw6_proc_init(void); extern int raw6_proc_exit(void); -extern int 
anycast6_get_info(char *, char **, off_t, int); extern int tcp6_get_info(char *, char **, off_t, int); extern int udp6_get_info(char *, char **, off_t, int); -extern int afinet6_get_info(char *, char **, off_t, int); -extern int afinet6_get_snmp(char *, char **, off_t, int); + +extern int ipv6_misc_proc_init(void); +extern int ipv6_misc_proc_exit(void); + +extern int anycast6_get_info(char *, char **, off_t, int); #endif #ifdef CONFIG_SYSCTL @@ -816,9 +818,7 @@ static int __init inet6_init(void) #ifdef CONFIG_PROC_FS proc_anycast6_fail: - proc_net_remove("snmp6"); - proc_net_remove("dev_snmp6"); - proc_net_remove("sockstat6"); + ipv6_misc_proc_exit(); proc_misc6_fail: proc_net_remove("udp6"); proc_udp6_fail: @@ -852,9 +852,7 @@ static void inet6_exit(void) raw6_proc_exit(); proc_net_remove("tcp6"); proc_net_remove("udp6"); - proc_net_remove("sockstat6"); - proc_net_remove("dev_snmp6"); - proc_net_remove("snmp6"); + ipv6_misc_proc_exit(); proc_net_remove("anycast6"); #endif /* Cleanup code parts. 
*/ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 76660a2a54a3..0bd25c1710cc 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -80,7 +80,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr)); skb->nh.ipv6h = iph; - ipv6_addr_copy(&iph->daddr, fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -297,7 +297,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, goto exit_f; } - fl->fl6_src = &src_info->ipi6_addr; + ipv6_addr_copy(&fl->fl6_src, + &src_info->ipi6_addr); } break; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5ee3d47391a8..e86dc93d3dcc 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -223,9 +223,10 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hd if (skb_queue_len(&sk->write_queue) == 1) { skb->csum = csum_partial((char *)icmp6h, sizeof(struct icmp6hdr), skb->csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(fl->fl6_src, - fl->fl6_dst, - len, fl->proto, skb->csum); + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, + len, fl->proto, + skb->csum); } else { u32 tmp_csum = 0; @@ -235,8 +236,8 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hd tmp_csum = csum_partial((char *)icmp6h, sizeof(struct icmp6hdr), tmp_csum); - tmp_csum = csum_ipv6_magic(fl->fl6_src, - fl->fl6_dst, + tmp_csum = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, len, fl->proto, tmp_csum); icmp6h->icmp6_cksum = tmp_csum; } @@ -266,7 +267,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct ipv6hdr *hdr = skb->nh.ipv6h; struct sock *sk = icmpv6_socket->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *saddr = NULL, *tmp_saddr = NULL; + struct in6_addr *saddr = NULL; struct dst_entry *dst; struct icmp6hdr tmp_hdr; struct flowi fl; @@ -332,11 +333,12 @@ void icmpv6_send(struct 
sk_buff *skb, int type, int code, __u32 info, return; } + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_ICMPV6; - fl.fl6_dst = &hdr->saddr; - fl.fl6_src = saddr; + ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); + if (saddr) + ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = iif; - fl.fl6_flowlabel = 0; fl.fl_icmp_type = type; fl.fl_icmp_code = code; @@ -350,14 +352,14 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); - if (!fl.oif && ipv6_addr_is_multicast(fl.fl6_dst)) + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl, &tmp_saddr); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; if (hlimit < 0) { - if (ipv6_addr_is_multicast(fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -394,7 +396,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (likely(idev != NULL)) in6_dev_put(idev); out: - if (tmp_saddr) kfree(tmp_saddr); icmpv6_xmit_unlock(); } @@ -403,7 +404,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct sock *sk = icmpv6_socket->sk; struct inet6_dev *idev; struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *saddr = NULL, *tmp_saddr = NULL; + struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; struct icmp6hdr tmp_hdr; struct flowi fl; @@ -420,25 +421,25 @@ static void icmpv6_echo_reply(struct sk_buff *skb) memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_ICMPV6; - fl.fl6_dst = &skb->nh.ipv6h->saddr; - fl.fl6_src = saddr; + ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr); + if (saddr) + ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; - fl.fl6_flowlabel = 0; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; - fl.fl_icmp_code = 0; icmpv6_xmit_lock(); - if (!fl.oif && 
ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr)) + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl, &tmp_saddr); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; if (hlimit < 0) { - if (ipv6_addr_is_multicast(fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -464,7 +465,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (likely(idev != NULL)) in6_dev_put(idev); out: - if (tmp_saddr) kfree(tmp_saddr); icmpv6_xmit_unlock(); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5e5f9051e2b1..519ac97d90e7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -152,15 +152,14 @@ int ip6_route_me_harder(struct sk_buff *skb) { struct ipv6hdr *iph = skb->nh.ipv6h; struct dst_entry *dst; - struct flowi fl; - - fl.proto = iph->nexthdr; - fl.fl6_dst = &iph->daddr; - fl.fl6_src = &iph->saddr; - fl.oif = skb->sk ? skb->sk->bound_dev_if : 0; - fl.fl6_flowlabel = 0; - fl.fl_ip_dport = 0; - fl.fl_ip_sport = 0; + struct flowi fl = { + .oif = skb->sk ? skb->sk->bound_dev_if : 0, + .nl_u = + { .ip6_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, } }, + .proto = iph->nexthdr, + }; dst = ip6_route_output(skb->sk, &fl); @@ -200,7 +199,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6_txoptions *opt) { struct ipv6_pinfo *np = sk ? 
inet6_sk(sk) : NULL; - struct in6_addr *first_hop = fl->fl6_dst; + struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr; u8 proto = fl->proto; @@ -255,7 +254,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, hdr->nexthdr = proto; hdr->hop_limit = hlimit; - ipv6_addr_copy(&hdr->saddr, fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, first_hop); mtu = dst_pmtu(dst); @@ -320,8 +319,8 @@ static struct ipv6hdr * ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct f hdr->hop_limit = hlimit; hdr->nexthdr = fl->proto; - ipv6_addr_copy(&hdr->saddr, fl->fl6_src); - ipv6_addr_copy(&hdr->daddr, fl->fl6_dst); + ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->daddr, &fl->fl6_dst); return hdr; } @@ -526,19 +525,19 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, { struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *final_dst = NULL; + struct in6_addr final_dst_buf, *final_dst = NULL; struct dst_entry *dst; int err = 0; unsigned int pktlength, jumbolen, mtu; - struct in6_addr saddr; if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - final_dst = fl->fl6_dst; - fl->fl6_dst = rt0->addr; + ipv6_addr_copy(&final_dst_buf, &fl->fl6_dst); + final_dst = &final_dst_buf; + ipv6_addr_copy(&fl->fl6_dst, rt0->addr); } - if (!fl->oif && ipv6_addr_is_multicast(fl->fl6_dst)) + if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) fl->oif = np->mcast_oif; dst = __sk_dst_check(sk, np->dst_cookie); @@ -564,9 +563,9 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, */ if (((rt->rt6i_dst.plen != 128 || - ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr)) + ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr)) && (np->daddr_cache == NULL || - ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache))) + ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache))) || (fl->oif && 
fl->oif != dst->dev->ifindex)) { dst = NULL; } else @@ -582,8 +581,8 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, return -ENETUNREACH; } - if (fl->fl6_src == NULL) { - err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr); + if (ipv6_addr_any(&fl->fl6_src)) { + err = ipv6_get_saddr(dst, &fl->fl6_dst, &fl->fl6_src); if (err) { #if IP6_DEBUG >= 2 @@ -592,7 +591,6 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, #endif goto out; } - fl->fl6_src = &saddr; } pktlength = length; @@ -604,7 +602,7 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, } if (hlimit < 0) { - if (ipv6_addr_is_multicast(fl->fl6_dst)) + if (ipv6_addr_is_multicast(&fl->fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -715,7 +713,9 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, * cleanup */ out: - ip6_dst_store(sk, dst, fl->fl6_dst == &np->daddr ? &np->daddr : NULL); + ip6_dst_store(sk, dst, + !ipv6_addr_cmp(&fl->fl6_dst, &np->daddr) ? + &np->daddr : NULL); if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; return err; @@ -1135,7 +1135,7 @@ fail: return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl, struct in6_addr **saddr) +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) { struct ipv6_pinfo *np = inet6_sk(sk); int err = 0; @@ -1163,9 +1163,9 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl, st */ if (((rt->rt6i_dst.plen != 128 || - ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr)) + ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr)) && (np->daddr_cache == NULL || - ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache))) + ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache))) || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { *dst = NULL; } else @@ -1181,9 +1181,8 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl, st return -ENETUNREACH; } - if (fl->fl6_src == NULL) { - *saddr = kmalloc(sizeof(struct in6_addr), GFP_ATOMIC); - err = ipv6_get_saddr(*dst, fl->fl6_dst, *saddr); + if (ipv6_addr_any(&fl->fl6_src)) { + err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); if (err) { #if IP6_DEBUG >= 2 @@ -1192,7 +1191,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl, st #endif return err; } - fl->fl6_src = *saddr; } if (*dst) { @@ -1415,7 +1413,7 @@ int ip6_push_pending_frames(struct sock *sk) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; - struct in6_addr *final_dst = NULL; + struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; @@ -1446,7 +1444,7 @@ int ip6_push_pending_frames(struct sock *sk) #endif } - final_dst = fl->fl6_dst; + ipv6_addr_copy(final_dst, &fl->fl6_dst); __skb_pull(skb, skb->h.raw - skb->nh.raw); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -1463,7 +1461,7 @@ int ip6_push_pending_frames(struct sock *sk) hdr->payload_len = 0; hdr->hop_limit = 
np->hop_limit; hdr->nexthdr = proto; - ipv6_addr_copy(&hdr->saddr, fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, final_dst); skb->dst = dst_clone(&rt->u.dst); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 48ecdcc35dae..02631d71018d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -405,8 +405,8 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, struct in6_addr *saddr, struct in6_addr *daddr) { memset(fl, 0, sizeof(*fl)); - fl->fl6_src = saddr; - fl->fl6_dst = daddr; + ipv6_addr_copy(&fl->fl6_src, saddr); + ipv6_addr_copy(&fl->fl6_dst, daddr); fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 3c9a3b75bac0..1c74bc086c15 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -291,10 +291,19 @@ out: return rc; proc_sockstat6_fail: - remove_proc_entry("dev_snmp6", proc_net); + proc_net_remove("dev_snmp6"); proc_dev_snmp6_fail: - remove_proc_entry("snmp6", proc_net); + proc_net_remove("snmp6"); proc_snmp6_fail: rc = -ENOMEM; goto out; } + +int ipv6_misc_proc_exit(void) +{ + proc_net_remove("sockstat6"); + proc_net_remove("dev_snmp6"); + proc_net_remove("snmp6"); + return 0; +} + diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 46ea3c341fd4..649cb253da7e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -461,9 +461,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct r * Only one fragment on the socket. 
*/ /* should be check HW csum miyazawa */ - *csum = csum_ipv6_magic(fl->fl6_src, - fl->fl6_dst, - len, fl->proto, skb->csum); + *csum = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, + len, fl->proto, skb->csum); } else { u32 tmp_csum = 0; @@ -471,9 +471,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct r tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_ipv6_magic(fl->fl6_src, - fl->fl6_dst, - len, fl->proto, tmp_csum); + tmp_csum = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, + len, fl->proto, tmp_csum); *csum = tmp_csum; } if (*csum == 0) @@ -540,7 +540,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg { struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *saddr = NULL; + struct in6_addr *daddr; struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_opt *raw_opt = raw6_sk(sk); @@ -566,9 +566,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg /* * Get and verify the address. 
*/ - - fl.fl6_flowlabel = 0; - fl.oif = 0; + memset(&fl, 0, sizeof(fl)); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -628,7 +626,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg if (fl.oif == 0) fl.oif = sk->bound_dev_if; - fl.fl6_src = NULL; if (msg->msg_controllen) { opt = &opt_space; @@ -653,26 +650,25 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = proto; - fl.fl6_dst = daddr; - if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) - fl.fl6_src = &np->saddr; - fl.fl_icmp_type = 0; - fl.fl_icmp_code = 0; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr)) + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl, &saddr); - if (err) goto out; + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto out; if (hlimit < 0) { - if (ipv6_addr_is_multicast(fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -702,14 +698,15 @@ back_from_confirm: } } done: - ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL); + ip6_dst_store(sk, dst, + !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ? + &np->daddr : NULL); if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; release_sock(sk); out: fl6_sock_release(flowlabel); - if (saddr) kfree(saddr); return err<0?err:len; do_confirm: dst_confirm(dst); @@ -1061,6 +1058,6 @@ int __init raw6_proc_init(void) void raw6_proc_exit(void) { - remove_proc_entry("raw6", proc_net); + proc_net_remove("raw6"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e88bd074095b..a8664bf6cf56 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -454,12 +454,12 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) int strict; int attempts = 3; - strict = ipv6_addr_type(fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); + strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); relookup: read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, fl->fl6_dst, fl->fl6_src); + fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; @@ -481,7 +481,7 @@ restart: if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { read_unlock_bh(&rt6_lock); - rt = rt6_cow(rt, fl->fl6_dst, fl->fl6_src); + rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src); if (rt->u.dst.error != -EEXIST || --attempts <= 0) goto out2; @@ -1616,9 +1616,11 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) memset(&fl, 0, sizeof(fl)); if (rta[RTA_SRC-1]) - fl.fl6_src = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]); + ipv6_addr_copy(&fl.fl6_src, + (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1])); if (rta[RTA_DST-1]) - fl.fl6_dst = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]); + ipv6_addr_copy(&fl.fl6_dst, + (struct in6_addr*)RTA_DATA(rta[RTA_DST-1])); if (rta[RTA_IIF-1]) memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); @@ -1642,7 +1644,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; err = rt6_fill_node(skb, rt, - fl.fl6_dst, fl.fl6_src, + &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, 
NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, nlh); @@ -1929,7 +1931,7 @@ void ip6_route_cleanup(void) { #ifdef CONFIG_PROC_FS proc_net_remove("ipv6_route"); - remove_proc_entry("rt6_stats", proc_net); + proc_net_remove("rt6_stats"); #endif xfrm6_fini(); rt6_ifdown(NULL); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4a9aa1a191ae..d2920063972c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -571,7 +571,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return(-EAFNOSUPPORT); - fl.fl6_flowlabel = 0; + memset(&fl, 0, sizeof(fl)); + if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl.fl6_flowlabel); @@ -666,20 +667,18 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, saddr = &np->rcv_saddr; fl.proto = IPPROTO_TCP; - fl.fl6_dst = &np->daddr; - fl.fl6_src = saddr; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, + (saddr ? saddr : &np->saddr)); fl.oif = sk->bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - if (!fl.fl6_src) - fl.fl6_src = &np->saddr; - dst = ip6_route_output(sk, &fl); if ((err = dst->error) != 0) { @@ -794,9 +793,10 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, to handle rthdr case. Ignore this complexity for now. 
*/ + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; - fl.fl6_dst = &np->daddr; - fl.fl6_src = &np->saddr; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; @@ -879,9 +879,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req, struct flowi fl; int err = -1; + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; - fl.fl6_dst = &req->af.v6_req.rmt_addr; - fl.fl6_src = &req->af.v6_req.loc_addr; + ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); + ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr); fl.fl6_flowlabel = 0; fl.oif = req->af.v6_req.iif; fl.fl_ip_dport = req->rmt_port; @@ -900,7 +901,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req, if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } dst = ip6_route_output(sk, &fl); @@ -916,7 +917,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req, &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr, csum_partial((char *)th, skb->len, skb->csum)); - fl.fl6_dst = &req->af.v6_req.rmt_addr; + ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); err = ip6_xmit(sk, skb, &fl, opt); if (err == NET_XMIT_CN) err = 0; @@ -1018,11 +1019,11 @@ static void tcp_v6_send_reset(struct sk_buff *skb) buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); - fl.fl6_dst = &skb->nh.ipv6h->saddr; - fl.fl6_src = &skb->nh.ipv6h->daddr; - fl.fl6_flowlabel = 0; + memset(&fl, 0, sizeof(fl)); + ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr); + ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr); - t1->check = csum_ipv6_magic(fl.fl6_src, fl.fl6_dst, + t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, sizeof(*t1), IPPROTO_TCP, buff->csum); @@ -1082,11 +1083,11 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 
buff->csum = csum_partial((char *)t1, tot_len, 0); - fl.fl6_dst = &skb->nh.ipv6h->saddr; - fl.fl6_src = &skb->nh.ipv6h->daddr; - fl.fl6_flowlabel = 0; + memset(&fl, 0, sizeof(fl)); + ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr); + ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr); - t1->check = csum_ipv6_magic(fl.fl6_src, fl.fl6_dst, + t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, tot_len, IPPROTO_TCP, buff->csum); @@ -1261,7 +1262,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, { struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; - struct flowi fl; struct inet_opt *newinet; struct tcp_opt *newtp; struct sock *newsk; @@ -1330,14 +1330,16 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } if (dst == NULL) { + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; - fl.fl6_dst = &req->af.v6_req.rmt_addr; + ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - fl.fl6_src = &req->af.v6_req.loc_addr; - fl.fl6_flowlabel = 0; + ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr); fl.oif = sk->bound_dev_if; fl.fl_ip_dport = req->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; @@ -1725,9 +1727,10 @@ static int tcp_v6_rebuild_header(struct sock *sk) struct inet_opt *inet = inet_sk(sk); struct flowi fl; + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; - fl.fl6_dst = &np->daddr; - fl.fl6_src = &np->saddr; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; fl.oif = sk->bound_dev_if; fl.fl_ip_dport = inet->dport; @@ -1735,7 +1738,7 @@ static int tcp_v6_rebuild_header(struct sock *sk) if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, 
rt0->addr); } dst = ip6_route_output(sk, &fl); @@ -1762,9 +1765,10 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) struct flowi fl; struct dst_entry *dst; + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; - fl.fl6_dst = &np->daddr; - fl.fl6_src = &np->saddr; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); fl.oif = sk->bound_dev_if; @@ -1773,7 +1777,7 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } dst = __sk_dst_check(sk, np->dst_cookie); @@ -1793,7 +1797,7 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) skb->dst = dst_clone(dst); /* Restore final destination back after routing done */ - fl.fl6_dst = &np->daddr; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); return ip6_xmit(sk, skb, &fl, np->opt); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5d0e7645b3e6..fc3dd9c50b1e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -270,7 +270,7 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - fl.fl6_flowlabel = 0; + memset(&fl, 0, sizeof(fl)); if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { @@ -350,8 +350,8 @@ ipv4_connected: */ fl.proto = IPPROTO_UDP; - fl.fl6_dst = &np->daddr; - fl.fl6_src = &saddr; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &saddr); fl.oif = sk->bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; @@ -362,11 +362,11 @@ ipv4_connected: if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } } else 
if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } dst = ip6_route_output(sk, &fl); @@ -377,7 +377,7 @@ ipv4_connected: return err; } - ip6_dst_store(sk, dst, fl.fl6_dst); + ip6_dst_store(sk, dst, &fl.fl6_dst); /* get the source address used in the appropriate device */ @@ -784,8 +784,8 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up) if (skb_queue_len(&sk->write_queue) == 1) { skb->csum = csum_partial((char *)uh, sizeof(struct udphdr), skb->csum); - uh->check = csum_ipv6_magic(fl->fl6_src, - fl->fl6_dst, + uh->check = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, up->len, fl->proto, skb->csum); } else { u32 tmp_csum = 0; @@ -795,8 +795,8 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up) } tmp_csum = csum_partial((char *)uh, sizeof(struct udphdr), tmp_csum); - tmp_csum = csum_ipv6_magic(fl->fl6_src, - fl->fl6_dst, + tmp_csum = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, up->len, fl->proto, tmp_csum); uh->check = tmp_csum; @@ -819,7 +819,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *saddr = NULL; + struct in6_addr *daddr; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi fl; @@ -849,8 +849,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg } ulen += sizeof(struct udphdr); - fl.fl6_flowlabel = 0; - fl.oif = 0; + memset(&fl, 0, sizeof(fl)); if (sin6) { if (sin6->sin6_family == AF_INET) { @@ -919,7 +918,6 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg if (!fl.oif) fl.oif = sk->bound_dev_if; - fl.fl6_src = NULL; if (msg->msg_controllen) { opt = &opt_space; @@ -944,26 +942,27 @@ static int 
udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = IPPROTO_UDP; - fl.fl6_dst = daddr; - if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) - fl.fl6_src = &np->saddr; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl_ip_dport = up->dport; fl.fl_ip_sport = inet->sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - fl.fl6_dst = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr)) + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl, &saddr); - if (err) goto out; + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto out; if (hlimit < 0) { - if (ipv6_addr_is_multicast(fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -998,13 +997,14 @@ do_append_data: else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); - ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL); + ip6_dst_store(sk, dst, + !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ? + &np->daddr : NULL); if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; release_sock(sk); out: fl6_sock_release(flowlabel); - if (saddr) kfree(saddr); if (!err) { UDP6_INC_STATS_USER(UdpOutDatagrams); return len; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e0581717c697..285c011f2294 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -60,8 +60,8 @@ __xfrm6_find_bundle(struct flowi *fl, struct rtable *rt, struct xfrm_policy *pol read_lock_bh(&policy->lock); for (dst = policy->bundles; dst; dst = dst->next) { struct xfrm_dst *xdst = (struct xfrm_dst*)dst; - if (!ipv6_addr_cmp(&xdst->u.rt6.rt6i_dst.addr, fl->fl6_dst) && - !ipv6_addr_cmp(&xdst->u.rt6.rt6i_src.addr, fl->fl6_src) && + if (!ipv6_addr_cmp(&xdst->u.rt6.rt6i_dst.addr, &fl->fl6_dst) && + !ipv6_addr_cmp(&xdst->u.rt6.rt6i_src.addr, &fl->fl6_src) && __xfrm6_bundle_ok(xdst, fl)) { dst_clone(dst); break; @@ -82,8 +82,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; - struct in6_addr *remote = fl->fl6_dst; - struct in6_addr *local = fl->fl6_src; + struct in6_addr *remote = &fl->fl6_dst; + struct in6_addr *local = &fl->fl6_src; int i; int err = 0; int header_len = 0; @@ -116,13 +116,15 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int trailer_len += xfrm[i]->props.trailer_len; } - if (ipv6_addr_cmp(remote, fl->fl6_dst)) { - struct flowi fl_tunnel = { .nl_u = { .ip6_u = - { .daddr = remote, - .saddr = local } - } - }; - err = xfrm_dst_lookup((struct xfrm_dst**)&rt, &fl_tunnel, AF_INET6); + if (ipv6_addr_cmp(remote, &fl->fl6_dst)) { + struct flowi fl_tunnel; + + memset(&fl_tunnel, 0, sizeof(fl_tunnel)); + ipv6_addr_copy(&fl_tunnel.fl6_dst, remote); + ipv6_addr_copy(&fl_tunnel.fl6_src, local); + + err = xfrm_dst_lookup((struct xfrm_dst **) &rt, + &fl_tunnel, AF_INET6); if (err) goto error; } else { @@ -175,8 +177,8 @@ _decode_session6(struct 
sk_buff *skb, struct flowi *fl) struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); u8 nexthdr = skb->nh.ipv6h->nexthdr; - fl->fl6_dst = &hdr->daddr; - fl->fl6_src = &hdr->saddr; + ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); + ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) { switch (nexthdr) { diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index c409fdf527fd..051ff16cde44 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -25,8 +25,8 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, { /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, fl->fl6_src); + ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); x->sel.dport = fl->fl_ip_dport; x->sel.dport_mask = ~0; x->sel.sport = fl->fl_ip_sport; diff --git a/net/key/af_key.c b/net/key/af_key.c index e415fdd3d64f..cc0a5f27bb93 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2244,6 +2244,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; + memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); @@ -2275,6 +2276,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; + memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e3f9d2a74b93..9d4583e4824e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c 
@@ -144,17 +144,19 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport, struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->protocol; /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. */ - fl.fl6_dst = &transport->ipaddr.v6.sin6_addr; - fl.fl6_src = &transport->saddr.v6.sin6_addr; + ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr); + ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr); fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - if (ipv6_addr_type(fl.fl6_src) & IPV6_ADDR_LINKLOCAL) + if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) fl.oif = transport->saddr.v6.sin6_scope_id; else fl.oif = sk->bound_dev_if; @@ -163,14 +165,14 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport, if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - fl.nl_u.ip6_u.daddr = rt0->addr; + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, " "src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " "dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", __FUNCTION__, skb, skb->len, - NIP6(*fl.fl6_src), NIP6(*fl.fl6_dst)); + NIP6(fl.fl6_src), NIP6(fl.fl6_dst)); SCTP_INC_STATS(SctpOutSCTPPacks); @@ -185,17 +187,19 @@ struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct dst_entry *dst; - struct flowi fl = { - .nl_u = { .ip6_u = { .daddr = &daddr->v6.sin6_addr, } } }; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("%s: DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", - __FUNCTION__, NIP6(*fl.fl6_dst)); + __FUNCTION__, NIP6(fl.fl6_dst)); if (saddr) { - fl.fl6_src = &saddr->v6.sin6_addr; + ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK( "SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x - 
", - NIP6(*fl.fl6_src)); + NIP6(fl.fl6_src)); } dst = ip6_route_output(NULL, &fl); |
