diff options
| author | Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de> | 2002-05-20 14:09:56 -0500 |
|---|---|---|
| committer | Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de> | 2002-05-20 14:09:56 -0500 |
| commit | d74cacef5c59db1efedf574e3c9fa8391dcc3cd9 (patch) | |
| tree | 45f81c34121a659fc6fd00cd042f10a70620abdd | |
| parent | f457c929ba1e9a372cba5d557fdc9f4f7d92d88d (diff) | |
| parent | 6c52c43ca25e1e05a6c2932c679c84171cbaf6d0 (diff) | |
Merge tp1.ruhr-uni-bochum.de:/home/kai/kernel/v2.5/linux-2.5
into tp1.ruhr-uni-bochum.de:/home/kai/kernel/v2.5/linux-2.5.isdn
247 files changed, 6856 insertions, 3634 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 1acc415a99a3..72288b4d8f9e 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -115,7 +115,7 @@ statfs: yes no no remount_fs: yes yes maybe (see below) umount_begin: yes no maybe (see below) -->read_inode() is not a method - it's a callback used in iget()/iget4(). +->read_inode() is not a method - it's a callback used in iget(). rules for mount_sem are not too nice - it is going to die and be replaced by better scheme anyway. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index df06a180b650..5e1e47711009 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -152,3 +152,38 @@ settles down a bit. s_export_op is now required for exporting a filesystem. isofs, ext2, ext3, resierfs, fat can be used as examples of very different filesystems. + +--- +[mandatory] + +iget4() and the read_inode2 callback have been superseded by iget5_locked() +which has the following prototype, + + struct inode *iget5_locked(struct super_block *sb, unsigned long ino, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), + void *data); + +'test' is an additional function that can be used when the inode +number is not sufficient to identify the actual file object. 'set' +should be a non-blocking function that initializes those parts of a +newly created inode to allow the test function to succeed. 'data' is +passed as an opaque value to both test and set functions. + +When the inode has been created by iget5_locked(), it will be returned with +the I_NEW flag set and will still be locked. read_inode has not been +called so the file system still has to finalize the initialization. Once +the inode is initialized it must be unlocked by calling unlock_new_inode(). + +The filesystem is responsible for setting (and possibly testing) i_ino +when appropriate. 
There is also a simpler iget_locked function that +just takes the superblock and inode number as arguments and does the +test and set for you. + +e.g. + inode = iget_locked(sb, ino); + if (inode->i_state & I_NEW) { + read_inode_from_disk(inode); + unlock_new_inode(inode); + } + diff --git a/MAINTAINERS b/MAINTAINERS index 14438aea4fd4..3729a6c838e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -709,7 +709,8 @@ W: http://www.uni-mainz.de/~langm000/linux.html S: Maintained IBM ServeRAID RAID DRIVER -P: Keith Mitchell +P: Jack Hammer +P: Dave Jeffrey M: ipslinux@us.ibm.com W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html S: Supported @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 5 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) @@ -55,8 +55,7 @@ all: do-it-all ifeq (.config,$(wildcard .config)) include .config -ifeq (.depend,$(wildcard .depend)) -include .depend +ifeq (.hdepend,$(wildcard .hdepend)) do-it-all: vmlinux else CONFIGURATION = depend @@ -93,11 +92,12 @@ CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ -fomit-frame-pointer -fno-strict-aliasing -fno-common AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) +INIT =init/init.o CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o NETWORKS =net/network.o LIBS =$(TOPDIR)/lib/lib.a -SUBDIRS =kernel lib drivers mm fs net ipc sound +SUBDIRS =init kernel lib drivers mm fs net ipc sound DRIVERS-n := DRIVERS-y := @@ -165,8 +165,8 @@ export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS boot: vmlinux @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" AFLAGS="$(AFLAGS) $(AFLAGS_KERNEL)" -C arch/$(ARCH)/boot -vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs - $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \ +vmlinux: include/linux/version.h $(CONFIGURATION) linuxsubdirs + $(LD) $(LINKFLAGS) $(HEAD) $(INIT) \ --start-group \ $(CORE_FILES) \ $(LIBS) \ @@ 
-220,11 +220,6 @@ include/config/MARKER: scripts/split-include include/linux/autoconf.h # Generate some files $(TOPDIR)/include/linux/version.h: include/linux/version.h -$(TOPDIR)/include/linux/compile.h: include/linux/compile.h - -include/linux/compile.h: $(CONFIGURATION) include/linux/version.h - @echo Generating $@ - @. scripts/mkcompile_h $@ "$(ARCH)" "$(CONFIG_SMP)" "$(CC) $(CFLAGS)" include/linux/version.h: ./Makefile @echo Generating $@ @@ -233,25 +228,11 @@ include/linux/version.h: ./Makefile comma := , # --------------------------------------------------------------------------- -# Build files in init -# FIXME should be moved to init/Makefile - -init/version.o: init/version.c include/linux/compile.h include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o init/version.o init/version.c - -init/main.o: init/main.c include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $*.o $< - -init/do_mounts.o: init/do_mounts.c include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $*.o $< - -# --------------------------------------------------------------------------- # Generate dependencies depend dep: dep-files dep-files: scripts/mkdep archdep include/linux/version.h - scripts/mkdep -- init/*.c > .depend scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" ifdef CONFIG_MODVERSIONS @@ -391,7 +372,7 @@ MRPROPER_DIRS += \ clean: archclean - find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \ + find . 
\( -name '*.[oas]' -o -name core -o -name '.*.cmd' \) -type f -print \ | grep -v lxdialog/ | xargs rm -f rm -f $(CLEAN_FILES) rm -rf $(CLEAN_DIRS) @@ -487,4 +468,3 @@ backup: mrproper sums: find . -type f -print | sort | xargs sum > .SUMS - diff --git a/Rules.make b/Rules.make index 756f5bbcd8e5..b4d5f2f84457 100644 --- a/Rules.make +++ b/Rules.make @@ -41,8 +41,7 @@ obj-m := $(filter-out $(obj-y),$(obj-m)) # # Get things started. # -first_rule: sub_dirs - $(MAKE) all_targets +first_rule: all_targets both-m := $(filter $(mod-subdirs), $(subdir-y)) SUB_DIRS := $(subdir-y) @@ -102,7 +101,13 @@ endif # # # -all_targets: $(O_TARGET) $(L_TARGET) +all_targets: $(O_TARGET) $(L_TARGET) sub_dirs + +# $(subdir-obj-y) is the list of objects in $(obj-y) which do not live +# in the local directory +subdir-obj-y := $(foreach o,$(obj-y),$(if $(filter-out $(o),$(notdir $(o))),$(o))) +# Do build these objects, we need to descend into the directories +$(subdir-obj-y): sub_dirs # # Rule to compile a set of .o files into one .o file diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 0bcd16ac271e..95b7e49bf51e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -3669,7 +3669,11 @@ getname32 (const char *filename) return result; } -struct dqblk32 { +extern asmlinkage long sys_quotactl(int cmd, const char *special, int id, caddr_t addr); + +#ifdef CONFIG_QIFACE_COMPAT +#ifdef CONFIG_QIFACE_V1 +struct user_dqblk32 { __u32 dqb_bhardlimit; __u32 dqb_bsoftlimit; __u32 dqb_curblocks; @@ -3679,50 +3683,83 @@ struct dqblk32 { __kernel_time_t32 dqb_btime; __kernel_time_t32 dqb_itime; }; +typedef struct v1c_mem_dqblk comp_dqblk_t; -asmlinkage long -sys32_quotactl (int cmd, unsigned int special, int id, struct dqblk32 *addr) +#define Q_COMP_GETQUOTA Q_V1_GETQUOTA +#define Q_COMP_SETQUOTA Q_V1_SETQUOTA +#define Q_COMP_SETQLIM Q_V1_SETQLIM +#define Q_COMP_SETUSE Q_V1_SETUSE +#else +struct user_dqblk32 { + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + 
__u32 dqb_curinodes; + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u64 dqb_curspace; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; +typedef struct v2c_mem_dqblk comp_dqblk_t; + +#define Q_COMP_GETQUOTA Q_V2_GETQUOTA +#define Q_COMP_SETQUOTA Q_V2_SETQUOTA +#define Q_COMP_SETQLIM Q_V2_SETQLIM +#define Q_COMP_SETUSE Q_V2_SETUSE +#endif + +asmlinkage long sys32_quotactl(int cmd, const char *special, int id, caddr_t addr) { - extern asmlinkage long sys_quotactl (int, const char *, int, caddr_t); int cmds = cmd >> SUBCMDSHIFT; + long err; + comp_dqblk_t d; mm_segment_t old_fs; - struct dqblk d; char *spec; - long err; - + switch (cmds) { - case Q_GETQUOTA: - break; - case Q_SETQUOTA: - case Q_SETUSE: - case Q_SETQLIM: - if (copy_from_user (&d, addr, sizeof(struct dqblk32))) - return -EFAULT; - d.dqb_itime = ((struct dqblk32 *)&d)->dqb_itime; - d.dqb_btime = ((struct dqblk32 *)&d)->dqb_btime; - break; - default: - return sys_quotactl(cmd, (void *) A(special), id, (caddr_t) addr); + case Q_COMP_GETQUOTA: + break; + case Q_COMP_SETQUOTA: + case Q_COMP_SETUSE: + case Q_COMP_SETQLIM: + if (copy_from_user(&d, (struct user_dqblk32 *)addr, + sizeof (struct user_dqblk32))) + return -EFAULT; + d.dqb_itime = ((struct user_dqblk32 *)&d)->dqb_itime; + d.dqb_btime = ((struct user_dqblk32 *)&d)->dqb_btime; + break; + default: + return sys_quotactl(cmd, special, id, (__kernel_caddr_t)addr); } - spec = getname32((void *) A(special)); + spec = getname (special); err = PTR_ERR(spec); - if (IS_ERR(spec)) + if (IS_ERR(spec)) return err; + old_fs = get_fs(); + set_fs (KERNEL_DS); + err = sys_quotactl(cmd, (const char *)spec, id, (__kernel_caddr_t)&d); + set_fs (old_fs); + putname (spec); + if (err) return err; - old_fs = get_fs (); - set_fs(KERNEL_DS); - err = sys_quotactl(cmd, (const char *)spec, id, (caddr_t)&d); - set_fs(old_fs); - putname(spec); - if (cmds == Q_GETQUOTA) { + if (cmds == Q_COMP_GETQUOTA) { __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; - 
((struct dqblk32 *)&d)->dqb_itime = i; - ((struct dqblk32 *)&d)->dqb_btime = b; - if (copy_to_user(addr, &d, sizeof(struct dqblk32))) + ((struct user_dqblk32 *)&d)->dqb_itime = i; + ((struct user_dqblk32 *)&d)->dqb_btime = b; + if (copy_to_user ((struct user_dqblk32 *)addr, &d, + sizeof (struct user_dqblk32))) return -EFAULT; } - return err; + return 0; } +#else +/* No conversion needed for new interface */ +asmlinkage long sys32_quotactl(int cmd, const char *special, int id, caddr_t addr) +{ + return sys_quotactl(cmd, special, id, addr); +} +#endif + asmlinkage long sys32_sched_rr_get_interval (pid_t pid, struct timespec32 *interval) { diff --git a/arch/s390x/kernel/linux32.c b/arch/s390x/kernel/linux32.c index a125f2d41043..f9da89b329bb 100644 --- a/arch/s390x/kernel/linux32.c +++ b/arch/s390x/kernel/linux32.c @@ -897,64 +897,97 @@ asmlinkage long sys32_fcntl64(unsigned int fd, unsigned int cmd, unsigned long a return sys32_fcntl(fd, cmd, arg); } -struct mem_dqblk32 { - __u32 dqb_ihardlimit; - __u32 dqb_isoftlimit; - __u32 dqb_curinodes; - __u32 dqb_bhardlimit; - __u32 dqb_bsoftlimit; - __u64 dqb_curspace; - __kernel_time_t32 dqb_btime; - __kernel_time_t32 dqb_itime; +extern asmlinkage int sys_quotactl(int cmd, const char *special, int id, caddr_t addr); + +#ifdef CONFIG_QIFACE_COMPAT +#ifdef CONFIG_QIFACE_V1 +struct user_dqblk32 { + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u32 dqb_curblocks; + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + __u32 dqb_curinodes; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; }; - -extern asmlinkage long sys_quotactl(int cmd, const char *special, int id, __kernel_caddr_t addr); +typedef struct v1c_mem_dqblk comp_dqblk_t; + +#define Q_COMP_GETQUOTA Q_V1_GETQUOTA +#define Q_COMP_SETQUOTA Q_V1_SETQUOTA +#define Q_COMP_SETQLIM Q_V1_SETQLIM +#define Q_COMP_SETUSE Q_V1_SETUSE +#else +struct user_dqblk32 { + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + __u32 dqb_curinodes; + __u32 dqb_bhardlimit; + __u32 
dqb_bsoftlimit; + __u64 dqb_curspace; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; +typedef struct v2c_mem_dqblk comp_dqblk_t; -asmlinkage int sys32_quotactl(int cmd, const char *special, int id, unsigned long addr) +#define Q_COMP_GETQUOTA Q_V2_GETQUOTA +#define Q_COMP_SETQUOTA Q_V2_SETQUOTA +#define Q_COMP_SETQLIM Q_V2_SETQLIM +#define Q_COMP_SETUSE Q_V2_SETUSE +#endif + +asmlinkage int sys32_quotactl(int cmd, const char *special, int id, caddr_t addr) { int cmds = cmd >> SUBCMDSHIFT; int err; - struct mem_dqblk d; + comp_dqblk_t d; mm_segment_t old_fs; char *spec; switch (cmds) { - case Q_GETQUOTA: - break; - case Q_SETQUOTA: - case Q_SETUSE: - case Q_SETQLIM: - if (copy_from_user (&d, (struct mem_dqblk32 *)addr, - sizeof (struct mem_dqblk32))) - return -EFAULT; - d.dqb_itime = ((struct mem_dqblk32 *)&d)->dqb_itime; - d.dqb_btime = ((struct mem_dqblk32 *)&d)->dqb_btime; - break; + case Q_COMP_GETQUOTA: + break; + case Q_COMP_SETQUOTA: + case Q_COMP_SETUSE: + case Q_COMP_SETQLIM: + if (copy_from_user(&d, (struct user_dqblk32 *)addr, + sizeof (struct user_dqblk32))) + return -EFAULT; + d.dqb_itime = ((struct user_dqblk32 *)&d)->dqb_itime; + d.dqb_btime = ((struct user_dqblk32 *)&d)->dqb_btime; + break; default: - return sys_quotactl(cmd, special, - id, (__kernel_caddr_t)addr); + return sys_quotactl(cmd, special, id, (__kernel_caddr_t)addr); } spec = getname (special); err = PTR_ERR(spec); if (IS_ERR(spec)) return err; - old_fs = get_fs (); + old_fs = get_fs(); set_fs (KERNEL_DS); err = sys_quotactl(cmd, (const char *)spec, id, (__kernel_caddr_t)&d); set_fs (old_fs); putname (spec); if (err) return err; - if (cmds == Q_GETQUOTA) { + if (cmds == Q_COMP_GETQUOTA) { __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; - ((struct mem_dqblk32 *)&d)->dqb_itime = i; - ((struct mem_dqblk32 *)&d)->dqb_btime = b; - if (copy_to_user ((struct mem_dqblk32 *)addr, &d, - sizeof (struct mem_dqblk32))) + ((struct user_dqblk32 *)&d)->dqb_itime = i; + ((struct 
user_dqblk32 *)&d)->dqb_btime = b; + if (copy_to_user ((struct user_dqblk32 *)addr, &d, + sizeof (struct user_dqblk32))) return -EFAULT; } return 0; } +#else +/* No conversion needed for new interface */ +asmlinkage int sys32_quotactl(int cmd, const char *special, int id, caddr_t addr) +{ + return sys_quotactl(cmd, special, id, addr); +} +#endif + static inline int put_statfs (struct statfs32 *ubuf, struct statfs *kbuf) { int err; diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 0e4720e33966..ebf671149c95 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -889,62 +889,97 @@ asmlinkage long sys32_fcntl64(unsigned int fd, unsigned int cmd, unsigned long a return sys32_fcntl(fd, cmd, arg); } -struct dqblk32 { - __u32 dqb_bhardlimit; - __u32 dqb_bsoftlimit; - __u32 dqb_curblocks; - __u32 dqb_ihardlimit; - __u32 dqb_isoftlimit; - __u32 dqb_curinodes; - __kernel_time_t32 dqb_btime; - __kernel_time_t32 dqb_itime; -}; - extern asmlinkage int sys_quotactl(int cmd, const char *special, int id, caddr_t addr); -asmlinkage int sys32_quotactl(int cmd, const char *special, int id, unsigned long addr) +#ifdef CONFIG_QIFACE_COMPAT +#ifdef CONFIG_QIFACE_V1 +struct user_dqblk32 { + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u32 dqb_curblocks; + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + __u32 dqb_curinodes; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; +typedef struct v1c_mem_dqblk comp_dqblk_t; + +#define Q_COMP_GETQUOTA Q_V1_GETQUOTA +#define Q_COMP_SETQUOTA Q_V1_SETQUOTA +#define Q_COMP_SETQLIM Q_V1_SETQLIM +#define Q_COMP_SETUSE Q_V1_SETUSE +#else +struct user_dqblk32 { + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + __u32 dqb_curinodes; + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u64 dqb_curspace; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; +typedef struct v2c_mem_dqblk comp_dqblk_t; + +#define Q_COMP_GETQUOTA Q_V2_GETQUOTA +#define 
Q_COMP_SETQUOTA Q_V2_SETQUOTA +#define Q_COMP_SETQLIM Q_V2_SETQLIM +#define Q_COMP_SETUSE Q_V2_SETUSE +#endif + +asmlinkage int sys32_quotactl(int cmd, const char *special, int id, caddr_t addr) { int cmds = cmd >> SUBCMDSHIFT; int err; - struct dqblk d; + comp_dqblk_t d; mm_segment_t old_fs; char *spec; switch (cmds) { - case Q_GETQUOTA: - break; - case Q_SETQUOTA: - case Q_SETUSE: - case Q_SETQLIM: - if (copy_from_user (&d, (struct dqblk32 *)addr, - sizeof (struct dqblk32))) - return -EFAULT; - d.dqb_itime = ((struct dqblk32 *)&d)->dqb_itime; - d.dqb_btime = ((struct dqblk32 *)&d)->dqb_btime; - break; + case Q_COMP_GETQUOTA: + break; + case Q_COMP_SETQUOTA: + case Q_COMP_SETUSE: + case Q_COMP_SETQLIM: + if (copy_from_user(&d, (struct user_dqblk32 *)addr, + sizeof (struct user_dqblk32))) + return -EFAULT; + d.dqb_itime = ((struct user_dqblk32 *)&d)->dqb_itime; + d.dqb_btime = ((struct user_dqblk32 *)&d)->dqb_btime; + break; default: - return sys_quotactl(cmd, special, - id, (caddr_t)addr); + return sys_quotactl(cmd, special, id, (__kernel_caddr_t)addr); } spec = getname (special); err = PTR_ERR(spec); if (IS_ERR(spec)) return err; - old_fs = get_fs (); + old_fs = get_fs(); set_fs (KERNEL_DS); - err = sys_quotactl(cmd, (const char *)spec, id, (caddr_t)&d); + err = sys_quotactl(cmd, (const char *)spec, id, (__kernel_caddr_t)&d); set_fs (old_fs); putname (spec); - if (cmds == Q_GETQUOTA) { + if (err) + return err; + if (cmds == Q_COMP_GETQUOTA) { __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; - ((struct dqblk32 *)&d)->dqb_itime = i; - ((struct dqblk32 *)&d)->dqb_btime = b; - if (copy_to_user ((struct dqblk32 *)addr, &d, - sizeof (struct dqblk32))) + ((struct user_dqblk32 *)&d)->dqb_itime = i; + ((struct user_dqblk32 *)&d)->dqb_btime = b; + if (copy_to_user ((struct user_dqblk32 *)addr, &d, + sizeof (struct user_dqblk32))) return -EFAULT; } - return err; + return 0; } +#else +/* No conversion needed for new interface */ +asmlinkage int sys32_quotactl(int cmd, const 
char *special, int id, caddr_t addr) +{ + return sys_quotactl(cmd, special, id, addr); +} +#endif + static inline int put_statfs (struct statfs32 *ubuf, struct statfs *kbuf) { int err; diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index b7b5414167bd..8efe92296056 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -194,7 +194,7 @@ sunos_sys_table: .word sunos_getdirentries, sys32_statfs, sys32_fstatfs .word sys_oldumount, sunos_nosys, sunos_nosys .word sys_getdomainname, sys_setdomainname - .word sunos_nosys, sys32_quotactl, sunos_nosys + .word sunos_nosys, sys_quotactl, sunos_nosys .word sunos_mount, sys_ustat, sunos_semsys .word sunos_nosys, sunos_shmsys, sunos_audit .word sunos_nosys, sunos_getdents, sys_setsid diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 12481db1849e..b539b367cbc7 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -35,7 +35,6 @@ #include <linux/blkpg.h> #include <linux/interrupt.h> #include <linux/ioport.h> -#include <linux/locks.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/proc_fs.h> @@ -5473,9 +5472,11 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, int ControllerNumber, DataTransferLength; unsigned char *DataTransferBuffer = NULL; if (UserSpaceUserCommand == NULL) return -EINVAL; - ErrorCode = copy_from_user(&UserCommand, UserSpaceUserCommand, - sizeof(DAC960_V1_UserCommand_T)); - if (ErrorCode != 0) goto Failure1; + if (copy_from_user(&UserCommand, UserSpaceUserCommand, + sizeof(DAC960_V1_UserCommand_T))) { + ErrorCode = -EFAULT; + goto Failure1; + } ControllerNumber = UserCommand.ControllerNumber; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5488,9 +5489,11 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, if (CommandOpcode & 0x80) return -EINVAL; if (CommandOpcode == DAC960_V1_DCDB) { - ErrorCode = - copy_from_user(&DCDB, UserCommand.DCDB, sizeof(DAC960_V1_DCDB_T)); 
- if (ErrorCode != 0) goto Failure1; + if (copy_from_user(&DCDB, UserCommand.DCDB, + sizeof(DAC960_V1_DCDB_T))) { + ErrorCode = -EFAULT; + goto Failure1; + } if (DCDB.Channel >= DAC960_V1_MaxChannels) return -EINVAL; if (!((DataTransferLength == 0 && DCDB.Direction @@ -5516,10 +5519,12 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, { DataTransferBuffer = kmalloc(-DataTransferLength, GFP_KERNEL); if (DataTransferBuffer == NULL) return -ENOMEM; - ErrorCode = copy_from_user(DataTransferBuffer, - UserCommand.DataTransferBuffer, - -DataTransferLength); - if (ErrorCode != 0) goto Failure1; + if (copy_from_user(DataTransferBuffer, + UserCommand.DataTransferBuffer, + -DataTransferLength)) { + ErrorCode = -EFAULT; + goto Failure1; + } } if (CommandOpcode == DAC960_V1_DCDB) { @@ -5567,17 +5572,21 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, DAC960_ReleaseControllerLock(Controller, &ProcessorFlags); if (DataTransferLength > 0) { - ErrorCode = copy_to_user(UserCommand.DataTransferBuffer, - DataTransferBuffer, DataTransferLength); - if (ErrorCode != 0) goto Failure1; + if (copy_to_user(UserCommand.DataTransferBuffer, + DataTransferBuffer, DataTransferLength)) + ErrorCode = -EFAULT; + goto Failure1; + } } if (CommandOpcode == DAC960_V1_DCDB) { Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = false; - ErrorCode = - copy_to_user(UserCommand.DCDB, &DCDB, sizeof(DAC960_V1_DCDB_T)); - if (ErrorCode != 0) goto Failure1; + if (copy_to_user(UserCommand.DCDB, &DCDB, + sizeof(DAC960_V1_DCDB_T))) { + ErrorCode = -EFAULT; + goto Failure1; + } } ErrorCode = CommandStatus; Failure1: @@ -5600,9 +5609,11 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, unsigned char *DataTransferBuffer = NULL; unsigned char *RequestSenseBuffer = NULL; if (UserSpaceUserCommand == NULL) return -EINVAL; - ErrorCode = copy_from_user(&UserCommand, UserSpaceUserCommand, - sizeof(DAC960_V2_UserCommand_T)); - if (ErrorCode != 0) goto Failure2; + if 
(copy_from_user(&UserCommand, UserSpaceUserCommand, + sizeof(DAC960_V2_UserCommand_T))) { + ErrorCode = -EFAULT; + goto Failure2; + } ControllerNumber = UserCommand.ControllerNumber; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5621,10 +5632,12 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, { DataTransferBuffer = kmalloc(-DataTransferLength, GFP_KERNEL); if (DataTransferBuffer == NULL) return -ENOMEM; - ErrorCode = copy_from_user(DataTransferBuffer, - UserCommand.DataTransferBuffer, - -DataTransferLength); - if (ErrorCode != 0) goto Failure2; + if (copy_from_user(DataTransferBuffer, + UserCommand.DataTransferBuffer, + -DataTransferLength)) { + ErrorCode = -EFAULT; + goto Failure2; + } } RequestSenseLength = UserCommand.RequestSenseLength; if (RequestSenseLength > 0) @@ -5694,25 +5707,32 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, DAC960_ReleaseControllerLock(Controller, &ProcessorFlags); if (RequestSenseLength > UserCommand.RequestSenseLength) RequestSenseLength = UserCommand.RequestSenseLength; - ErrorCode = copy_to_user(&UserSpaceUserCommand->DataTransferLength, + if (copy_to_user(&UserSpaceUserCommand->DataTransferLength, &DataTransferResidue, - sizeof(DataTransferResidue)); - if (ErrorCode != 0) goto Failure2; - ErrorCode = copy_to_user(&UserSpaceUserCommand->RequestSenseLength, - &RequestSenseLength, - sizeof(RequestSenseLength)); - if (ErrorCode != 0) goto Failure2; + sizeof(DataTransferResidue))) { + ErrorCode = -EFAULT; + goto Failure2; + } + if (copy_to_user(&UserSpaceUserCommand->RequestSenseLength, + &RequestSenseLength, sizeof(RequestSenseLength))) { + ErrorCode = -EFAULT; + goto Failure2; + } if (DataTransferLength > 0) { - ErrorCode = copy_to_user(UserCommand.DataTransferBuffer, - DataTransferBuffer, DataTransferLength); - if (ErrorCode != 0) goto Failure2; + if (copy_to_user(UserCommand.DataTransferBuffer, + DataTransferBuffer, DataTransferLength)) { + ErrorCode = -EFAULT; + goto 
Failure2; + } } if (RequestSenseLength > 0) { - ErrorCode = copy_to_user(UserCommand.RequestSenseBuffer, - RequestSenseBuffer, RequestSenseLength); - if (ErrorCode != 0) goto Failure2; + if (copy_to_user(UserCommand.RequestSenseBuffer, + RequestSenseBuffer, RequestSenseLength)) { + ErrorCode = -EFAULT; + goto Failure2; + } } ErrorCode = CommandStatus; Failure2: @@ -5731,9 +5751,9 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, DAC960_Controller_T *Controller; int ControllerNumber; if (UserSpaceGetHealthStatus == NULL) return -EINVAL; - ErrorCode = copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus, - sizeof(DAC960_V2_GetHealthStatus_T)); - if (ErrorCode != 0) return ErrorCode; + if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus, + sizeof(DAC960_V2_GetHealthStatus_T))) + return -EFAULT; ControllerNumber = GetHealthStatus.ControllerNumber; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5741,10 +5761,10 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, Controller = DAC960_Controllers[ControllerNumber]; if (Controller == NULL) return -ENXIO; if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL; - ErrorCode = copy_from_user(&HealthStatusBuffer, - GetHealthStatus.HealthStatusBuffer, - sizeof(DAC960_V2_HealthStatusBuffer_T)); - if (ErrorCode != 0) return ErrorCode; + if (copy_from_user(&HealthStatusBuffer, + GetHealthStatus.HealthStatusBuffer, + sizeof(DAC960_V2_HealthStatusBuffer_T))) + return -EFAULT; while (Controller->V2.HealthStatusBuffer->StatusChangeCounter == HealthStatusBuffer.StatusChangeCounter && Controller->V2.HealthStatusBuffer->NextEventSequenceNumber @@ -5754,10 +5774,11 @@ static int DAC960_UserIOCTL(Inode_T *Inode, File_T *File, DAC960_MonitoringTimerInterval); if (signal_pending(current)) return -EINTR; } - ErrorCode = copy_to_user(GetHealthStatus.HealthStatusBuffer, - Controller->V2.HealthStatusBuffer, - sizeof(DAC960_V2_HealthStatusBuffer_T)); - return 
ErrorCode; + if (copy_to_user(GetHealthStatus.HealthStatusBuffer, + Controller->V2.HealthStatusBuffer, + sizeof(DAC960_V2_HealthStatusBuffer_T))) + return -EFAULT; + return 0; } } return -EINVAL; diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c index e8059084b8f0..595fa49af3ef 100644 --- a/drivers/block/blkpg.c +++ b/drivers/block/blkpg.c @@ -35,6 +35,7 @@ #include <linux/blkpg.h> #include <linux/genhd.h> #include <linux/module.h> /* for EXPORT_SYMBOL */ +#include <linux/backing-dev.h> #include <asm/uaccess.h> @@ -219,7 +220,7 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) unsigned short usval; kdev_t dev = to_kdev_t(bdev->bd_dev); int holder; - unsigned long *ra_pages; + struct backing_dev_info *bdi; intval = block_ioctl(bdev, cmd, arg); if (intval != -ENOTTY) @@ -241,20 +242,20 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - ra_pages = blk_get_ra_pages(bdev); - if (ra_pages == NULL) + bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) return -ENOTTY; - *ra_pages = (arg * 512) / PAGE_CACHE_SIZE; + bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKRAGET: case BLKFRAGET: if (!arg) return -EINVAL; - ra_pages = blk_get_ra_pages(bdev); - if (ra_pages == NULL) + bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) return -ENOTTY; - return put_user((*ra_pages * PAGE_CACHE_SIZE) / 512, + return put_user((bdi->ra_pages * PAGE_CACHE_SIZE) / 512, (long *)arg); case BLKSECTGET: diff --git a/drivers/block/block_ioctl.c b/drivers/block/block_ioctl.c index 6c204d48ea53..7801e021c1bf 100644 --- a/drivers/block/block_ioctl.c +++ b/drivers/block/block_ioctl.c @@ -21,7 +21,6 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/config.h> -#include <linux/locks.h> #include <linux/swap.h> #include <linux/init.h> #include <linux/smp_lock.h> diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c 
index 314ade57968d..027bdaaf3189 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1117,17 +1117,19 @@ static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: - return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); + if (copy_to_user(&io->c.drv, &hba[ctlr]->drv[dsk], + sizeof(drv_info_t))) + return -EFAULT; + return 0; case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: if (!capable(CAP_SYS_RAWIO)) return -EPERM; - error = copy_from_user(&my_io, io, sizeof(my_io)); - if (error) return error; + if (copy_from_user(&my_io, io, sizeof(my_io))) + return -EFAULT; error = ida_ctlr_ioctl(ctlr, dsk, &my_io); if (error) return error; - error = copy_to_user(io, &my_io, sizeof(my_io)); - return error; + return copy_to_user(io, &my_io, sizeof(my_io)) ? -EFAULT : 0; case IDAGETCTLRSIG: if (!arg) return -EINVAL; put_user(hba[ctlr]->ctlr_sig, (int*)arg); @@ -1208,7 +1210,11 @@ static int ida_ctlr_ioctl(int ctlr, int dsk, ida_ioctl_t *io) cmd_free(h, c, 0); return(error); } - copy_from_user(p, (void*)io->sg[0].addr, io->sg[0].size); + if (copy_from_user(p, (void*)io->sg[0].addr, io->sg[0].size)) { + kfree(p); + cmd_free(h, c, 0); + return -EFAULT; + } c->req.hdr.blk = pci_map_single(h->pci_dev, &(io->c), sizeof(ida_ioctl_t), PCI_DMA_BIDIRECTIONAL); @@ -1245,7 +1251,11 @@ static int ida_ctlr_ioctl(int ctlr, int dsk, ida_ioctl_t *io) cmd_free(h, c, 0); return(error); } - copy_from_user(p, (void*)io->sg[0].addr, io->sg[0].size); + if (copy_from_user(p, (void*)io->sg[0].addr, io->sg[0].size)) { + kfree(p); + cmd_free(h, c, 0); + return -EFAULT; + } c->req.sg[0].size = io->sg[0].size; c->req.sg[0].addr = pci_map_single(h->pci_dev, p, c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); @@ -1282,7 +1292,10 @@ static int ida_ctlr_ioctl(int ctlr, int dsk, ida_ioctl_t *io) case DIAG_PASS_THRU: case SENSE_CONTROLLER_PERFORMANCE: 
case READ_FLASH_ROM: - copy_to_user((void*)io->sg[0].addr, p, io->sg[0].size); + if (copy_to_user((void*)io->sg[0].addr, p, io->sg[0].size)) { + kfree(p); + return -EFAULT; + } /* fall through and free p */ case IDA_WRITE: case IDA_WRITE_MEDIA: diff --git a/drivers/block/cpqarray.h b/drivers/block/cpqarray.h index 80b4dba8b83e..a6118b3de22b 100644 --- a/drivers/block/cpqarray.h +++ b/drivers/block/cpqarray.h @@ -27,7 +27,6 @@ #ifdef __KERNEL__ #include <linux/blkdev.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/proc_fs.h> #include <linux/timer.h> diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5936d02cc679..b3d2a94c59ce 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3859,7 +3859,9 @@ static int check_floppy_change(kdev_t dev) static int floppy_revalidate(kdev_t dev) { #define NO_GEOM (!current_type[drive] && !TYPE(dev)) +#if 0 struct buffer_head * bh; +#endif int drive=DRIVE(dev); int cf; diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 5430dea71325..0ea76d978992 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -18,7 +18,6 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/config.h> -#include <linux/locks.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/init.h> @@ -27,6 +26,7 @@ #include <linux/completion.h> #include <linux/compiler.h> #include <scsi/scsi.h> +#include <linux/backing-dev.h> #include <asm/system.h> #include <asm/io.h> @@ -100,21 +100,21 @@ inline request_queue_t *blk_get_queue(kdev_t dev) } /** - * blk_get_ra_pages - get the address of a queue's readahead tunable + * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @dev: device * * Locates the passed device's request queue and returns the address of its - * readahead setting. + * backing_dev_info * * Will return NULL if the request queue cannot be located. 
*/ -unsigned long *blk_get_ra_pages(struct block_device *bdev) +struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - unsigned long *ret = NULL; + struct backing_dev_info *ret = NULL; request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev)); if (q) - ret = &q->ra_pages; + ret = &q->backing_dev_info; return ret; } @@ -153,7 +153,8 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) q->max_phys_segments = MAX_PHYS_SEGMENTS; q->max_hw_segments = MAX_HW_SEGMENTS; q->make_request_fn = mfn; - q->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + q->backing_dev_info.state = 0; blk_queue_max_sectors(q, MAX_SECTORS); blk_queue_hardsect_size(q, 512); diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 8592e40392d5..1f05ab38cc84 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -623,7 +623,8 @@ static ssize_t pg_write(struct file * filp, const char * buf, if (PG.busy) return -EBUSY; if (count < hs) return -EINVAL; - copy_from_user((char *)&hdr,buf,hs); + if (copy_from_user((char *)&hdr, buf, hs)) + return -EFAULT; if (hdr.magic != PG_MAGIC) return -EINVAL; if (hdr.dlen > PG_MAX_DATA) return -EINVAL; @@ -647,8 +648,8 @@ static ssize_t pg_write(struct file * filp, const char * buf, PG.busy = 1; - copy_from_user(PG.bufptr,buf+hs,count-hs); - + if (copy_from_user(PG.bufptr, buf + hs, count - hs)) + return -EFAULT; return count; } @@ -682,9 +683,11 @@ static ssize_t pg_read(struct file * filp, char * buf, hdr.duration = (jiffies - PG.start + HZ/2) / HZ; hdr.scsi = PG.status & 0x0f; - copy_to_user(buf,(char *)&hdr,hs); - if (copy > 0) copy_to_user(buf+hs,PG.bufptr,copy); - + if (copy_to_user(buf, (char *)&hdr, hs)) + return -EFAULT; + if (copy > 0) + if (copy_to_user(buf+hs,PG.bufptr,copy)) + return -EFAULT; return copy+hs; } diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 
fab199e16a25..ccf3db7b1463 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -860,7 +860,10 @@ static ssize_t pt_read(struct file * filp, char * buf, n -= k; b = k; if (b > count) b = count; - copy_to_user(buf+t,PT.bufptr,b); + if (copy_to_user(buf + t, PT.bufptr, b)) { + pi_disconnect(PI); + return -EFAULT; + } t += b; count -= b; } @@ -944,7 +947,10 @@ static ssize_t pt_write(struct file * filp, const char * buf, if (k > PT_BUFSIZE) k = PT_BUFSIZE; b = k; if (b > count) b = count; - copy_from_user(PT.bufptr,buf+t,b); + if (copy_from_user(PT.bufptr, buf + t, b)) { + pi_disconnect(PI); + return -EFAULT; + } pi_write_block(PI,PT.bufptr,k); t += b; count -= b; diff --git a/drivers/block/rd.c b/drivers/block/rd.c index d5af5c1d3196..e2b22a17629b 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -318,7 +318,8 @@ static ssize_t initrd_read(struct file *file, char *buf, left = initrd_end - initrd_start - *ppos; if (count > left) count = left; if (count == 0) return 0; - copy_to_user(buf, (char *)initrd_start + *ppos, count); + if (copy_to_user(buf, (char *)initrd_start + *ppos, count)) + return -EFAULT; *ppos += count; return count; } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 5b223f90dcd9..d83408ba14fb 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -840,9 +840,10 @@ static int floppy_ioctl(struct inode *inode, struct file *filp, err = fd_eject(fs); return err; case FDGETPRM: - err = copy_to_user((void *) param, (void *) &floppy_type, - sizeof(struct floppy_struct)); - return err; + if (copy_to_user((void *) param, (void *)&floppy_type, + sizeof(struct floppy_struct))) + return -EFAULT; + return 0; } return -ENOTTY; } diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c index cf99a0074948..d57ff9d3bbfb 100644 --- a/drivers/block/swim_iop.c +++ b/drivers/block/swim_iop.c @@ -360,9 +360,10 @@ static int floppy_ioctl(struct inode *inode, struct file *filp, err = swimiop_eject(fs); return 
err; case FDGETPRM: - err = copy_to_user((void *) param, (void *) &floppy_type, - sizeof(struct floppy_struct)); - return err; + if (copy_to_user((void *) param, (void *) &floppy_type, + sizeof(struct floppy_struct))) + return -EFAULT; + return 0; } return -ENOTTY; } diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 21f270d0d14d..d90d0d973380 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -907,7 +907,9 @@ static int pc_write(struct tty_struct * tty, int from_user, ----------------------------------------------------------------- */ - copy_from_user(ch->tmp_buf, buf, bytesAvailable); + if (copy_from_user(ch->tmp_buf, buf, + bytesAvailable)) + return -EFAULT; } /* End if area verified */ @@ -2999,7 +3001,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, di.port = boards[brd].port ; di.membase = boards[brd].membase ; - copy_to_user((char *)arg, &di, sizeof (di)); + if (copy_to_user((char *)arg, &di, sizeof (di))) + return -EFAULT; break; } /* End case DIGI_GETINFO */ @@ -3068,14 +3071,9 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, { /* Begin switch cmd */ case TCGETS: - retval = verify_area(VERIFY_WRITE, (void *)arg, - sizeof(struct termios)); - - if (retval) - return(retval); - - copy_to_user((struct termios *)arg, - tty->termios, sizeof(struct termios)); + if (copy_to_user((struct termios *)arg, + tty->termios, sizeof(struct termios))) + return -EFAULT; return(0); case TCGETA: @@ -3235,14 +3233,9 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, break; case DIGI_GETA: - if ((error= - verify_area(VERIFY_WRITE, (char*)arg, sizeof(digi_t)))) - { - printk(KERN_ERR "<Error> - Digi GETA failed\n"); - return(error); - } - - copy_to_user((char*)arg, &ch->digiext, sizeof(digi_t)); + if (copy_to_user((char*)arg, &ch->digiext, + sizeof(digi_t))) + return -EFAULT; break; case DIGI_SETAW: @@ -3263,11 +3256,9 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, /* Fall Thru */ case 
DIGI_SETA: - if ((error = - verify_area(VERIFY_READ, (char*)arg,sizeof(digi_t)))) - return(error); - - copy_from_user(&ch->digiext, (char*)arg, sizeof(digi_t)); + if (copy_from_user(&ch->digiext, (char*)arg, + sizeof(digi_t))) + return -EFAULT; if (ch->digiext.digi_flags & DIGI_ALTPIN) { @@ -3310,10 +3301,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, memoff(ch); restore_flags(flags); - if ((error = verify_area(VERIFY_WRITE, (char*)arg,sizeof(dflow)))) - return(error); - - copy_to_user((char*)arg, &dflow, sizeof(dflow)); + if (copy_to_user((char*)arg, &dflow, sizeof(dflow))) + return -EFAULT; break; case DIGI_SETAFLOW: @@ -3329,10 +3318,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, stopc = ch->stopca; } - if ((error = verify_area(VERIFY_READ, (char*)arg,sizeof(dflow)))) - return(error); - - copy_from_user(&dflow, (char*)arg, sizeof(dflow)); + if (copy_from_user(&dflow, (char*)arg, sizeof(dflow))) + return -EFAULT; if (dflow.startc != startc || dflow.stopc != stopc) { /* Begin if setflow toggled */ diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index e50dd36f4ad7..eab51921c986 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -2022,7 +2022,8 @@ static int stli_setserial(stliport_t *portp, struct serial_struct *sp) printk("stli_setserial(portp=%x,sp=%x)\n", (int) portp, (int) sp); #endif - copy_from_user(&sio, sp, sizeof(struct serial_struct)); + if (copy_from_user(&sio, sp, sizeof(struct serial_struct))) + return -EFAULT; if (!capable(CAP_SYS_ADMIN)) { if ((sio.baud_base != portp->baud_base) || (sio.close_delay != portp->close_delay) || @@ -4878,11 +4879,15 @@ static ssize_t stli_memread(struct file *fp, char *buf, size_t count, loff_t *of while (size > 0) { memptr = (void *) EBRDGETMEMPTR(brdp, fp->f_pos); n = MIN(size, (brdp->pagesize - (((unsigned long) fp->f_pos) % brdp->pagesize))); - copy_to_user(buf, memptr, n); + if (copy_to_user(buf, memptr, n)) { + count = -EFAULT; + goto 
out; + } fp->f_pos += n; buf += n; size -= n; } +out: EBRDDISABLE(brdp); restore_flags(flags); @@ -4930,11 +4935,15 @@ static ssize_t stli_memwrite(struct file *fp, const char *buf, size_t count, lof while (size > 0) { memptr = (void *) EBRDGETMEMPTR(brdp, fp->f_pos); n = MIN(size, (brdp->pagesize - (((unsigned long) fp->f_pos) % brdp->pagesize))); - copy_from_user(memptr, chbuf, n); + if (copy_from_user(memptr, chbuf, n)) { + count = -EFAULT; + goto out; + } fp->f_pos += n; chbuf += n; size -= n; } +out: EBRDDISABLE(brdp); restore_flags(flags); diff --git a/drivers/char/machzwd.c b/drivers/char/machzwd.c index 85ba8321a564..4145913cf5fc 100644 --- a/drivers/char/machzwd.c +++ b/drivers/char/machzwd.c @@ -359,20 +359,15 @@ static ssize_t zf_read(struct file *file, char *buf, size_t count, static int zf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - int ret; - switch(cmd){ case WDIOC_GETSUPPORT: - ret = copy_to_user((struct watchdog_info *)arg, - &zf_info, sizeof(zf_info)); - if(ret) + if (copy_to_user((struct watchdog_info *)arg, + &zf_info, sizeof(zf_info))) return -EFAULT; break; case WDIOC_GETSTATUS: - ret = copy_to_user((int *)arg, &zf_is_open, - sizeof(int)); - if(ret) + if (copy_to_user((int *)arg, &zf_is_open, sizeof(int))) return -EFAULT; break; diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index f6e059693c6b..b6d3ea88ebac 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -2175,8 +2175,7 @@ static int mxser_get_serial_info(struct mxser_struct *info, tmp.closing_wait = info->closing_wait; tmp.custom_divisor = info->custom_divisor; tmp.hub6 = 0; - copy_to_user(retinfo, &tmp, sizeof(*retinfo)); - return (0); + return copy_to_user(retinfo, &tmp, sizeof(*retinfo)) ? 
-EFAULT : 0; } static int mxser_set_serial_info(struct mxser_struct *info, @@ -2188,7 +2187,8 @@ static int mxser_set_serial_info(struct mxser_struct *info, if (!new_info || !info->base) return (-EFAULT); - copy_from_user(&new_serial, new_info, sizeof(new_serial)); + if (copy_from_user(&new_serial, new_info, sizeof(new_serial))) + return -EFAULT; if ((new_serial.irq != info->irq) || (new_serial.port != info->base) || diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 849b91296412..47effb41f46e 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -1364,7 +1364,7 @@ static ssize_t r3964_write(struct tty_struct * tty, struct file * file, pHeader->owner = pClient; } - copy_from_user (pHeader->data, data, count); /* We already verified this */ + __copy_from_user(pHeader->data, data, count); /* We already verified this */ if(pInfo->flags & R3964_DEBUG) { diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index 4022345238f1..a5055576aaf3 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -159,7 +159,8 @@ static ssize_t flash_read(struct file *file, char *buf, size_t size, loff_t * pp if (ret == 0) { ret = count; *ppos += count; - } + } else + ret = -EFAULT; up(&nwflash_sem); } return ret; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 7b0c47fc129a..a901f7b1bdbe 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -163,9 +163,10 @@ int raw_ctl_ioctl(struct inode *inode, /* First, find out which raw minor we want */ - err = copy_from_user(&rq, (void *) arg, sizeof(rq)); - if (err) + if (copy_from_user(&rq, (void *) arg, sizeof(rq))) { + err = -EFAULT; break; + } minor = rq.raw_minor; if (minor <= 0 || minor > MINORMASK) { @@ -222,6 +223,8 @@ int raw_ctl_ioctl(struct inode *inode, rq.block_major = rq.block_minor = 0; } err = copy_to_user((void *) arg, &rq, sizeof(rq)); + if (err) + err = -EFAULT; } break; diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index db06f5ad2b10..c19aa59a62b1 
100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -1553,7 +1553,8 @@ static int stl_setserial(stlport_t *portp, struct serial_struct *sp) printk("stl_setserial(portp=%x,sp=%x)\n", (int) portp, (int) sp); #endif - copy_from_user(&sio, sp, sizeof(struct serial_struct)); + if (copy_from_user(&sio, sp, sizeof(struct serial_struct))) + return -EFAULT; if (!capable(CAP_SYS_ADMIN)) { if ((sio.baud_base != portp->baud_base) || (sio.close_delay != portp->close_delay) || @@ -2949,7 +2950,8 @@ static int stl_getbrdstats(combrd_t *bp) stlpanel_t *panelp; int i; - copy_from_user(&stl_brdstats, bp, sizeof(combrd_t)); + if (copy_from_user(&stl_brdstats, bp, sizeof(combrd_t))) + return -EFAULT; if (stl_brdstats.brd >= STL_MAXBRDS) return(-ENODEV); brdp = stl_brds[stl_brdstats.brd]; @@ -2973,8 +2975,7 @@ static int stl_getbrdstats(combrd_t *bp) stl_brdstats.panels[i].nrports = panelp->nrports; } - copy_to_user(bp, &stl_brdstats, sizeof(combrd_t)); - return(0); + return copy_to_user(bp, &stl_brdstats, sizeof(combrd_t)) ? -EFAULT : 0; } /*****************************************************************************/ @@ -3017,7 +3018,8 @@ static int stl_getportstats(stlport_t *portp, comstats_t *cp) unsigned long flags; if (portp == (stlport_t *) NULL) { - copy_from_user(&stl_comstats, cp, sizeof(comstats_t)); + if (copy_from_user(&stl_comstats, cp, sizeof(comstats_t))) + return -EFAULT; portp = stl_getport(stl_comstats.brd, stl_comstats.panel, stl_comstats.port); if (portp == (stlport_t *) NULL) @@ -3058,8 +3060,8 @@ static int stl_getportstats(stlport_t *portp, comstats_t *cp) portp->stats.signals = (unsigned long) stl_getsignals(portp); - copy_to_user(cp, &portp->stats, sizeof(comstats_t)); - return(0); + return copy_to_user(cp, &portp->stats, + sizeof(comstats_t)) ? 
-EFAULT : 0; } /*****************************************************************************/ @@ -3071,7 +3073,8 @@ static int stl_getportstats(stlport_t *portp, comstats_t *cp) static int stl_clrportstats(stlport_t *portp, comstats_t *cp) { if (portp == (stlport_t *) NULL) { - copy_from_user(&stl_comstats, cp, sizeof(comstats_t)); + if (copy_from_user(&stl_comstats, cp, sizeof(comstats_t))) + return -EFAULT; portp = stl_getport(stl_comstats.brd, stl_comstats.panel, stl_comstats.port); if (portp == (stlport_t *) NULL) @@ -3082,8 +3085,8 @@ static int stl_clrportstats(stlport_t *portp, comstats_t *cp) portp->stats.brd = portp->brdnr; portp->stats.panel = portp->panelnr; portp->stats.port = portp->portnr; - copy_to_user(cp, &portp->stats, sizeof(comstats_t)); - return(0); + return copy_to_user(cp, &portp->stats, + sizeof(comstats_t)) ? -EFAULT : 0; } /*****************************************************************************/ @@ -3096,13 +3099,14 @@ static int stl_getportstruct(unsigned long arg) { stlport_t *portp; - copy_from_user(&stl_dummyport, (void *) arg, sizeof(stlport_t)); + if (copy_from_user(&stl_dummyport, (void *) arg, sizeof(stlport_t))) + return -EFAULT; portp = stl_getport(stl_dummyport.brdnr, stl_dummyport.panelnr, stl_dummyport.portnr); if (portp == (stlport_t *) NULL) return(-ENODEV); - copy_to_user((void *) arg, portp, sizeof(stlport_t)); - return(0); + return copy_to_user((void *)arg, portp, + sizeof(stlport_t)) ? 
-EFAULT : 0; } /*****************************************************************************/ @@ -3115,14 +3119,14 @@ static int stl_getbrdstruct(unsigned long arg) { stlbrd_t *brdp; - copy_from_user(&stl_dummybrd, (void *) arg, sizeof(stlbrd_t)); + if (copy_from_user(&stl_dummybrd, (void *) arg, sizeof(stlbrd_t))) + return -EFAULT; if ((stl_dummybrd.brdnr < 0) || (stl_dummybrd.brdnr >= STL_MAXBRDS)) return(-ENODEV); brdp = stl_brds[stl_dummybrd.brdnr]; if (brdp == (stlbrd_t *) NULL) return(-ENODEV); - copy_to_user((void *) arg, brdp, sizeof(stlbrd_t)); - return(0); + return copy_to_user((void *)arg, brdp, sizeof(stlbrd_t)) ? -EFAULT : 0; } /*****************************************************************************/ diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 947f76a60898..694b0e1e9283 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -1720,8 +1720,11 @@ static int sx_fw_ioctl (struct inode *inode, struct file *filp, Get_user (data, descr++); while (nbytes && data) { for (i=0;i<nbytes;i += SX_CHUNK_SIZE) { - copy_from_user (tmp, (char *)data+i, - (i+SX_CHUNK_SIZE>nbytes)?nbytes-i:SX_CHUNK_SIZE); + if (copy_from_user(tmp, (char *)data + i, + (i + SX_CHUNK_SIZE > + nbytes) ? 
nbytes - i : + SX_CHUNK_SIZE)) + return -EFAULT; memcpy_toio ((char *) (board->base2 + offset + i), tmp, (i+SX_CHUNK_SIZE>nbytes)?nbytes-i:SX_CHUNK_SIZE); } diff --git a/drivers/char/tpqic02.c b/drivers/char/tpqic02.c index 24b490623b14..b2c1ce6a2487 100644 --- a/drivers/char/tpqic02.c +++ b/drivers/char/tpqic02.c @@ -1944,12 +1944,8 @@ static ssize_t qic02_tape_read(struct file *filp, char *buf, size_t count, } /* copy buffer to user-space in one go */ if (bytes_done > 0) { - err = - copy_to_user(buf, buffaddr, - bytes_done); - if (err) { + if (copy_to_user(buf, buffaddr, bytes_done)) return -EFAULT; - } } #if 1 /* Checks Ton's patch below */ @@ -2085,10 +2081,8 @@ static ssize_t qic02_tape_write(struct file *filp, const char *buf, /* copy from user to DMA buffer and initiate transfer. */ if (bytes_todo > 0) { - err = copy_from_user(buffaddr, buf, bytes_todo); - if (err) { + if (copy_from_user(buffaddr, buf, bytes_todo)) return -EFAULT; - } /****************** similar problem with read() at FM could happen here at EOT. 
******************/ diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index f0c79911456f..5021b597997d 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -21,7 +21,6 @@ #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/kernel.h> -#include <linux/locks.h> #include <linux/major.h> #include <linux/slab.h> #include <linux/ctype.h> diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index a609563d7c84..e9d33b415ca5 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -26,7 +26,6 @@ printk(KERN_DEBUG __FUNCTION__ ": " format "\n" , ## arg); \ #include <linux/tqueue.h> #include <linux/capi.h> #include <linux/kernelcapi.h> -#include <linux/locks.h> #include <linux/init.h> #include <asm/uaccess.h> #include <linux/isdn/capicmd.h> diff --git a/drivers/md/lvm.c b/drivers/md/lvm.c index 1c31e2058143..dfc256c6a2ec 100644 --- a/drivers/md/lvm.c +++ b/drivers/md/lvm.c @@ -212,7 +212,6 @@ #include <linux/proc_fs.h> #include <linux/blkdev.h> #include <linux/genhd.h> -#include <linux/locks.h> #include <linux/devfs_fs_kernel.h> #include <linux/smp_lock.h> #include <asm/ioctl.h> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8133b7c0952f..9402b0c779b9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -18,7 +18,6 @@ #include <linux/config.h> #include <linux/module.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/raid/raid5.h> #include <asm/bitops.h> diff --git a/drivers/media/video/i2c-old.c b/drivers/media/video/i2c-old.c index 52dc8ebb17d5..bd731be97aa1 100644 --- a/drivers/media/video/i2c-old.c +++ b/drivers/media/video/i2c-old.c @@ -12,7 +12,6 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/delay.h> -#include <linux/locks.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/i2c-old.h> diff --git a/drivers/scsi/ChangeLog.ips b/drivers/scsi/ChangeLog.ips index cd84e4494127..5019f5182bf4 100644 --- 
a/drivers/scsi/ChangeLog.ips +++ b/drivers/scsi/ChangeLog.ips @@ -1,10 +1,44 @@ IBM ServeRAID driver Change Log ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 5.00.01 - Sarasota ( 5i ) adapters must always be scanned first + - Get rid on IOCTL_NEW_COMMAND code + - Add Extended DCDB Commands for Tape Support in 5I + + 4.90.11 - Don't actually RESET unless it's physically required + - Remove unused compile options + + 4.90.08 - Data Corruption if First Scatter Gather Element is > 64K + + 4.90.08 - Increase Delays in Flashing ( Trombone Only - 4H ) + + 4.90.05 - Use New PCI Architecture to facilitate Hot Plug Development + + 4.90.01 - Add ServeRAID Version Checking + + 4.80.26 - Clean up potential code problems ( Arjan's recommendations ) + + 4.80.21 - Change memcpy() to copy_to_user() in NVRAM Page 5 IOCTL path + + 4.80.20 - Set max_sectors in Scsi_Host structure ( if >= 2.4.7 kernel ) + - 5 second delay needed after resetting an i960 adapter + + 4.80.14 - Take all semaphores off stack + - Clean Up New_IOCTL path + + 4.80.04 - Eliminate calls to strtok() if 2.4.x or greater + - Adjustments to Device Queue Depth + + 4.80.00 - Make ia64 Safe + + 4.72.01 - I/O Mapped Memory release ( so "insmod ips" does not Fail ) + - Don't Issue Internal FFDC Command if there are Active Commands + - Close Window for getting too many IOCTL's active + 4.72.00 - Allow for a Scatter-Gather Element to exceed MAX_XFER Size 4.71.00 - Change all memory allocations to not use GFP_DMA flag - Code Clean-Up for 2.4.x kernel + - Code Clean-Up for 2.4.x kernel 4.70.15 - Fix Breakup for very large ( non-SG ) requests @@ -86,4 +120,3 @@ IBM ServeRAID driver Change Log - Fixed read/write errors when the adapter is using an 8K stripe size. -
\ No newline at end of file diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 000a6fcb5a79..ad14e1ba25f7 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -2,6 +2,8 @@ /* ips.c -- driver for the IBM ServeRAID controller */ /* */ /* Written By: Keith Mitchell, IBM Corporation */ +/* Jack Hammer, Adaptec, Inc. */ +/* David Jeffery, Adaptec, Inc. */ /* */ /* Copyright (C) 2000 IBM Corporation */ /* */ @@ -81,7 +83,7 @@ /* 2.3.18 and later */ /* - Sync with other changes from the 2.3 kernels */ /* 4.00.06 - Fix timeout with initial FFDC command */ -/* 4.00.06a - Port to 2.4 (trivial) -- Christoph Hellwig <hch@infradead.org> */ +/* 4.00.06a - Port to 2.4 (trivial) -- Christoph Hellwig <hch@caldera.de> */ /* 4.10.00 - Add support for ServeRAID 4M/4L */ /* 4.10.13 - Fix for dynamic unload and proc file system */ /* 4.20.03 - Rename version to coincide with new release schedules */ @@ -105,15 +107,26 @@ /* Code Clean-Up for 2.4.x kernel */ /* 4.72.00 - Allow for a Scatter-Gather Element to exceed MAX_XFER Size */ /* 4.72.01 - I/O Mapped Memory release ( so "insmod ips" does not Fail ) */ -/* Don't Issue Internal FFDC Command if there are Active Commands */ -/* Close Window for getting too many IOCTL's active */ -/* 4.80.00 Make ia64 Safe */ -/* 4.80.04 Eliminate calls to strtok() if 2.4.x or greater */ -/* Adjustments to Device Queue Depth */ -/* 4.80.14 Take all semaphores off stack */ -/* Clean Up New_IOCTL path */ -/* 4.80.20 Set max_sectors in Scsi_Host structure ( if >= 2.4.7 kernel ) */ -/* 5 second delay needed after resetting an i960 adapter */ +/* - Don't Issue Internal FFDC Command if there are Active Commands */ +/* - Close Window for getting too many IOCTL's active */ +/* 4.80.00 - Make ia64 Safe */ +/* 4.80.04 - Eliminate calls to strtok() if 2.4.x or greater */ +/* - Adjustments to Device Queue Depth */ +/* 4.80.14 - Take all semaphores off stack */ +/* - Clean Up New_IOCTL path */ +/* 4.80.20 - Set max_sectors in Scsi_Host structure ( if 
>= 2.4.7 kernel ) */ +/* - 5 second delay needed after resetting an i960 adapter */ +/* 4.80.26 - Clean up potential code problems ( Arjan's recommendations ) */ +/* 4.90.01 - Version Matching for FirmWare, BIOS, and Driver */ +/* 4.90.05 - Use New PCI Architecture to facilitate Hot Plug Development */ +/* 4.90.08 - Increase Delays in Flashing ( Trombone Only - 4H ) */ +/* 4.90.08 - Data Corruption if First Scatter Gather Element is > 64K */ +/* 4.90.11 - Don't actually RESET unless it's physically required */ +/* - Remove unused compile options */ +/* 5.00.01 - Sarasota ( 5i ) adapters must always be scanned first */ +/* - Get rid on IOCTL_NEW_COMMAND code */ +/* - Add Extended DCDB Commands for Tape Support in 5I */ +/* 5.10.12 - use pci_dma interfaces, update for 2.5 kernel changes */ /*****************************************************************************/ /* @@ -121,26 +134,20 @@ * * IPS_DEBUG - Turn on debugging info * - * * Parameters: * * debug:<number> - Set debug level to <number> - * NOTE: only works when IPS_DEBUG compile directive - * is used. - * + * NOTE: only works when IPS_DEBUG compile directive is used. 
* 1 - Normal debug messages * 2 - Verbose debug messages * 11 - Method trace (non interrupt) * 12 - Method trace (includes interrupt) * - * noreset - Don't reset the controller - * nocmdline - Turn off passthru support * noi2o - Don't use I2O Queues (ServeRAID 4 only) * nommap - Don't use memory mapped I/O * ioctlsize - Initial size of the IOCTL buffer */ - #include <asm/io.h> #include <asm/byteorder.h> #include <asm/page.h> @@ -163,9 +170,7 @@ #include <linux/blk.h> #include <linux/types.h> -#ifndef NO_IPS_CMDLINE #include <scsi/sg.h> -#endif #include "sd.h" #include "scsi.h" @@ -177,14 +182,11 @@ #include <linux/stat.h> #include <linux/config.h> -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,18) -#include <linux/spinlock.h> +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) + #include <linux/spinlock.h> + #include <linux/init.h> #else -#include <asm/spinlock.h> -#endif - -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,13) -#include <linux/init.h> + #include <asm/spinlock.h> #endif #include <linux/smp.h> @@ -197,10 +199,10 @@ /* * DRIVER_VER */ -#define IPS_VERSION_HIGH "4.80" -#define IPS_VERSION_LOW ".26 " +#define IPS_VERSION_HIGH "5.10" +#define IPS_VERSION_LOW ".13-BETA " -#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,27) +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) struct proc_dir_entry proc_scsi_ips = { 0, 3, "ips", @@ -214,12 +216,52 @@ struct proc_dir_entry proc_scsi_ips = { #if LINUX_VERSION_CODE < LinuxVersionCode(2,2,0) #error "This driver only works with kernel 2.2.0 and later" +#elif LINUX_VERSION_CODE <= LinuxVersionCode(2,3,18) + #define dma_addr_t uint32_t + + static inline void *pci_alloc_consistent(struct pci_dev *dev,int size, + dma_addr_t *dmahandle) { + void * ptr = kmalloc(size, GFP_ATOMIC); + if(ptr){ + *dmahandle = VIRT_TO_BUS(ptr); + } + return ptr; + } + + #define pci_free_consistent(a,size,address,dmahandle) kfree(address) + + #define pci_map_sg(a,b,n,z) (n) + #define pci_unmap_sg(a,b,c,d) + #define pci_map_single(a,b,c,d) 
(VIRT_TO_BUS(b)) + #define pci_unmap_single(a,b,c,d) + #ifndef sg_dma_address + #define sg_dma_address(x) (VIRT_TO_BUS((x)->address)) + #define sg_dma_len(x) ((x)->length) + #endif + #define pci_unregister_driver(x) #endif -#if !defined(NO_IPS_CMDLINE) && ((SG_BIG_BUFF < 8192) || !defined(SG_BIG_BUFF)) - #error "To use the command-line interface you need to define SG_BIG_BUFF" +#if LINUX_VERSION_CODE <= LinuxVersionCode(2,5,0) + #define IPS_SG_ADDRESS(sg) ((sg)->address) + #define IPS_LOCK_IRQ(lock) spin_lock_irq(&io_request_lock) + #define IPS_UNLOCK_IRQ(lock) spin_unlock_irq(&io_request_lock) + #define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(&io_request_lock,flags) + #define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(&io_request_lock,flags) +#else + #define IPS_SG_ADDRESS(sg) (page_address((sg)->page) ? \ + page_address((sg)->page)+(sg)->offset : 0) + #define IPS_LOCK_IRQ(lock) spin_lock_irq(lock) + #define IPS_UNLOCK_IRQ(lock) spin_unlock_irq(lock) + #define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(lock,flags) + #define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(lock,flags) + #endif +#define IPS_DMA_DIR(scb) ((!scb->scsi_cmd || ips_is_passthru(scb->scsi_cmd) || \ + SCSI_DATA_NONE == scb->scsi_cmd->sc_data_direction) ? 
\ + PCI_DMA_BIDIRECTIONAL : \ + scsi_to_pci_dma_dir(scb->scsi_cmd->sc_data_direction)) + #ifdef IPS_DEBUG #define METHOD_TRACE(s, i) if (ips_debug >= (i+10)) printk(KERN_NOTICE s "\n"); #define DEBUG(i, s) if (ips_debug >= i) printk(KERN_NOTICE s "\n"); @@ -234,8 +276,8 @@ struct proc_dir_entry proc_scsi_ips = { * global variables */ static const char ips_name[] = "ips"; -static struct Scsi_Host * ips_sh[IPS_MAX_ADAPTERS]; /* Array of host controller structures */ -static ips_ha_t * ips_ha[IPS_MAX_ADAPTERS]; /* Array of HA structures */ +static struct Scsi_Host *ips_sh[IPS_MAX_ADAPTERS]; /* Array of host controller structures */ +static ips_ha_t *ips_ha[IPS_MAX_ADAPTERS]; /* Array of HA structures */ static unsigned int ips_next_controller = 0; static unsigned int ips_num_controllers = 0; static unsigned int ips_released_controllers = 0; @@ -243,15 +285,64 @@ static int ips_cmd_timeout = 60; static int ips_reset_timeout = 60 * 5; static int ips_force_memio = 1; /* Always use Memory Mapped I/O */ static int ips_force_i2o = 1; /* Always use I2O command delivery */ -static int ips_resetcontroller = 1; /* Reset the controller */ -static int ips_cmdline = 1; /* Support for passthru */ static int ips_ioctlsize = IPS_IOCTL_SIZE; /* Size of the ioctl buffer */ static int ips_cd_boot = 0; /* Booting from ServeRAID Manager CD */ static char *ips_FlashData = NULL; /* CD Boot - Flash Data Buffer */ -static int ips_FlashDataInUse = 0; /* CD Boot - Flash Data In Use Flag */ +static long ips_FlashDataInUse = 0; /* CD Boot - Flash Data In Use Flag */ +static uint32_t MaxLiteCmds = 32; /* Max Active Cmds for a Lite Adapter */ + +IPS_DEFINE_COMPAT_TABLE( Compatable ); /* Version Compatability Table */ + + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) + /* This table describes any / all ServeRAID Adapters */ + static struct pci_device_id ips_pci_table[] __devinitdata = { + { 0x1014, 0x002E, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, + { 0x1014, 0x01BD, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, + { 
0, } + }; + + /* This table describes only Sarasota ( ServeRAID 5i ) Adapters */ + static struct pci_device_id ips_pci_table_5i[] __devinitdata = { + { 0x1014, 0x01BD, PCI_ANY_ID, 0x259, 0, 0 }, + { 0x1014, 0x01BD, PCI_ANY_ID, 0x258, 0, 0 }, + { 0, } + }; + + /* This table describes all i960 Adapters */ + static struct pci_device_id ips_pci_table_i960[] __devinitdata = { + { 0x1014, 0x01BD, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, + { 0, } + }; + + MODULE_DEVICE_TABLE( pci, ips_pci_table ); + + static char ips_hot_plug_name[] = "ips"; + + static int __devinit ips_insert_device(struct pci_dev *pci_dev, const struct pci_device_id *ent); + static void __devexit ips_remove_device(struct pci_dev *pci_dev); + + struct pci_driver ips_pci_driver = { + name: ips_hot_plug_name, + id_table: ips_pci_table, + probe: ips_insert_device, + remove: ips_remove_device, + }; + + struct pci_driver ips_pci_driver_5i = { + name: ips_hot_plug_name, + id_table: ips_pci_table_5i, + probe: ips_insert_device, + remove: ips_remove_device, + }; + + struct pci_driver ips_pci_driver_i960 = { + name: ips_hot_plug_name, + id_table: ips_pci_table_i960, + probe: ips_insert_device, + remove: ips_remove_device, + }; -#ifdef IPS_DEBUG -static int ips_debug = 0; /* Debug mode */ #endif /* @@ -272,7 +363,9 @@ static char ips_adapter_name[][30] = { "ServeRAID 4M", "ServeRAID 4L", "ServeRAID 4Mx", - "ServeRAID 4Lx" + "ServeRAID 4Lx", + "ServeRAID 5i", + "ServeRAID 5i" }; static struct notifier_block ips_notifier = { @@ -286,7 +379,7 @@ static char ips_command_direction[] = { IPS_DATA_NONE, IPS_DATA_NONE, IPS_DATA_IN, IPS_DATA_IN, IPS_DATA_OUT, IPS_DATA_IN, IPS_DATA_IN, IPS_DATA_OUT, IPS_DATA_IN, IPS_DATA_UNK, IPS_DATA_OUT, IPS_DATA_OUT, IPS_DATA_UNK, IPS_DATA_UNK, IPS_DATA_UNK, -IPS_DATA_IN, IPS_DATA_NONE, IPS_DATA_IN, IPS_DATA_IN, IPS_DATA_OUT, +IPS_DATA_IN, IPS_DATA_NONE, IPS_DATA_NONE, IPS_DATA_IN, IPS_DATA_OUT, IPS_DATA_IN, IPS_DATA_OUT, IPS_DATA_NONE, IPS_DATA_NONE, IPS_DATA_OUT, IPS_DATA_NONE, IPS_DATA_IN, 
IPS_DATA_NONE, IPS_DATA_IN, IPS_DATA_OUT, IPS_DATA_NONE, IPS_DATA_UNK, IPS_DATA_IN, IPS_DATA_UNK, IPS_DATA_IN, @@ -349,7 +442,6 @@ const char * ips_info(struct Scsi_Host *); void do_ipsintr(int, void *, struct pt_regs *); static int ips_hainit(ips_ha_t *); static int ips_map_status(ips_ha_t *, ips_scb_t *, ips_stat_t *); -static int ips_send(ips_ha_t *, ips_scb_t *, ips_scb_callback); static int ips_send_wait(ips_ha_t *, ips_scb_t *, int, int); static int ips_send_cmd(ips_ha_t *, ips_scb_t *); static int ips_online(ips_ha_t *, ips_scb_t *); @@ -357,6 +449,7 @@ static int ips_inquiry(ips_ha_t *, ips_scb_t *); static int ips_rdcap(ips_ha_t *, ips_scb_t *); static int ips_msense(ips_ha_t *, ips_scb_t *); static int ips_reqsen(ips_ha_t *, ips_scb_t *); +static int ips_deallocatescbs(ips_ha_t *, int); static int ips_allocatescbs(ips_ha_t *); static int ips_reset_copperhead(ips_ha_t *); static int ips_reset_copperhead_memio(ips_ha_t *); @@ -382,15 +475,15 @@ static int ips_isinit_copperhead(ips_ha_t *); static int ips_isinit_copperhead_memio(ips_ha_t *); static int ips_isinit_morpheus(ips_ha_t *); static int ips_erase_bios(ips_ha_t *); -static int ips_program_bios(ips_ha_t *, char *, u_int32_t, u_int32_t); -static int ips_verify_bios(ips_ha_t *, char *, u_int32_t, u_int32_t); +static int ips_program_bios(ips_ha_t *, char *, uint32_t, uint32_t); +static int ips_verify_bios(ips_ha_t *, char *, uint32_t, uint32_t); static int ips_erase_bios_memio(ips_ha_t *); -static int ips_program_bios_memio(ips_ha_t *, char *, u_int32_t, u_int32_t); -static int ips_verify_bios_memio(ips_ha_t *, char *, u_int32_t, u_int32_t); -static void ips_flash_bios_section(void *); -static void ips_flash_bios_segment(void *); -static void ips_scheduled_flash_bios(void *); -static void ips_create_nvrampage5(ips_ha_t *, IPS_NVRAM_P5 *); +static int ips_program_bios_memio(ips_ha_t *, char *, uint32_t, uint32_t); +static int ips_verify_bios_memio(ips_ha_t *, char *, uint32_t, uint32_t); +static int 
ips_flash_copperhead(ips_ha_t *, ips_passthru_t *, ips_scb_t *); +static int ips_flash_bios(ips_ha_t *, ips_passthru_t *, ips_scb_t *); +static int ips_flash_firmware(ips_ha_t *, ips_passthru_t *, ips_scb_t *); +static void ips_free_flash_copperhead(ips_ha_t *ha); static void ips_get_bios_version(ips_ha_t *, int); static void ips_identify_controller(ips_ha_t *); static void ips_select_queue_depth(struct Scsi_Host *, Scsi_Device *); @@ -411,10 +504,10 @@ static void ips_statinit(ips_ha_t *); static void ips_statinit_memio(ips_ha_t *); static void ips_fix_ffdc_time(ips_ha_t *, ips_scb_t *, time_t); static void ips_ffdc_reset(ips_ha_t *, int); -static void ips_ffdc_time(ips_ha_t *, int); -static u_int32_t ips_statupd_copperhead(ips_ha_t *); -static u_int32_t ips_statupd_copperhead_memio(ips_ha_t *); -static u_int32_t ips_statupd_morpheus(ips_ha_t *); +static void ips_ffdc_time(ips_ha_t *); +static uint32_t ips_statupd_copperhead(ips_ha_t *); +static uint32_t ips_statupd_copperhead_memio(ips_ha_t *); +static uint32_t ips_statupd_morpheus(ips_ha_t *); static ips_scb_t * ips_getscb(ips_ha_t *); static inline void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *); static inline void ips_putq_scb_tail(ips_scb_queue_t *, ips_scb_t *); @@ -429,18 +522,22 @@ static inline Scsi_Cmnd * ips_removeq_wait(ips_wait_queue_t *, Scsi_Cmnd *); static inline ips_copp_wait_item_t * ips_removeq_copp(ips_copp_queue_t *, ips_copp_wait_item_t *); static inline ips_copp_wait_item_t * ips_removeq_copp_head(ips_copp_queue_t *); -#ifndef NO_IPS_CMDLINE static int ips_is_passthru(Scsi_Cmnd *); static int ips_make_passthru(ips_ha_t *, Scsi_Cmnd *, ips_scb_t *, int); static int ips_usrcmd(ips_ha_t *, ips_passthru_t *, ips_scb_t *); -static int ips_newusrcmd(ips_ha_t *, ips_passthru_t *, ips_scb_t *); static void ips_cleanup_passthru(ips_ha_t *, ips_scb_t *); -#endif int ips_proc_info(char *, char **, off_t, int, int, int); static int ips_host_info(ips_ha_t *, char *, off_t, int); static void 
copy_mem_info(IPS_INFOSTR *, char *, int); static int copy_info(IPS_INFOSTR *, char *, ...); +static int ips_get_version_info(ips_ha_t *ha, IPS_VERSION_DATA *Buffer, int intr ); +static void ips_version_check(ips_ha_t *ha, int intr); +static int ips_init_phase2( int index ); + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) +static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ); +#endif /*--------------------------------------------------------------------------*/ /* Exported Functions */ @@ -455,37 +552,26 @@ static int copy_info(IPS_INFOSTR *, char *, ...); /* setup parameters to the driver */ /* */ /****************************************************************************/ -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,13) +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) static int ips_setup(char *ips_str) { #else void ips_setup(char *ips_str, int *dummy) { #endif - int i; - -#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) - char *p; - char tokens[3] = {',', '.', 0}; -#endif + int i; char *key; char *value; IPS_OPTION options[] = { - {"noreset", &ips_resetcontroller, 0}, -#ifdef IPS_DEBUG - {"debug", &ips_debug, 1}, -#endif {"noi2o", &ips_force_i2o, 0}, {"nommap", &ips_force_memio, 0}, - {"nocmdline", &ips_cmdline, 0}, {"ioctlsize", &ips_ioctlsize, IPS_IOCTL_SIZE}, {"cdboot", &ips_cd_boot, 0}, + {"maxcmds", &MaxLiteCmds, 32}, }; - - METHOD_TRACE("ips_setup", 1); - -/* Don't use strtok() anymore ( if 2.4 Kernel or beyond ) */ + + /* Don't use strtok() anymore ( if 2.4 Kernel or beyond ) */ #if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) /* Search for value */ while ((key = strsep(&ips_str, ",."))) { @@ -508,7 +594,16 @@ ips_setup(char *ips_str, int *dummy) { } } } + + return (1); + +__setup("ips=", ips_setup); + #else + + char *p; + char tokens[3] = {',', '.', 0}; + for (key = strtok(ips_str, tokens); key; key = strtok(NULL, tokens)) { p = key; @@ -537,16 +632,10 @@ ips_setup(char *ips_str, int *dummy) { } } } -#endif -#if 
LINUX_VERSION_CODE >= LinuxVersionCode(2,3,13) - return (1); #endif -} -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,13) -__setup("ips=", ips_setup); -#endif +} /****************************************************************************/ /* */ @@ -563,36 +652,36 @@ int ips_detect(Scsi_Host_Template *SHT) { struct Scsi_Host *sh; ips_ha_t *ha; - u_int32_t io_addr; - u_int32_t mem_addr; - u_int32_t io_len; - u_int32_t mem_len; - u_int16_t planer; - u_int8_t revision_id; - u_int8_t bus; - u_int8_t func; - u_int8_t irq; - u_int16_t deviceID[2]; - u_int16_t subdevice_id; + uint32_t io_addr; + uint32_t mem_addr; + uint32_t io_len; + uint32_t mem_len; + uint16_t planer; + uint8_t revision_id; + uint8_t bus; + uint8_t func; + uint8_t irq; + uint16_t deviceID[2]; + uint16_t subdevice_id; int i; int j; - u_int32_t count; + uint32_t count; char *ioremap_ptr; char *mem_ptr; struct pci_dev *dev[2]; struct pci_dev *morpheus = NULL; struct pci_dev *trombone = NULL; -#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,14) - u_int32_t currbar; - u_int32_t maskbar; - u_int8_t barnum; +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) + uint32_t currbar; + uint32_t maskbar; + uint8_t barnum; #endif METHOD_TRACE("ips_detect", 1); #ifdef MODULE if (ips) -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,13) +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) ips_setup(ips); #else ips_setup(ips, NULL); @@ -610,7 +699,7 @@ ips_detect(Scsi_Host_Template *SHT) { } SHT->proc_info = ips_proc_info; -#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,27) +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) SHT->proc_dir = &proc_scsi_ips; #else SHT->proc_name = "ips"; @@ -668,6 +757,32 @@ ips_detect(Scsi_Host_Template *SHT) { } } +/**********************************************************************************/ +/* For Kernel Versions 2.4 or greater, use new PCI ( Hot Pluggable ) architecture */ +/**********************************************************************************/ + +#if LINUX_VERSION_CODE 
>= LinuxVersionCode(2,4,0) + #if LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) + spin_unlock_irq(&io_request_lock); + #endif + /* By definition, a Sarasota ( 5i ) Adapter MUST be enumerated first or the */ + /* server may not boot properly. The adapters must be enumerated in exactly */ + /* the same order as ServeRAID BIOS for the machine to come up properly. */ + + pci_module_init(&ips_pci_driver_5i); /* Ask for 5i Adapters First */ + if (ips_num_controllers) /* If there is a 5i Adapter */ + pci_module_init(&ips_pci_driver_i960); /* Get all i960's next */ + pci_module_init(&ips_pci_driver); /* Get all remaining Adapters */ + /* ( in normal BUS order ) */ + #if LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) + spin_lock_irq(&io_request_lock); + #endif + if (ips_num_controllers > 0) + register_reboot_notifier(&ips_notifier); + + return (ips_num_controllers); +#endif + /* Now scan the controllers */ for (i = 0; i < 2; i++) { if (!dev[i]) @@ -705,17 +820,6 @@ ips_detect(Scsi_Host_Template *SHT) { mem_addr = pci_resource_start(dev[i], j); mem_len = pci_resource_len(dev[i], j); } -#elif LINUX_VERSION_CODE >= LinuxVersionCode(2,3,14) - if (!dev[i]->resource[j].start) - break; - - if ((dev[i]->resource[j].start & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { - io_addr = dev[i]->resource[j].start; - io_len = dev[i]->resource[j].end - dev[i]->resource[j].start + 1; - } else { - mem_addr = dev[i]->resource[j].start; - mem_len = dev[i]->resource[j].end - dev[i]->resource[j].start + 1; - } #else if (!dev[i]->base_address[j]) break; @@ -748,13 +852,13 @@ ips_detect(Scsi_Host_Template *SHT) { /* setup memory mapped area (if applicable) */ if (mem_addr) { - u_int32_t base; - u_int32_t offs; + uint32_t base; + uint32_t offs; DEBUG_VAR(1, "(%s%d) detect, Memory region %x, size: %d", ips_name, ips_next_controller, mem_addr, mem_len); -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,17) +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) if (check_mem_region(mem_addr, mem_len)) { 
/* Couldn't allocate io space */ printk(KERN_WARNING "(%s%d) couldn't allocate IO space %x len %d.\n", @@ -830,7 +934,7 @@ ips_detect(Scsi_Host_Template *SHT) { continue; } -#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,15) +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) /* get the subdevice id */ if (pci_read_config_word(dev[i], PCI_SUBSYSTEM_ID, &subdevice_id)) { printk(KERN_WARNING "(%s%d) can't get subdevice id.\n", @@ -847,13 +951,6 @@ ips_detect(Scsi_Host_Template *SHT) { /* found a controller */ sh = scsi_register(SHT, sizeof(ips_ha_t)); - /* - * Set pci_dev and dma_mask - */ - pci_set_dma_mask(dev[i], (u64) 0xffffffff); - - scsi_set_pci_device(sh, dev[i]); - if (sh == NULL) { printk(KERN_WARNING "(%s%d) Unable to register controller with SCSI subsystem - skipping controller\n", ips_name, ips_next_controller); @@ -1011,9 +1108,9 @@ ips_detect(Scsi_Host_Template *SHT) { #if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,7) sh->max_sectors = 128; -#endif +#endif -#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,32) +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) sh->wish_block = FALSE; #endif @@ -1086,6 +1183,7 @@ ips_detect(Scsi_Host_Template *SHT) { /* * Initialize the card if it isn't already */ + if (!(*ha->func.isinit)(ha)) { if (!(*ha->func.init)(ha)) { /* @@ -1125,8 +1223,8 @@ ips_detect(Scsi_Host_Template *SHT) { /* * Allocate a temporary SCB for initialization */ - ha->scbs = (ips_scb_t *) kmalloc(sizeof(ips_scb_t), GFP_KERNEL); - if (!ha->scbs) { + ha->max_cmds = 1; + if (!ips_allocatescbs(ha)) { /* couldn't allocate a temp SCB */ printk(KERN_WARNING "(%s%d) unable to allocate CCBs - skipping contoller\n", ips_name, ips_next_controller); @@ -1143,27 +1241,6 @@ ips_detect(Scsi_Host_Template *SHT) { continue; } - memset(ha->scbs, 0, sizeof(ips_scb_t)); - ha->scbs->sg_list = (IPS_SG_LIST *) kmalloc(sizeof(IPS_SG_LIST) * IPS_MAX_SG, GFP_KERNEL); - if (!ha->scbs->sg_list) { - /* couldn't allocate a temp SCB S/G list */ - printk(KERN_WARNING "(%s%d) unable to 
allocate CCBs - skipping contoller\n", - ips_name, ips_next_controller); - - ha->active = 0; - ips_free(ha); - scsi_unregister(sh); - ips_ha[ips_next_controller] = 0; - ips_sh[ips_next_controller] = 0; - free_irq(ha->irq, ha); - ips_next_controller++; - ips_num_controllers--; - - continue; - } - - ha->max_cmds = 1; - ips_next_controller++; } while ((dev[i] = pci_find_device(IPS_VENDORID, deviceID[i], dev[i]))); } @@ -1175,67 +1252,13 @@ ips_detect(Scsi_Host_Template *SHT) { for (i = 0; i < ips_next_controller; i++) { if (ips_ha[i] == 0) { - printk(KERN_WARNING "(%s%d) ignoring bad controller\n", - ips_name, i); - continue; - } - - ha = ips_ha[i]; - sh = ips_sh[i]; - - if (!ha->active) { - printk(KERN_WARNING "(%s%d) controller not active\n", - ips_name, i); - scsi_unregister(sh); - ips_ha[i] = NULL; - ips_sh[i] = NULL; - + printk(KERN_WARNING "(%s%d) ignoring bad controller\n", ips_name, i); continue; } - if (!ips_hainit(ha)) { - printk(KERN_WARNING "(%s%d) unable to initialize controller - skipping\n", - ips_name, i); - - ha->active = 0; - ips_free(ha); - free_irq(ha->irq, ha); - scsi_unregister(sh); - ips_ha[i] = NULL; - ips_sh[i] = NULL; + if (ips_init_phase2(i) != SUCCESS) ips_num_controllers--; - continue; - } - - /* - * Free the temporary SCB - */ - kfree(ha->scbs->sg_list); - kfree(ha->scbs); - ha->scbs = NULL; - - /* allocate CCBs */ - if (!ips_allocatescbs(ha)) { - printk(KERN_WARNING "(%s%d) unable to allocate CCBs - skipping contoller\n", - ips_name, i); - - ha->active = 0; - ips_free(ha); - free_irq(ha->irq, ha); - scsi_unregister(sh); - ips_ha[i] = NULL; - ips_sh[i] = NULL; - ips_num_controllers--; - - continue; - } - - /* finish setting values */ - sh->max_id = ha->ntargets; - sh->max_lun = ha->nlun; - sh->max_channel = ha->nbus - 1; - sh->can_queue = ha->max_cmds-1; } if (ips_num_controllers > 0) @@ -1324,9 +1347,10 @@ ips_release(struct Scsi_Host *sh) { ips_released_controllers++; - if (ips_num_controllers == ips_released_controllers) + if 
(ips_num_controllers == ips_released_controllers){ unregister_reboot_notifier(&ips_notifier); - + pci_unregister_driver(&ips_pci_driver); + } return (FALSE); } @@ -1521,6 +1545,46 @@ ips_eh_reset(Scsi_Cmnd *SC) { return (SUCCESS); } + /* An explanation for the casual observer: */ + /* Part of the function of a RAID controller is automatic error */ + /* detection and recovery. As such, the only problem that physically */ + /* resetting a ServeRAID adapter will ever fix is when, for some reason,*/ + /* the driver is not successfully communicating with the adapter. */ + /* Therefore, we will attempt to flush this adapter. If that succeeds, */ + /* then there's no real purpose in a physical reset. This will complete */ + /* much faster and avoids any problems that might be caused by a */ + /* physical reset ( such as having to fail all the outstanding I/O's ). */ + + if (ha->ioctl_reset == 0) { /* IF Not an IOCTL Requested Reset */ + scb = &ha->scbs[ha->max_cmds-1]; + + ips_init_scb(ha, scb); + + scb->timeout = ips_cmd_timeout; + scb->cdb[0] = IPS_CMD_FLUSH; + + scb->cmd.flush_cache.op_code = IPS_CMD_FLUSH; + scb->cmd.flush_cache.command_id = IPS_COMMAND_ID(ha, scb); + scb->cmd.flush_cache.state = IPS_NORM_STATE; + scb->cmd.flush_cache.reserved = 0; + scb->cmd.flush_cache.reserved2 = 0; + scb->cmd.flush_cache.reserved3 = 0; + scb->cmd.flush_cache.reserved4 = 0; + + /* Attempt the flush command */ + ret = ips_send_wait(ha, scb, ips_cmd_timeout, IPS_INTR_IORL); + if (ret == IPS_SUCCESS) { + printk(KERN_NOTICE "(%s%d) Reset Request - Flushed Cache\n", ips_name, ha->host_num); + clear_bit(IPS_IN_RESET, &ha->flags); + return (SUCCESS); + } + } + + /* Either we can't communicate with the adapter or it's an IOCTL request */ + /* from a ServeRAID utility. A physical reset is needed at this point. 
*/ + + ha->ioctl_reset = 0; /* Reset the IOCTL Requested Reset Flag */ + /* * command must have already been sent * reset the controller @@ -1659,6 +1723,7 @@ int ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { ips_ha_t *ha; unsigned long cpu_flags; + ips_passthru_t *pt; METHOD_TRACE("ips_queue", 1); @@ -1670,7 +1735,6 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { if (!ha->active) return (DID_ERROR); -#ifndef NO_IPS_CMDLINE if (ips_is_passthru(SC)) { IPS_QUEUE_LOCK(&ha->copp_waitlist); if (ha->copp_waitlist.count == IPS_MAX_IOCTL_QUEUE) { @@ -1683,7 +1747,6 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { IPS_QUEUE_UNLOCK(&ha->copp_waitlist); } } else { -#endif IPS_QUEUE_LOCK(&ha->scb_waitlist); if (ha->scb_waitlist.count == IPS_MAX_QUEUE) { IPS_QUEUE_UNLOCK(&ha->scb_waitlist); @@ -1695,9 +1758,7 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { IPS_QUEUE_UNLOCK(&ha->scb_waitlist); } -#ifndef NO_IPS_CMDLINE } -#endif SC->scsi_done = done; @@ -1717,23 +1778,26 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { return (0); } -#ifndef NO_IPS_CMDLINE if (ips_is_passthru(SC)) { ips_copp_wait_item_t *scratch; - - /* The IPS_IOCTL_NEW_COMMAND is only used to flash an adapter. This should */ - /* never happen when the adapter is active. Just in case, check here, and */ - /* reject the command if anything else is going on. */ - if (SC->cmnd[0] == IPS_IOCTL_NEW_COMMAND) { + + /* A Reset IOCTL is only sent by the ServeRAID boot CD in extreme cases. */ + /* There can never be any system activity ( network or disk ), but check */ + /* anyway just as a good practice. 
*/ + pt = (ips_passthru_t *) SC->request_buffer; + if ((pt->CoppCP.cmd.reset.op_code == IPS_CMD_RESET_CHANNEL) && + (pt->CoppCP.cmd.reset.adapter_flag == 1)) { if (ha->scb_activelist.count != 0) { - /* printk( KERN_WARNING "New IOCTL Cmd Return BUSY: %d Cmds Active\n", */ - /* ha->scb_activelist.count ); */ - SC->result = DID_BUS_BUSY << 16; - done(SC); - - return (0); + SC->result = DID_BUS_BUSY << 16; + done(SC); + return (0); } + ha->ioctl_reset = 1; /* This reset request is from an IOCTL */ + ips_eh_reset(SC); + SC->result = DID_OK << 16; + SC->scsi_done(SC); + return (0); } /* allocate space for the scribble */ @@ -1754,9 +1818,11 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { ips_putq_copp_tail(&ha->copp_waitlist, scratch); } else -#endif ips_putq_wait_tail(&ha->scb_waitlist, SC); + if(ha->scb_waitlist.count + ha->scb_activelist.count > 32) + mod_timer(&SC->eh_timeout, jiffies + 120 * HZ); + IPS_HA_LOCK(cpu_flags); if ((!test_bit(IPS_IN_INTR, &ha->flags)) && (!test_bit(IPS_IN_ABORT, &ha->flags)) && @@ -1766,51 +1832,14 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { } else { IPS_HA_UNLOCK(cpu_flags); } - - /* - * If this request was a new style IOCTL wait - * for it to finish. 
- * - * NOTE: we relinquished the lock above so this should - * not cause contention problems - */ - if (ips_is_passthru(SC) && SC->cmnd[0] == IPS_IOCTL_NEW_COMMAND) { - char *user_area; - char *kern_area; - u_int32_t datasize; - - spin_unlock_irq(SC->host->host_lock); - - /* wait for the command to finish */ - down(&ha->ioctl_sem); - - /* reobtain the lock */ - spin_lock_irq(SC->host->host_lock); - - /* command finished -- copy back */ - user_area = *((char **) &SC->cmnd[4]); - kern_area = ha->ioctl_data; - datasize = *((u_int32_t *) &SC->cmnd[8]); - - if (datasize) { - if (copy_to_user(user_area, kern_area, datasize) > 0) { - DEBUG_VAR(1, "(%s%d) passthru failed - unable to copy out user data", - ips_name, ha->host_num); - SC->result = DID_ERROR << 16; - } - } - - SC->scsi_done(SC); - } - + /* If We were using the CD Boot Flash Buffer, Restore the Old Values */ if ( ips_FlashData == ha->ioctl_data ) { - ha->ioctl_data = ha->save_ioctl_data; - ha->ioctl_order = ha->save_ioctl_order; - ha->ioctl_datasize = ha->save_ioctl_datasize; + ha->ioctl_data = ha->flash_data; + ha->ioctl_order = ha->flash_order; + ha->ioctl_datasize = ha->flash_datasize; ips_FlashDataInUse = 0; } - return (0); } @@ -1920,43 +1949,36 @@ ips_select_queue_depth(struct Scsi_Host *host, Scsi_Device *scsi_devs) { /****************************************************************************/ void do_ipsintr(int irq, void *dev_id, struct pt_regs *regs) { - ips_ha_t *ha = (ips_ha_t *) dev_id; + ips_ha_t *ha; unsigned long cpu_flags; - struct Scsi_Host *host = ips_sh[ha->host_num]; + struct Scsi_Host *host; METHOD_TRACE("do_ipsintr", 2); - spin_lock_irqsave(host->host_lock, cpu_flags); + ha = (ips_ha_t *) dev_id; + if (!ha) + return; + host = ips_sh[ha->host_num]; + IPS_LOCK_SAVE(host->host_lock, cpu_flags); if (test_and_set_bit(IPS_IN_INTR, &ha->flags)) { - spin_unlock_irqrestore(host->host_lock, cpu_flags); - + IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); return ; } - if (!ha) { - 
clear_bit(IPS_IN_INTR, &ha->flags); - spin_unlock_irqrestore(host->host_lock, cpu_flags); - - return; - } - if (!ha->active) { clear_bit(IPS_IN_INTR, &ha->flags); - spin_unlock_irqrestore(host->host_lock, cpu_flags); - + IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); return; } (*ha->func.intr)(ha); clear_bit(IPS_IN_INTR, &ha->flags); - - spin_unlock_irqrestore(host->host_lock, cpu_flags); + IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); /* start the next command */ ips_next(ha, IPS_INTR_ON); - return; } /****************************************************************************/ @@ -2191,8 +2213,6 @@ ips_proc_info(char *buffer, char **start, off_t offset, /* Helper Functions */ /*--------------------------------------------------------------------------*/ -#ifndef NO_IPS_CMDLINE - /****************************************************************************/ /* */ /* Routine Name: ips_is_passthru */ @@ -2209,20 +2229,26 @@ ips_is_passthru(Scsi_Cmnd *SC) { if (!SC) return (0); - if (((SC->cmnd[0] == IPS_IOCTL_COMMAND) || (SC->cmnd[0] == IPS_IOCTL_NEW_COMMAND)) && + if ((SC->cmnd[0] == IPS_IOCTL_COMMAND) && (SC->channel == 0) && (SC->target == IPS_ADAPTER_ID) && (SC->lun == 0) && - (SC->request_bufflen) && - (!SC->use_sg) && - (((char *) SC->request_buffer)[0] == 'C') && - (((char *) SC->request_buffer)[1] == 'O') && - (((char *) SC->request_buffer)[2] == 'P') && - (((char *) SC->request_buffer)[3] == 'P')) { - return (1); - } else { - return (0); + SC->request_buffer){ + if((!SC->use_sg) && SC->request_bufflen && + (((char *) SC->request_buffer)[0] == 'C') && + (((char *) SC->request_buffer)[1] == 'O') && + (((char *) SC->request_buffer)[2] == 'P') && + (((char *) SC->request_buffer)[3] == 'P')) + return 1; + else if(SC->use_sg){ + struct scatterlist *sg = SC->request_buffer; + char *buffer = IPS_SG_ADDRESS(sg); + if(buffer && buffer[0] == 'C' && buffer[1] == 'O' && + buffer[2] == 'P' && buffer[3] == 'P') + return 1; + } } + return 0; } 
/****************************************************************************/ @@ -2236,40 +2262,74 @@ ips_is_passthru(Scsi_Cmnd *SC) { /****************************************************************************/ static int ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { - IPS_NVRAM_P5 nvram; ips_passthru_t *pt; + char *buffer; + int length = 0; METHOD_TRACE("ips_make_passthru", 1); - if (!SC->request_bufflen || !SC->request_buffer) { + if(!SC->use_sg){ + buffer = SC->request_buffer; + length = SC->request_bufflen; + }else{ + struct scatterlist *sg = SC->request_buffer; + int i; + for(i = 0; i < SC->use_sg; i++) + length += sg[i].length; + + if (length < sizeof(ips_passthru_t)) { + /* wrong size */ + DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", + ips_name, ha->host_num); + return (IPS_FAILURE); + }else if(!ha->ioctl_data || length > (PAGE_SIZE << ha->ioctl_order)){ + void *bigger_buf; + int count; + int order; + /* try to allocate a bigger buffer */ + for (count = PAGE_SIZE, order = 0; + count < length; + order++, count <<= 1); + bigger_buf = (void *) __get_free_pages(GFP_ATOMIC, order); + if (bigger_buf) { + /* free the old memory */ + free_pages((unsigned long) ha->ioctl_data, ha->ioctl_order); + /* use the new memory */ + ha->ioctl_data = (char *) bigger_buf; + ha->ioctl_order = order; + ha->ioctl_datasize = count; + } else { + pt = (ips_passthru_t*)IPS_SG_ADDRESS(sg); + pt->BasicStatus = 0x0B; + pt->ExtendedStatus = 0x00; + SC->result = DID_ERROR << 16; + return (IPS_FAILURE); + } + } + ha->ioctl_datasize = length; + length = 0; + for(i = 0; i < SC->use_sg; i++){ + memcpy(&ha->ioctl_data[length], IPS_SG_ADDRESS(&sg[i]), sg[i].length); + length += sg[i].length; + } + pt = (ips_passthru_t *)ha->ioctl_data; + buffer = ha->ioctl_data; + } + if (!length || !buffer) { /* no data */ DEBUG_VAR(1, "(%s%d) No passthru structure", ips_name, ha->host_num); return (IPS_FAILURE); } - - if (SC->request_bufflen < sizeof(ips_passthru_t)) { + 
if (length < sizeof(ips_passthru_t)) { /* wrong size */ DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", ips_name, ha->host_num); return (IPS_FAILURE); } - - if ((((char *) SC->request_buffer)[0] != 'C') || - (((char *) SC->request_buffer)[1] != 'O') || - (((char *) SC->request_buffer)[2] != 'P') || - (((char *) SC->request_buffer)[3] != 'P')) { - /* signature doesn't match */ - DEBUG_VAR(1, "(%s%d) Wrong signature on passthru structure.", - ips_name, ha->host_num); - - return (IPS_FAILURE); - } - - pt = (ips_passthru_t *) SC->request_buffer; - + pt = (ips_passthru_t*) buffer; /* * Some notes about the passthru interface used * @@ -2289,14 +2349,14 @@ ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { switch (pt->CoppCmd) { case IPS_NUMCTRLS: - memcpy(SC->request_buffer + sizeof(ips_passthru_t), + memcpy(buffer + sizeof(ips_passthru_t), &ips_num_controllers, sizeof(int)); SC->result = DID_OK << 16; return (IPS_SUCCESS_IMM); case IPS_CTRLINFO: - memcpy(SC->request_buffer + sizeof(ips_passthru_t), + memcpy(buffer + sizeof(ips_passthru_t), ha, sizeof(ips_ha_t)); SC->result = DID_OK << 16; @@ -2305,365 +2365,24 @@ ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { case IPS_COPPUSRCMD: case IPS_COPPIOCCMD: if (SC->cmnd[0] == IPS_IOCTL_COMMAND) { - if (SC->request_bufflen < (sizeof(ips_passthru_t) + pt->CmdBSize)) { - /* wrong size */ - DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", - ips_name, ha->host_num); - - return (IPS_FAILURE); - } - - if (ips_usrcmd(ha, pt, scb)) - return (IPS_SUCCESS); - else - return (IPS_FAILURE); - } else if (SC->cmnd[0] == IPS_IOCTL_NEW_COMMAND) { - char *user_area; - char *kern_area; - u_int32_t datasize; - - if (SC->request_bufflen < (sizeof(ips_passthru_t))) { + if (length < (sizeof(ips_passthru_t) + pt->CmdBSize)) { /* wrong size */ DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", ips_name, ha->host_num); - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - /* IF 
it's OK to Use the "CD BOOT" Flash Buffer, then you can */ - /* avoid allocating a huge buffer per adapter ( which can fail ). */ - if ( (ips_FlashData) && - (pt->CmdBSize == IPS_IMAGE_SIZE) && - (ips_FlashDataInUse == 0) ) { - ips_FlashDataInUse = 1; - ha->save_ioctl_data = ha->ioctl_data; - ha->save_ioctl_order = ha->ioctl_order; - ha->save_ioctl_datasize = ha->ioctl_datasize; - ha->ioctl_data = ips_FlashData; - ha->ioctl_order = 7; - ha->ioctl_datasize = IPS_IMAGE_SIZE; - } - - if ((pt->CoppCP.cmd.nvram.op_code == IPS_CMD_RW_NVRAM_PAGE) && - (pt->CoppCP.cmd.nvram.page == 5) && - (pt->CoppCP.cmd.nvram.write == 0)) { - - datasize = *((u_int32_t *) &scb->scsi_cmd->cmnd[8]); - - if (datasize < sizeof(IPS_NVRAM_P5)) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - return (IPS_FAILURE); - } - - ips_get_bios_version(ha, IPS_INTR_IORL); - ips_create_nvrampage5(ha, &nvram); - - user_area = *((char **) &scb->scsi_cmd->cmnd[4]); - kern_area = (char *) &nvram; - datasize = *((u_int32_t *) &scb->scsi_cmd->cmnd[8]); - - if (datasize > sizeof(IPS_NVRAM_P5)) - datasize = sizeof(IPS_NVRAM_P5); - - /* Copy out the buffer */ - if (copy_to_user((void *) user_area, (void *) kern_area, datasize) > 0) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (EFAULT); - } - - pt->BasicStatus = 0x00; - pt->ExtendedStatus = 0x00; - SC->result = DID_OK << 16; - - return (IPS_SUCCESS_IMM); - } - - /* - * IPSSEND flashing BIOS - */ - if ((pt->CoppCP.cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW) && - (pt->CoppCP.cmd.flashfw.type == 1) && - (pt->CoppCP.cmd.flashfw.direction == 2) && - (ha->device_id == IPS_DEVICEID_COPPERHEAD)) { - struct tq_struct task; - IPS_FLASH_DATA flash_data; - - /* We only support one packet */ - if (pt->CoppCP.cmd.flashfw.total_packets != 1) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - /* copy in the 
size/buffer ptr from the scsi command */ - memcpy(&pt->CmdBuffer, &SC->cmnd[4], 4); - memcpy(&pt->CmdBSize, &SC->cmnd[8], 4); - - if (pt->CmdBSize > le32_to_cpu(pt->CoppCP.cmd.flashfw.count)) { - pt->CmdBSize = le32_to_cpu(pt->CoppCP.cmd.flashfw.count); - } else { - /* ERROR: Command/Buffer mismatch */ - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - if ((!ha->func.programbios) || - (!ha->func.erasebios) || - (!ha->func.verifybios)) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - /* must have a buffer */ - if ((!pt->CmdBSize) || (!pt->CmdBuffer)) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - /* make sure buffer is big enough */ - if (pt->CmdBSize > ha->ioctl_datasize) { - void *bigger_struct; - u_int32_t count; - u_int32_t order; - - /* try to allocate a bigger struct */ - for (count = PAGE_SIZE, order = 0; - count < pt->CmdBSize; - order++, count <<= 1); - - bigger_struct = (void *) __get_free_pages(GFP_ATOMIC, order); - if (bigger_struct) { - /* free the old memory */ - free_pages((unsigned long) ha->ioctl_data, ha->ioctl_order); - - /* use the new memory */ - ha->ioctl_data = (char *) bigger_struct; - ha->ioctl_order = order; - ha->ioctl_datasize = count; - } else { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - spin_unlock(&ha->ips_lock); - - return (IPS_FAILURE); - } - } - - /* copy in the buffer */ - if (copy_from_user(ha->ioctl_data, pt->CmdBuffer, pt->CmdBSize) > 0) { - DEBUG_VAR(1, "(%s%d) flash bios failed - unable to copy user buffer", - ips_name, ha->host_num); - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - flash_data.userbuffer = pt->CmdBuffer; - flash_data.usersize = pt->CmdBSize; - flash_data.kernbuffer = 
ha->ioctl_data; - flash_data.kernsize = ha->ioctl_datasize; - flash_data.offset = 0; - flash_data.SC = (void *) SC; - flash_data.pt = (void *) pt; - flash_data.ha = (void *) ha; - sema_init( &ha->flash_ioctl_sem, 0 ); - flash_data.sem = &ha->flash_ioctl_sem; - - task.sync = 0; - task.routine = ips_scheduled_flash_bios; - task.data = (void *) &flash_data; - - /* Unlock the per-board lock */ - spin_unlock_irq(SC->host->host_lock); - - queue_task(&task, &tq_immediate); - mark_bh(IMMEDIATE_BH); - - /* Wait for the flash to complete */ - down(&ha->flash_ioctl_sem); - - /* Obtain the per-board lock */ - spin_lock_irq(SC->host->host_lock); - - return (flash_data.retcode); } - /* - * IPSSEND flashing BIOS in sectioned mode - */ - if ((pt->CoppCP.cmd.flashbios.op_code == IPS_CMD_RW_BIOSFW) && - (pt->CoppCP.cmd.flashbios.type == 1) && - (pt->CoppCP.cmd.flashbios.direction == 4) && - (ha->device_id == IPS_DEVICEID_COPPERHEAD)) { - struct tq_struct task; - IPS_FLASH_DATA flash_data; - - /* copy in the size/buffer ptr from the scsi command */ - memcpy(&pt->CmdBuffer, &SC->cmnd[4], 4); - memcpy(&pt->CmdBSize, &SC->cmnd[8], 4); - - if (pt->CmdBSize > le32_to_cpu(pt->CoppCP.cmd.flashbios.count)) { - pt->CmdBSize = le32_to_cpu(pt->CoppCP.cmd.flashbios.count); - } else { - /* ERROR: Command/Buffer mismatch */ - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - /* Update the Card BIOS */ - if ((!ha->func.programbios) || - (!ha->func.erasebios) || - (!ha->func.verifybios)) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - /* must have a buffer */ - if ((!pt->CmdBSize) || (!pt->CmdBuffer)) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } + if(ha->device_id == IPS_DEVICEID_COPPERHEAD && + pt->CoppCP.cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW) + return ips_flash_copperhead(ha, pt, 
scb); - /* make sure buffer is big enough */ - if (pt->CmdBSize > ha->ioctl_datasize) { - void *bigger_struct; - u_int32_t count; - u_int32_t order; - - /* try to allocate a bigger struct */ - for (count = PAGE_SIZE, order = 0; - count < pt->CmdBSize; - order++, count <<= 1); - - bigger_struct = (void *) __get_free_pages(GFP_ATOMIC, order); - if (bigger_struct) { - /* free the old memory */ - free_pages((unsigned long) ha->ioctl_data, ha->ioctl_order); - - /* use the new memory */ - ha->ioctl_data = (char *) bigger_struct; - ha->ioctl_order = order; - ha->ioctl_datasize = count; - } else { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - spin_unlock(&ha->ips_lock); - - return (IPS_FAILURE); - } - } - - /* copy in the buffer */ - if (copy_from_user(ha->ioctl_data, pt->CmdBuffer, pt->CmdBSize) > 0) { - DEBUG_VAR(1, "(%s%d) flash bios failed - unable to copy user buffer", - ips_name, ha->host_num); - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (EFAULT); - } - - flash_data.userbuffer = pt->CmdBuffer; - flash_data.usersize = pt->CmdBSize; - flash_data.kernbuffer = ha->ioctl_data; - flash_data.kernsize = ha->ioctl_datasize; - flash_data.offset = le32_to_cpu(pt->CoppCP.cmd.flashbios.offset); - flash_data.SC = (void *) SC; - flash_data.pt = (void *) pt; - flash_data.ha = (void *) ha; - sema_init( &ha->flash_ioctl_sem, 0 ); - flash_data.sem = &ha->flash_ioctl_sem; - - task.sync = 0; - task.routine = ips_flash_bios_section; - task.data = (void *) &flash_data; - - /* Unlock the per-board lock */ - spin_unlock_irq(SC->host->host_lock); - - queue_task(&task, &tq_immediate); - mark_bh(IMMEDIATE_BH); - - /* Wait for the flash to complete */ - down(&ha->flash_ioctl_sem); - - /* Obtain the per-board lock */ - spin_lock_irq(SC->host->host_lock); - - return (flash_data.retcode); - } - - if ((pt->CoppCP.cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW) && - (pt->CoppCP.cmd.flashfw.type == 1) && - 
(pt->CoppCP.cmd.flashfw.direction == 3) && - (ha->device_id == IPS_DEVICEID_COPPERHEAD)) { - /* Erase the Card BIOS */ - if (!ha->func.erasebios) { - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - if ((*ha->func.erasebios)(ha)) { - DEBUG_VAR(1, "(%s%d) flash bios failed - unable to erase flash", - ips_name, ha->host_num); - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - - return (IPS_FAILURE); - } - - SC->result = DID_OK << 16; - pt->BasicStatus = 0x00; - pt->ExtendedStatus = 0x00; - - return (IPS_SUCCESS_IMM); - } - - if (ips_newusrcmd(ha, pt, scb)) + if (ips_usrcmd(ha, pt, scb)) return (IPS_SUCCESS); else return (IPS_FAILURE); - } - + } + break; } /* end switch */ @@ -2672,176 +2391,146 @@ ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { } /****************************************************************************/ -/* */ -/* Routine Name: ips_scheduled_flash_bios */ -/* */ +/* Routine Name: ips_flash_copperhead */ /* Routine Description: */ -/* */ -/* Flash the BIOS on a Copperhead style controller */ -/* To be called from a task queue */ -/* */ +/* Flash the BIOS/FW on a Copperhead style controller */ /****************************************************************************/ -static void -ips_scheduled_flash_bios(void *data) { - ips_ha_t *ha; - Scsi_Cmnd *SC; - ips_passthru_t *pt; - IPS_FLASH_DATA *fd; - - fd = (IPS_FLASH_DATA *) data; - ha = (ips_ha_t *) fd->ha; - pt = (ips_passthru_t *) fd->pt; - SC = (Scsi_Cmnd *) fd->SC; - - /* - * Set initial return codes - */ - SC->result = DID_OK << 16; - pt->BasicStatus = 0x00; - pt->ExtendedStatus = 0x00; - fd->retcode = IPS_SUCCESS_IMM; - - /* - * Fix the size/ptr to account for the - * flash header - */ - fd->kernbuffer += 0xC0; - fd->kernsize -= 0xC0; - fd->userbuffer += 0xC0; - fd->usersize -= 0xC0; - - if ((*ha->func.erasebios)(ha)) { - DEBUG_VAR(1, "(%s%d) flash bios 
failed - unable to erase flash", - ips_name, ha->host_num); - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - fd->retcode = IPS_FAILURE; - up(fd->sem); - - return ; +static int +ips_flash_copperhead(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb){ + int datasize, count; + + /* Trombone is the only copperhead that can do packet flash, but only + * for firmware. No one said it had to make sence. */ + if(IPS_IS_TROMBONE(ha) && pt->CoppCP.cmd.flashfw.type == IPS_FW_IMAGE){ + if(ips_usrcmd(ha, pt, scb)) + return IPS_SUCCESS; + else + return IPS_FAILURE; } - - ips_flash_bios_segment(data); - - if (fd->retcode == IPS_FAILURE) - return ; - - if ((*ha->func.verifybios)(ha, fd->kernbuffer, fd->usersize, fd->offset)) { - DEBUG_VAR(1, "(%s%d) flash bios failed - unable to verify flash", - ips_name, ha->host_num); - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - fd->retcode = IPS_FAILURE; - up(fd->sem); - - return ; + pt->BasicStatus = 0x0B; + pt->ExtendedStatus = 0; + scb->scsi_cmd->result = DID_OK <<16; + /* IF it's OK to Use the "CD BOOT" Flash Buffer, then you can */ + /* avoid allocating a huge buffer per adapter ( which can fail ). 
*/ + if(pt->CoppCP.cmd.flashfw.type == IPS_BIOS_IMAGE && + pt->CoppCP.cmd.flashfw.direction == IPS_ERASE_BIOS){ + pt->BasicStatus = 0; + return ips_flash_bios(ha, pt, scb); + }else if(pt->CoppCP.cmd.flashfw.packet_num == 0){ + if(ips_FlashData && !test_and_set_bit(0, &ips_FlashDataInUse)){ + ha->flash_data = ips_FlashData; + ha->flash_order = 7; + ha->flash_datasize = 0; + }else if(!ha->flash_data){ + datasize = pt->CoppCP.cmd.flashfw.total_packets * + pt->CoppCP.cmd.flashfw.count; + for (count = PAGE_SIZE, ha->flash_order = 0; count < datasize; + ha->flash_order++, count <<= 1); + ha->flash_data = (char *)__get_free_pages(GFP_ATOMIC, ha->flash_order); + ha->flash_datasize = 0; + }else + return IPS_FAILURE; + }else{ + if(pt->CoppCP.cmd.flashfw.count + ha->flash_datasize > + (PAGE_SIZE << ha->flash_order)){ + ips_free_flash_copperhead(ha); + printk(KERN_WARNING "failed size sanity check\n"); + return IPS_FAILURE; + } } - - /* Tell them we are done */ - if (fd->retcode != IPS_FAILURE) - up(fd->sem); + if(!ha->flash_data) + return IPS_FAILURE; + pt->BasicStatus = 0; + memcpy(&ha->flash_data[ha->flash_datasize], pt + 1, + pt->CoppCP.cmd.flashfw.count); + ha->flash_datasize += pt->CoppCP.cmd.flashfw.count; + if(pt->CoppCP.cmd.flashfw.packet_num == + pt->CoppCP.cmd.flashfw.total_packets - 1){ + if(pt->CoppCP.cmd.flashfw.type == IPS_BIOS_IMAGE) + return ips_flash_bios(ha, pt, scb); + else if(pt->CoppCP.cmd.flashfw.type == IPS_FW_IMAGE) + return ips_flash_firmware(ha, pt, scb); + } + return IPS_SUCCESS_IMM; } /****************************************************************************/ -/* */ -/* Routine Name: ips_flash_bios_section */ -/* */ +/* Routine Name: ips_flash_bios */ /* Routine Description: */ -/* */ -/* wrapper for ips_flash_bios_segment that raises the semaphore */ -/* */ +/* flashes the bios of a copperhead adapter */ /****************************************************************************/ -static void -ips_flash_bios_section(void *data) { - ips_ha_t 
*ha; - Scsi_Cmnd *SC; - ips_passthru_t *pt; - IPS_FLASH_DATA *fd; - - fd = (IPS_FLASH_DATA *) data; - ha = (ips_ha_t *) fd->ha; - pt = (ips_passthru_t *) fd->pt; - SC = (Scsi_Cmnd *) fd->SC; - - /* - * Set initial return codes - */ - SC->result = DID_OK << 16; - pt->BasicStatus = 0x00; +static int +ips_flash_bios(ips_ha_t * ha, ips_passthru_t *pt, ips_scb_t *scb){ + + if(pt->CoppCP.cmd.flashfw.type == IPS_BIOS_IMAGE && + pt->CoppCP.cmd.flashfw.direction == IPS_WRITE_BIOS){ + if ((!ha->func.programbios) || (!ha->func.erasebios) || + (!ha->func.verifybios)) + goto error; + if((*ha->func.erasebios)(ha)){ + DEBUG_VAR(1, "(%s%d) flash bios failed - unable to erase flash", + ips_name, ha->host_num); + goto error; + }else if ((*ha->func.programbios)(ha, ha->flash_data + IPS_BIOS_HEADER, + ha->flash_datasize - IPS_BIOS_HEADER, 0 )) { + DEBUG_VAR(1, "(%s%d) flash bios failed - unable to flash", + ips_name, ha->host_num); + goto error; + }else if ((*ha->func.verifybios)(ha, ha->flash_data + IPS_BIOS_HEADER, + ha->flash_datasize - IPS_BIOS_HEADER, 0 )) { + DEBUG_VAR(1, "(%s%d) flash bios failed - unable to verify flash", + ips_name, ha->host_num); + goto error; + } + ips_free_flash_copperhead(ha); + return IPS_SUCCESS_IMM; + }else if(pt->CoppCP.cmd.flashfw.type == IPS_BIOS_IMAGE && + pt->CoppCP.cmd.flashfw.direction == IPS_ERASE_BIOS){ + if(!ha->func.erasebios) + goto error; + if((*ha->func.erasebios)(ha)){ + DEBUG_VAR(1, "(%s%d) flash bios failed - unable to erase flash", + ips_name, ha->host_num); + goto error; + } + return IPS_SUCCESS_IMM; + } +error: + pt->BasicStatus = 0x0B; pt->ExtendedStatus = 0x00; - fd->retcode = IPS_SUCCESS_IMM; - - ips_flash_bios_segment(data); - - if (fd->retcode != IPS_FAILURE) - up(fd->sem); + ips_free_flash_copperhead(ha); + return IPS_FAILURE; } /****************************************************************************/ -/* */ -/* Routine Name: ips_flash_bios_segment */ -/* */ +/* Routine Name: ips_flash_firmware */ /* Routine Description: 
*/ -/* */ -/* Flash a portion of the BIOS on a Copperhead style controller */ -/* To be called from a task queue */ -/* */ +/* flashes the firmware of a copperhead adapter */ /****************************************************************************/ -static void -ips_flash_bios_segment(void *data) { - ips_ha_t *ha; - Scsi_Cmnd *SC; - ips_passthru_t *pt; - IPS_FLASH_DATA *fd; - - fd = (IPS_FLASH_DATA *) data; - ha = (ips_ha_t *) fd->ha; - pt = (ips_passthru_t *) fd->pt; - SC = (Scsi_Cmnd *) fd->SC; - - if ((*ha->func.programbios)(ha, fd->kernbuffer, fd->usersize, fd->offset)) { - DEBUG_VAR(1, "(%s%d) flash bios failed - unable to program flash", - ips_name, ha->host_num); +static int +ips_flash_firmware(ips_ha_t * ha, ips_passthru_t *pt, ips_scb_t *scb){ + IPS_SG_LIST *sg_list; + uint32_t cmd_busaddr; + + if(pt->CoppCP.cmd.flashfw.type == IPS_FW_IMAGE && + pt->CoppCP.cmd.flashfw.direction == IPS_WRITE_FW ){ + memset(&pt->CoppCP.cmd, 0, sizeof(IPS_HOST_COMMAND)); + pt->CoppCP.cmd.flashfw.op_code = IPS_CMD_DOWNLOAD; + pt->CoppCP.cmd.flashfw.count = cpu_to_le32(ha->flash_datasize); + }else{ pt->BasicStatus = 0x0B; pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - fd->retcode = IPS_FAILURE; - up(fd->sem); - - return ; + ips_free_flash_copperhead(ha); + return IPS_FAILURE; } -} - -/****************************************************************************/ -/* */ -/* Routine Name: ips_usrcmd */ -/* */ -/* Routine Description: */ -/* */ -/* Process a user command and make it ready to send */ -/* */ -/****************************************************************************/ -static int -ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { - IPS_SG_LIST *sg_list; - - METHOD_TRACE("ips_usrcmd", 1); - - if ((!scb) || (!pt) || (!ha)) - return (0); - /* Save the S/G list pointer so it doesn't get clobbered */ sg_list = scb->sg_list; - + cmd_busaddr = scb->scb_busaddr; /* copy in the CP */ memcpy(&scb->cmd, &pt->CoppCP.cmd, sizeof(IPS_IOCTL_CMD)); 
- memcpy(&scb->dcdb, &pt->CoppCP.dcdb, sizeof(IPS_DCDB_TABLE)); - /* FIX stuff that might be wrong */ scb->sg_list = sg_list; - scb->scb_busaddr = virt_to_bus(scb); + scb->scb_busaddr = cmd_busaddr; scb->bus = scb->scsi_cmd->channel; scb->target_id = scb->scsi_cmd->target; scb->lun = scb->scsi_cmd->lun; @@ -2851,52 +2540,36 @@ ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { scb->op_code = 0; scb->callback = ipsintr_done; scb->timeout = ips_cmd_timeout; - scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); - - /* we don't support DCDB/READ/WRITE Scatter Gather */ - if ((scb->cmd.basic_io.op_code == IPS_CMD_READ_SG) || - (scb->cmd.basic_io.op_code == IPS_CMD_WRITE_SG) || - (scb->cmd.basic_io.op_code == IPS_CMD_DCDB_SG)) - return (0); - - if (pt->CmdBSize) { - scb->data_busaddr = virt_to_bus(scb->scsi_cmd->request_buffer + sizeof(ips_passthru_t)); - } else { - scb->data_busaddr = 0L; - } - - if (scb->cmd.dcdb.op_code == IPS_CMD_DCDB) - scb->cmd.dcdb.dcdb_address = cpu_to_le32(virt_to_bus(&scb->dcdb)); - - if (pt->CmdBSize) { - if (scb->cmd.dcdb.op_code == IPS_CMD_DCDB) - scb->dcdb.buffer_pointer = cpu_to_le32(scb->data_busaddr); - else - scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->data_busaddr); - } - /* set timeouts */ - if (pt->TimeOut) { + scb->data_len = ha->flash_datasize; + scb->data_busaddr = pci_map_single(ha->pcidev, ha->flash_data, scb->data_len, + IPS_DMA_DIR(scb)); + scb->flags |= IPS_SCB_MAP_SINGLE; + scb->cmd.flashfw.command_id = IPS_COMMAND_ID(ha, scb); + scb->cmd.flashfw.buffer_addr = scb->data_busaddr; + if (pt->TimeOut) scb->timeout = pt->TimeOut; + scb->scsi_cmd->result = DID_OK <<16; + return IPS_SUCCESS; +} - if (pt->TimeOut <= 10) - scb->dcdb.cmd_attribute |= IPS_TIMEOUT10; - else if (pt->TimeOut <= 60) - scb->dcdb.cmd_attribute |= IPS_TIMEOUT60; - else - scb->dcdb.cmd_attribute |= IPS_TIMEOUT20M; - } - - /* assume success */ - scb->scsi_cmd->result = DID_OK << 16; - - /* success */ - return (1); 
+/****************************************************************************/ +/* Routine Name: ips_free_flash_copperhead */ +/* Routine Description: */ +/* release the memory resources used to hold the flash image */ +/****************************************************************************/ +static void +ips_free_flash_copperhead(ips_ha_t *ha){ + if(ha->flash_data == ips_FlashData) + test_and_clear_bit(0, &ips_FlashDataInUse); + else if(ha->flash_data) + free_pages((unsigned long)ha->flash_data, ha->flash_order); + ha->flash_data = NULL; } /****************************************************************************/ /* */ -/* Routine Name: ips_newusrcmd */ +/* Routine Name: ips_usrcmd */ /* */ /* Routine Description: */ /* */ @@ -2904,11 +2577,9 @@ ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { /* */ /****************************************************************************/ static int -ips_newusrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { - IPS_SG_LIST *sg_list; - char *user_area; - char *kern_area; - u_int32_t datasize; +ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { + IPS_SG_LIST *sg_list; + uint32_t cmd_busaddr; METHOD_TRACE("ips_usrcmd", 1); @@ -2917,14 +2588,14 @@ ips_newusrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { /* Save the S/G list pointer so it doesn't get clobbered */ sg_list = scb->sg_list; - + cmd_busaddr = scb->scb_busaddr; /* copy in the CP */ memcpy(&scb->cmd, &pt->CoppCP.cmd, sizeof(IPS_IOCTL_CMD)); memcpy(&scb->dcdb, &pt->CoppCP.dcdb, sizeof(IPS_DCDB_TABLE)); /* FIX stuff that might be wrong */ scb->sg_list = sg_list; - scb->scb_busaddr = virt_to_bus(scb); + scb->scb_busaddr = cmd_busaddr; scb->bus = scb->scsi_cmd->channel; scb->target_id = scb->scsi_cmd->target; scb->lun = scb->scsi_cmd->lun; @@ -2943,50 +2614,31 @@ ips_newusrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { return (0); if (pt->CmdBSize) { - if (pt->CmdBSize > ha->ioctl_datasize) { - void *bigger_struct; 
- u_int32_t count; - u_int32_t order; - - /* try to allocate a bigger struct */ - for (count = PAGE_SIZE, order = 0; - count < pt->CmdBSize; - order++, count <<= 1); - - bigger_struct = (void *) __get_free_pages(GFP_ATOMIC, order); - if (bigger_struct) { - /* free the old memory */ - free_pages((unsigned long) ha->ioctl_data, ha->ioctl_order); - - /* use the new memory */ - ha->ioctl_data = (char *) bigger_struct; - ha->ioctl_order = order; - ha->ioctl_datasize = count; - } else - return (0); - - } - - scb->data_busaddr = virt_to_bus(ha->ioctl_data); - - /* Attempt to copy in the data */ - user_area = *((char **) &scb->scsi_cmd->cmnd[4]); - kern_area = ha->ioctl_data; - datasize = *((u_int32_t *) &scb->scsi_cmd->cmnd[8]); - - if (copy_from_user(kern_area, user_area, datasize) > 0) { - DEBUG_VAR(1, "(%s%d) passthru failed - unable to copy in user data", - ips_name, ha->host_num); - - return (0); + if(!scb->scsi_cmd->use_sg){ + scb->data_len = pt->CmdBSize; + scb->data_busaddr = pci_map_single(ha->pcidev, + scb->scsi_cmd->request_buffer + + sizeof(ips_passthru_t), + pt->CmdBSize, + IPS_DMA_DIR(scb)); + scb->flags |= IPS_SCB_MAP_SINGLE; + } else { + scb->data_len = pt->CmdBSize; + scb->data_busaddr = pci_map_single(ha->pcidev, + ha->ioctl_data + + sizeof(ips_passthru_t), + pt->CmdBSize, + IPS_DMA_DIR(scb)); + scb->flags |= IPS_SCB_MAP_SINGLE; } - } else { scb->data_busaddr = 0L; } if (scb->cmd.dcdb.op_code == IPS_CMD_DCDB) - scb->cmd.dcdb.dcdb_address = cpu_to_le32(virt_to_bus(&scb->dcdb)); + scb->cmd.dcdb.dcdb_address = cpu_to_le32(scb->scb_busaddr + + (unsigned long)&scb->dcdb - + (unsigned long)scb); if (pt->CmdBSize) { if (scb->cmd.dcdb.op_code == IPS_CMD_DCDB) @@ -3035,8 +2687,10 @@ ips_cleanup_passthru(ips_ha_t *ha, ips_scb_t *scb) { return ; } - - pt = (ips_passthru_t *) scb->scsi_cmd->request_buffer; + if(!scb->scsi_cmd->use_sg) + pt = (ips_passthru_t *) scb->scsi_cmd->request_buffer; + else + pt = (ips_passthru_t *) ha->ioctl_data; /* Copy data back to the 
user */ if (scb->cmd.dcdb.op_code == IPS_CMD_DCDB) /* Copy DCDB Back to Caller's Area */ @@ -3044,14 +2698,23 @@ ips_cleanup_passthru(ips_ha_t *ha, ips_scb_t *scb) { pt->BasicStatus = scb->basic_status; pt->ExtendedStatus = scb->extended_status; - - if (scb->scsi_cmd->cmnd[0] == IPS_IOCTL_NEW_COMMAND) - up(&ha->ioctl_sem); - + pt->AdapterType = ha->ad_type; + + if(ha->device_id == IPS_DEVICEID_COPPERHEAD && + (scb->cmd.flashfw.op_code == IPS_CMD_DOWNLOAD || + scb->cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW)) + ips_free_flash_copperhead(ha); + + if(scb->scsi_cmd->use_sg){ + int i, length = 0; + struct scatterlist *sg = scb->scsi_cmd->request_buffer; + for(i = 0; i < scb->scsi_cmd->use_sg; i++){ + memcpy(IPS_SG_ADDRESS(&sg[i]), &ha->ioctl_data[length], sg[i].length); + length += sg[i].length; + } + } } -#endif - /****************************************************************************/ /* */ /* Routine Name: ips_host_info */ @@ -3245,6 +2908,14 @@ ips_identify_controller(ips_ha_t *ha) { case IPS_SUBDEVICEID_4LX: ha->ad_type = IPS_ADTYPE_SERVERAID4LX; break; + + case IPS_SUBDEVICEID_5I2: + ha->ad_type = IPS_ADTYPE_SERVERAID5I2; + break; + + case IPS_SUBDEVICEID_5I1: + ha->ad_type = IPS_ADTYPE_SERVERAID5I1; + break; } break; @@ -3253,31 +2924,6 @@ ips_identify_controller(ips_ha_t *ha) { /****************************************************************************/ /* */ -/* Routine Name: ips_create_nvrampage5 */ -/* */ -/* Routine Description: */ -/* */ -/* Create a pseudo nvram page 5 */ -/* */ -/****************************************************************************/ -static void -ips_create_nvrampage5(ips_ha_t *ha, IPS_NVRAM_P5 *nvram) { - METHOD_TRACE("ips_create_nvrampage5", 1); - - memset(nvram, 0, sizeof(IPS_NVRAM_P5)); - - nvram->signature = IPS_NVRAM_P5_SIG; - nvram->adapter_slot = ha->slot_num; - nvram->adapter_type = ha->ad_type; - nvram->operating_system = IPS_OS_LINUX; - strncpy((char *) nvram->driver_high, IPS_VERSION_HIGH, 4); - strncpy((char *) 
nvram->driver_low, IPS_VERSION_LOW, 4); - strncpy((char *) nvram->bios_high, ha->bios_version, 4); - strncpy((char *) nvram->bios_low, ha->bios_version + 4, 4); -} - -/****************************************************************************/ -/* */ /* Routine Name: ips_get_bios_version */ /* */ /* Routine Description: */ @@ -3289,10 +2935,10 @@ static void ips_get_bios_version(ips_ha_t *ha, int intr) { ips_scb_t *scb; int ret; - u_int8_t major; - u_int8_t minor; - u_int8_t subminor; - u_int8_t *buffer; + uint8_t major; + uint8_t minor; + uint8_t subminor; + uint8_t *buffer; char hexDigits[] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; METHOD_TRACE("ips_get_bios_version", 1); @@ -3309,35 +2955,35 @@ ips_get_bios_version(ips_ha_t *ha, int intr) { /* test 1st byte */ writel(0, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (readb(ha->mem_ptr + IPS_REG_FLDP) != 0x55) return; - writel(cpu_to_le32(1), ha->mem_ptr + IPS_REG_FLAP); + writel(1, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (readb(ha->mem_ptr + IPS_REG_FLDP) != 0xAA) return; /* Get Major version */ - writel(cpu_to_le32(0x1FF), ha->mem_ptr + IPS_REG_FLAP); + writel(0x1FF, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ major = readb(ha->mem_ptr + IPS_REG_FLDP); /* Get Minor version */ - writel(cpu_to_le32(0x1FE), ha->mem_ptr + IPS_REG_FLAP); + writel(0x1FE, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ minor = readb(ha->mem_ptr + IPS_REG_FLDP); /* Get SubMinor version */ - writel(cpu_to_le32(0x1FD), ha->mem_ptr + IPS_REG_FLAP); + writel(0x1FD, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ 
subminor = readb(ha->mem_ptr + IPS_REG_FLDP); } else { @@ -3346,14 +2992,14 @@ ips_get_bios_version(ips_ha_t *ha, int intr) { /* test 1st byte */ outl(0, ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (inb(ha->io_addr + IPS_REG_FLDP) != 0x55) return ; outl(cpu_to_le32(1), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (inb(ha->io_addr + IPS_REG_FLDP) != 0xAA) return ; @@ -3361,21 +3007,21 @@ ips_get_bios_version(ips_ha_t *ha, int intr) { /* Get Major version */ outl(cpu_to_le32(0x1FF), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ major = inb(ha->io_addr + IPS_REG_FLDP); /* Get Minor version */ outl(cpu_to_le32(0x1FE), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ minor = inb(ha->io_addr + IPS_REG_FLDP); /* Get SubMinor version */ outl(cpu_to_le32(0x1FD), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ subminor = inb(ha->io_addr + IPS_REG_FLDP); @@ -3401,9 +3047,13 @@ ips_get_bios_version(ips_ha_t *ha, int intr) { scb->cmd.flashfw.type = 1; scb->cmd.flashfw.direction = 0; scb->cmd.flashfw.count = cpu_to_le32(0x800); - scb->cmd.flashfw.buffer_addr = cpu_to_le32(virt_to_bus(buffer)); scb->cmd.flashfw.total_packets = 1; scb->cmd.flashfw.packet_num = 0; + scb->data_len = 0x1000; + scb->data_busaddr = pci_map_single(ha->pcidev, buffer, scb->data_len, + IPS_DMA_DIR(scb)); + scb->cmd.flashfw.buffer_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; /* issue the command */ if (((ret = ips_send_wait(ha, scb, ips_cmd_timeout, intr)) == IPS_FAILURE) || @@ -3551,6 +3201,14 @@ ips_hainit(ips_ha_t *ha) { } } + /* Limit the Active Commands on a Lite Adapter */ + if ((ha->ad_type == 
IPS_ADTYPE_SERVERAID3L) || + (ha->ad_type == IPS_ADTYPE_SERVERAID4L) || + (ha->ad_type == IPS_ADTYPE_SERVERAID4LX)) { + if ((ha->max_cmds > MaxLiteCmds) && (MaxLiteCmds)) + ha->max_cmds = MaxLiteCmds; + } + /* set controller IDs */ ha->ha_id[0] = IPS_ADAPTER_ID; for (i = 1; i < ha->nbus; i++) { @@ -3577,33 +3235,22 @@ ips_next(ips_ha_t *ha, int intr) { Scsi_Cmnd *p; Scsi_Cmnd *q; ips_copp_wait_item_t *item; - int ret, sg_entries = 0; - int intr_status; + int ret; unsigned long cpu_flags; - unsigned long cpu_flags2; + unsigned long cpu_flags2 = 0; struct Scsi_Host *host; - METHOD_TRACE("ips_next", 1); if (!ha) return ; - host = ips_sh[ha->host_num]; - /* * Block access to the queue function so * this command won't time out */ - if (intr == IPS_INTR_ON) { - spin_lock_irqsave(host->host_lock, cpu_flags2); - intr_status = IPS_INTR_IORL; - } else { - intr_status = intr; - - /* Quiet the compiler */ - cpu_flags2 = 0; - } - + if (intr == IPS_INTR_ON) + IPS_LOCK_SAVE(host->host_lock, cpu_flags2); + if ((ha->subsys->param[3] & 0x300000) && ( ha->scb_activelist.count == 0 )) { struct timeval tv; @@ -3613,16 +3260,15 @@ ips_next(ips_ha_t *ha, int intr) { if (tv.tv_sec - ha->last_ffdc > IPS_SECS_8HOURS) { ha->last_ffdc = tv.tv_sec; IPS_HA_UNLOCK(cpu_flags); - ips_ffdc_time(ha, intr_status); + ips_ffdc_time(ha); } else { IPS_HA_UNLOCK(cpu_flags); } } if (intr == IPS_INTR_ON) - spin_unlock_irqrestore(host->host_lock, cpu_flags2); + IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags2); -#ifndef NO_IPS_CMDLINE /* * Send passthru commands * These have priority over normal I/O @@ -3650,17 +3296,7 @@ ips_next(ips_ha_t *ha, int intr) { case IPS_FAILURE: if (scb->scsi_cmd) { scb->scsi_cmd->result = DID_ERROR << 16; - - /* raise the semaphore */ - if (scb->scsi_cmd->cmnd[0] == IPS_IOCTL_NEW_COMMAND) { - u_int32_t datasize; - - datasize = 0; - memcpy(&scb->scsi_cmd->cmnd[8], &datasize, 4); - up(&ha->ioctl_sem); - } else { - scb->scsi_cmd->scsi_done(scb->scsi_cmd); - } + 
scb->scsi_cmd->scsi_done(scb->scsi_cmd); } ips_freescb(ha, scb); @@ -3668,17 +3304,7 @@ ips_next(ips_ha_t *ha, int intr) { case IPS_SUCCESS_IMM: if (scb->scsi_cmd) { scb->scsi_cmd->result = DID_OK << 16; - - /* raise the semaphore */ - if (scb->scsi_cmd->cmnd[0] == IPS_IOCTL_NEW_COMMAND) { - u_int32_t datasize; - - datasize = 0; - memcpy(&scb->scsi_cmd->cmnd[8], &datasize, 4); - up(&ha->ioctl_sem); - } else { - scb->scsi_cmd->scsi_done(scb->scsi_cmd); - } + scb->scsi_cmd->scsi_done(scb->scsi_cmd); } ips_freescb(ha, scb); @@ -3692,7 +3318,7 @@ ips_next(ips_ha_t *ha, int intr) { IPS_QUEUE_LOCK(&ha->copp_waitlist); ha->num_ioctl--; continue; - } + } ret = ips_send_cmd(ha, scb); @@ -3704,22 +3330,12 @@ ips_next(ips_ha_t *ha, int intr) { switch(ret) { case IPS_FAILURE: if (scb->scsi_cmd) { - /* raise the semaphore */ - if (scb->scsi_cmd->cmnd[0] == IPS_IOCTL_NEW_COMMAND) - up(&ha->ioctl_sem); - scb->scsi_cmd->result = DID_ERROR << 16; } ips_freescb(ha, scb); break; case IPS_SUCCESS_IMM: - if (scb->scsi_cmd) { - /* raise the semaphore */ - if (scb->scsi_cmd->cmnd[0] == IPS_IOCTL_NEW_COMMAND) - up(&ha->ioctl_sem); - } - ips_freescb(ha, scb); break; default: @@ -3732,7 +3348,6 @@ ips_next(ips_ha_t *ha, int intr) { IPS_QUEUE_UNLOCK(&ha->copp_waitlist); IPS_HA_UNLOCK(cpu_flags); -#endif /* * Send "Normal" I/O commands @@ -3779,34 +3394,34 @@ ips_next(ips_ha_t *ha, int intr) { int i; sg = SC->request_buffer; - sg_entries = pci_map_sg(ha->pcidev, sg, SC->use_sg, scsi_to_pci_dma_dir(SC->sc_data_direction)); - - if (SC->use_sg == 1) { - if (sg[0].length > ha->max_xfer) { - scb->breakup = 1; + scb->sg_count = pci_map_sg(ha->pcidev, sg, SC->use_sg, + IPS_DMA_DIR(scb)); + scb->flags |= IPS_SCB_MAP_SG; + if (scb->sg_count == 1) { + if (sg_dma_len(sg) > ha->max_xfer) { + scb->breakup = 1; scb->data_len = ha->max_xfer; } else - scb->data_len = sg[0].length; + scb->data_len = sg_dma_len(sg); scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = sg_dma_address(&sg[0]); + 
scb->data_busaddr = sg_dma_address(sg); scb->sg_len = 0; } else { /* Check for the first Element being bigger than MAX_XFER */ - if (sg[0].length > ha->max_xfer) { + if (sg_dma_len(&sg[0]) > ha->max_xfer) { scb->sg_list[0].address = cpu_to_le32(sg_dma_address(&sg[0])); scb->sg_list[0].length = ha->max_xfer; scb->data_len = ha->max_xfer; scb->breakup = 0; scb->sg_break=1; scb->sg_len = 1; - } - else { - for (i = 0; i < SC->use_sg; i++) { + } else { + for (i = 0; i < scb->sg_count; i++) { scb->sg_list[i].address = cpu_to_le32(sg_dma_address(&sg[i])); - scb->sg_list[i].length = cpu_to_le32(sg[i].length); + scb->sg_list[i].length = cpu_to_le32(sg_dma_len(&sg[i])); - if (scb->data_len + sg[i].length > ha->max_xfer) { + if (scb->data_len + sg_dma_len(&sg[i]) > ha->max_xfer) { /* * Data Breakup required */ @@ -3814,17 +3429,17 @@ ips_next(ips_ha_t *ha, int intr) { break; } - scb->data_len += sg[i].length; + scb->data_len += sg_dma_len(&sg[i]); } if (!scb->breakup) - scb->sg_len = sg_entries; + scb->sg_len = scb->sg_count; else scb->sg_len = scb->breakup; } scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = virt_to_bus(scb->sg_list); + scb->data_busaddr = scb->sg_busaddr; } } else { if (SC->request_bufflen) { @@ -3839,7 +3454,9 @@ ips_next(ips_ha_t *ha, int intr) { } scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = virt_to_bus(SC->request_buffer); + scb->data_busaddr = pci_map_single(ha->pcidev, SC->request_buffer, + scb->data_len, IPS_DMA_DIR(scb)); + scb->flags |= IPS_SCB_MAP_SINGLE; scb->sg_len = 0; } else { scb->data_busaddr = 0L; @@ -3850,8 +3467,7 @@ ips_next(ips_ha_t *ha, int intr) { } - scb->dcdb.cmd_attribute |= - ips_command_direction[scb->scsi_cmd->cmnd[0]]; + scb->dcdb.cmd_attribute = ips_command_direction[scb->scsi_cmd->cmnd[0]]; if (!scb->dcdb.cmd_attribute & 0x3) scb->dcdb.transfer_length = 0; @@ -4389,6 +4005,7 @@ static void ipsintr_blocking(ips_ha_t *ha, ips_scb_t *scb) { METHOD_TRACE("ipsintr_blocking", 2); + ips_freescb(ha, 
scb); if ((ha->waitflag == TRUE) && (ha->cmd_in_progress == scb->cdb[0])) { ha->waitflag = FALSE; @@ -4447,42 +4064,41 @@ ips_done(ips_ha_t *ha, ips_scb_t *scb) { if (!scb) return ; -#ifndef NO_IPS_CMDLINE if ((scb->scsi_cmd) && (ips_is_passthru(scb->scsi_cmd))) { ips_cleanup_passthru(ha, scb); IPS_HA_LOCK(cpu_flags); ha->num_ioctl--; IPS_HA_UNLOCK(cpu_flags); } else { -#endif /* * Check to see if this command had too much * data and had to be broke up. If so, queue * the rest of the data and continue. */ - if (scb->breakup) { + if ((scb->breakup) || (scb->sg_break)) { /* we had a data breakup */ - u_int8_t bk_save; + uint8_t bk_save; bk_save = scb->breakup; scb->breakup = 0; + mod_timer(&scb->scsi_cmd->eh_timeout, jiffies + 120 * HZ); - if (scb->scsi_cmd->use_sg) { + if (scb->sg_count) { /* S/G request */ struct scatterlist *sg; int i; sg = scb->scsi_cmd->request_buffer; - if (scb->scsi_cmd->use_sg == 1) { - if (sg[0].length - (bk_save * ha->max_xfer) > ha->max_xfer) { + if (scb->sg_count == 1) { + if (sg_dma_len(sg) - (bk_save * ha->max_xfer) > ha->max_xfer) { /* Further breakup required */ scb->data_len = ha->max_xfer; - scb->data_busaddr = sg_dma_address(&sg[0] + (bk_save * ha->max_xfer)); + scb->data_busaddr = sg_dma_address(sg) + (bk_save * ha->max_xfer); scb->breakup = bk_save + 1; } else { - scb->data_len = sg[0].length - (bk_save * ha->max_xfer); - scb->data_busaddr = sg_dma_address(&sg[0] + (bk_save * ha->max_xfer)); + scb->data_len = sg_dma_len(sg) - (bk_save * ha->max_xfer); + scb->data_busaddr = sg_dma_address(sg) + (bk_save * ha->max_xfer); } scb->dcdb.transfer_length = scb->data_len; @@ -4499,45 +4115,46 @@ ips_done(ips_ha_t *ha, ips_scb_t *scb) { /* pointed to by bk_save */ if (scb->sg_break) { scb->sg_len = 1; - scb->sg_list[0].address = sg_dma_address(&sg[bk_save] + ha->max_xfer*scb->sg_break); - if (ha->max_xfer > sg[bk_save].length-ha->max_xfer * scb->sg_break) - scb->sg_list[0].length = sg[bk_save].length-ha->max_xfer * scb->sg_break; + 
scb->sg_list[0].address = sg_dma_address(&sg[bk_save]) + + ha->max_xfer*scb->sg_break; + if (ha->max_xfer > sg_dma_len(&sg[bk_save]) - ha->max_xfer * scb->sg_break) + scb->sg_list[0].length = sg_dma_len(&sg[bk_save]) - ha->max_xfer * scb->sg_break; else scb->sg_list[0].length = ha->max_xfer; scb->sg_break++; /* MUST GO HERE for math below to work */ - scb->data_len = scb->sg_list[0].length;; + scb->data_len = scb->sg_list[0].length;; - if (sg[bk_save].length <= ha->max_xfer * scb->sg_break ) { + if (sg_dma_len(&sg[bk_save]) <= ha->max_xfer * scb->sg_break ) { scb->sg_break = 0; /* No more work in this unit */ - if (( bk_save + 1 ) >= scb->scsi_cmd->use_sg) + if (( bk_save + 1 ) >= scb->sg_count) scb->breakup = 0; - else + else scb->breakup = bk_save + 1; } } else { /* ( sg_break == 0 ), so this is our first look at a new sg piece */ - if (sg[bk_save].length > ha->max_xfer) { - scb->sg_list[0].address = cpu_to_le32(sg_dma_address(&sg[bk_save])); - scb->sg_list[0].length = ha->max_xfer; - scb->breakup = bk_save; - scb->sg_break = 1; - scb->data_len = ha->max_xfer; - scb->sg_len = 1; - } else { - /* OK, the next sg is a short one, so loop until full */ - scb->data_len = 0; - scb->sg_len = 0; - scb->sg_break = 0; + if (sg_dma_len(&sg[bk_save]) > ha->max_xfer) { + scb->sg_list[0].address = sg_dma_address(&sg[bk_save]); + scb->sg_list[0].length = ha->max_xfer; + scb->breakup = bk_save; + scb->sg_break = 1; + scb->data_len = ha->max_xfer; + scb->sg_len = 1; + } else { + /* OK, the next sg is a short one, so loop until full */ + scb->data_len = 0; + scb->sg_len = 0; + scb->sg_break = 0; /* We're only doing full units here */ - for (i = bk_save; i < scb->scsi_cmd->use_sg; i++) { - scb->sg_list[i - bk_save].address = cpu_to_le32(sg_dma_address(&sg[i])); - scb->sg_list[i - bk_save].length = cpu_to_le32(sg[i].length); - if (scb->data_len + sg[i].length > ha->max_xfer) { - scb->breakup = i; /* sneaky, if not more work, than breakup is 0 */ - break; - } - scb->data_len += 
sg[i].length; - scb->sg_len++; /* only if we didn't get too big */ + for (i = bk_save; i < scb->sg_count; i++) { + scb->sg_list[i - bk_save].address = sg_dma_address(&sg[i]); + scb->sg_list[i - bk_save].length = cpu_to_le32(sg_dma_len(&sg[i])); + if (scb->data_len + sg_dma_len(&sg[i]) > ha->max_xfer) { + scb->breakup = i; /* sneaky, if not more work, than breakup is 0 */ + break; + } + scb->data_len += sg_dma_len(&sg[i]); + scb->sg_len++; /* only if we didn't get too big */ } } } @@ -4545,27 +4162,34 @@ ips_done(ips_ha_t *ha, ips_scb_t *scb) { /* Also, we need to be sure we don't queue work ( breakup != 0 ) if no more sg units for next time */ scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = virt_to_bus(scb->sg_list); + scb->data_busaddr = scb->sg_busaddr; } - } else { + } else { /* Non S/G Request */ + pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len, + IPS_DMA_DIR(scb)); if ((scb->scsi_cmd->request_bufflen - (bk_save * ha->max_xfer)) > ha->max_xfer) { /* Further breakup required */ scb->data_len = ha->max_xfer; - scb->data_busaddr = virt_to_bus(scb->scsi_cmd->request_buffer + (bk_save * ha->max_xfer)); + scb->data_busaddr = pci_map_single(ha->pcidev, + scb->scsi_cmd->request_buffer + + (bk_save * ha->max_xfer), + scb->data_len, IPS_DMA_DIR(scb)); scb->breakup = bk_save + 1; } else { scb->data_len = scb->scsi_cmd->request_bufflen - (bk_save * ha->max_xfer); - scb->data_busaddr = virt_to_bus(scb->scsi_cmd->request_buffer + (bk_save * ha->max_xfer)); - } + scb->data_busaddr = pci_map_single(ha->pcidev, + scb->scsi_cmd->request_buffer + + (bk_save * ha->max_xfer), + scb->data_len, IPS_DMA_DIR(scb)); + } scb->dcdb.transfer_length = scb->data_len; scb->sg_len = 0; } - scb->dcdb.cmd_attribute |= - ips_command_direction[scb->scsi_cmd->cmnd[0]]; + scb->dcdb.cmd_attribute |= ips_command_direction[scb->scsi_cmd->cmnd[0]]; if (!scb->dcdb.cmd_attribute & 0x3) scb->dcdb.transfer_length = 0; @@ -4598,13 +4222,9 @@ ips_done(ips_ha_t *ha, ips_scb_t 
*scb) { break; } /* end case */ - pci_unmap_sg(ha->pcidev, (struct scatterlist *)scb->scsi_cmd->request_buffer, scb->sg_len, - scsi_to_pci_dma_dir(scb->scsi_cmd->sc_data_direction)); return ; } -#ifndef NO_IPS_CMDLINE } /* end if passthru */ -#endif if (scb->bus) { IPS_HA_LOCK(cpu_flags); @@ -4612,9 +4232,7 @@ ips_done(ips_ha_t *ha, ips_scb_t *scb) { IPS_HA_UNLOCK(cpu_flags); } - /* call back to SCSI layer */ - if (scb->scsi_cmd && scb->scsi_cmd->cmnd[0] != IPS_IOCTL_NEW_COMMAND) - scb->scsi_cmd->scsi_done(scb->scsi_cmd); + scb->scsi_cmd->scsi_done(scb->scsi_cmd); ips_freescb(ha, scb); } @@ -4632,6 +4250,8 @@ static int ips_map_status(ips_ha_t *ha, ips_scb_t *scb, ips_stat_t *sp) { int errcode; int device_error; + uint32_t transfer_len; + IPS_DCDB_TABLE_TAPE *tapeDCDB; METHOD_TRACE("ips_map_status", 1); @@ -4673,8 +4293,16 @@ ips_map_status(ips_ha_t *ha, ips_scb_t *scb, ips_stat_t *sp) { break; - case IPS_ERR_OU_RUN: - if ((scb->bus) && (scb->dcdb.transfer_length < scb->data_len)) { + case IPS_ERR_OU_RUN: + if ( ( scb->cmd.dcdb.op_code == IPS_CMD_EXTENDED_DCDB ) || + ( scb->cmd.dcdb.op_code == IPS_CMD_EXTENDED_DCDB_SG ) ) { + tapeDCDB = ( IPS_DCDB_TABLE_TAPE * ) &scb->dcdb; + transfer_len = tapeDCDB->transfer_length; + } else { + transfer_len = ( uint32_t ) scb->dcdb.transfer_length; + } + + if ((scb->bus) && (transfer_len < scb->data_len)) { /* Underrun - set default to no error */ errcode = DID_OK; @@ -4705,9 +4333,15 @@ ips_map_status(ips_ha_t *ha, ips_scb_t *scb, ips_stat_t *sp) { case IPS_ERR_CKCOND: if (scb->bus) { - memcpy(scb->scsi_cmd->sense_buffer, scb->dcdb.sense_info, - sizeof(scb->scsi_cmd->sense_buffer)); - + if ((scb->cmd.dcdb.op_code == IPS_CMD_EXTENDED_DCDB) || + (scb->cmd.dcdb.op_code == IPS_CMD_EXTENDED_DCDB_SG)) { + tapeDCDB = (IPS_DCDB_TABLE_TAPE *) &scb->dcdb; + memcpy(scb->scsi_cmd->sense_buffer, tapeDCDB->sense_info, + sizeof(tapeDCDB->sense_info)); + } else { + memcpy(scb->scsi_cmd->sense_buffer, scb->dcdb.sense_info, + 
sizeof(scb->scsi_cmd->sense_buffer)); + } device_error = 2; /* check condition */ } @@ -4729,34 +4363,14 @@ ips_map_status(ips_ha_t *ha, ips_scb_t *scb, ips_stat_t *sp) { /****************************************************************************/ /* */ -/* Routine Name: ips_send */ -/* */ -/* Routine Description: */ -/* */ -/* Wrapper for ips_send_cmd */ -/* */ -/****************************************************************************/ -static int -ips_send(ips_ha_t *ha, ips_scb_t *scb, ips_scb_callback callback) { - int ret; - - METHOD_TRACE("ips_send", 1); - - scb->callback = callback; - - ret = ips_send_cmd(ha, scb); - - return (ret); -} - -/****************************************************************************/ -/* */ /* Routine Name: ips_send_wait */ /* */ /* Routine Description: */ /* */ /* Send a command to the controller and wait for it to return */ /* */ +/* The FFDC Time Stamp use this function for the callback, but doesn't */ +/* actually need to wait. */ /****************************************************************************/ static int ips_send_wait(ips_ha_t *ha, ips_scb_t *scb, int timeout, int intr) { @@ -4764,15 +4378,18 @@ ips_send_wait(ips_ha_t *ha, ips_scb_t *scb, int timeout, int intr) { METHOD_TRACE("ips_send_wait", 1); - ha->waitflag = TRUE; - ha->cmd_in_progress = scb->cdb[0]; - - ret = ips_send(ha, scb, ipsintr_blocking); + if (intr != IPS_FFDC) { /* Won't be Waiting if this is a Time Stamp */ + ha->waitflag = TRUE; + ha->cmd_in_progress = scb->cdb[0]; + } + scb->callback = ipsintr_blocking; + ret = ips_send_cmd(ha, scb); if ((ret == IPS_FAILURE) || (ret == IPS_SUCCESS_IMM)) return (ret); - ret = ips_wait(ha, timeout, intr); + if (intr != IPS_FFDC) /* Don't Wait around if this is a Time Stamp */ + ret = ips_wait(ha, timeout, intr); return (ret); } @@ -4791,6 +4408,7 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { int ret; char *sp; int device_error; + IPS_DCDB_TABLE_TAPE *tapeDCDB; METHOD_TRACE("ips_send_cmd", 1); @@ 
-4809,11 +4427,7 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { return (1); } -#ifndef NO_IPS_CMDLINE } else if ((scb->bus == 0) && (!ips_is_passthru(scb->scsi_cmd))) { -#else - } else if (scb->bus == 0) { -#endif /* command to logical bus -- interpret */ ret = IPS_SUCCESS_IMM; @@ -4862,9 +4476,12 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { } else { scb->cmd.logical_info.op_code = IPS_CMD_GET_LD_INFO; scb->cmd.logical_info.command_id = IPS_COMMAND_ID(ha, scb); - scb->cmd.logical_info.buffer_addr = cpu_to_le32(virt_to_bus(&ha->adapt->logical_drive_info)); scb->cmd.logical_info.reserved = 0; scb->cmd.logical_info.reserved2 = 0; + scb->data_len = sizeof(ha->adapt->logical_drive_info); + scb->data_busaddr = ha->adapt->hw_status_start + sizeof(IPS_ADAPTER) + - sizeof(IPS_LD_INFO); + scb->cmd.logical_info.buffer_addr = scb->data_busaddr; ret = IPS_SUCCESS; } @@ -4955,17 +4572,24 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { case MODE_SENSE: scb->cmd.basic_io.op_code = IPS_CMD_ENQUIRY; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); - scb->cmd.basic_io.sg_addr = cpu_to_le32(virt_to_bus(ha->enq)); + scb->data_len = sizeof(*ha->enq); + scb->data_busaddr = pci_map_single(ha->pcidev, ha->enq, + scb->data_len, IPS_DMA_DIR(scb)); + scb->cmd.basic_io.sg_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; ret = IPS_SUCCESS; break; case READ_CAPACITY: scb->cmd.logical_info.op_code = IPS_CMD_GET_LD_INFO; scb->cmd.logical_info.command_id = IPS_COMMAND_ID(ha, scb); - scb->cmd.logical_info.buffer_addr = cpu_to_le32(virt_to_bus(&ha->adapt->logical_drive_info)); scb->cmd.logical_info.reserved = 0; scb->cmd.logical_info.reserved2 = 0; scb->cmd.logical_info.reserved3 = 0; + scb->data_len = sizeof(ha->adapt->logical_drive_info); + scb->data_busaddr = ha->adapt->hw_status_start + sizeof(IPS_ADAPTER) + - sizeof(IPS_LD_INFO); + scb->cmd.logical_info.buffer_addr = scb->data_busaddr; ret = IPS_SUCCESS; break; @@ -5018,31 +4642,64 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t 
*scb) { ha->dcdb_active[scb->bus-1] |= (1 << scb->target_id); scb->cmd.dcdb.command_id = IPS_COMMAND_ID(ha, scb); - scb->cmd.dcdb.dcdb_address = cpu_to_le32(virt_to_bus(&scb->dcdb)); + scb->cmd.dcdb.dcdb_address = cpu_to_le32(scb->scb_busaddr + + (unsigned long)&scb->dcdb - + (unsigned long)scb); scb->cmd.dcdb.reserved = 0; scb->cmd.dcdb.reserved2 = 0; scb->cmd.dcdb.reserved3 = 0; - scb->dcdb.device_address = ((scb->bus - 1) << 4) | scb->target_id; - scb->dcdb.cmd_attribute |= IPS_DISCONNECT_ALLOWED; - - if (scb->timeout) { - if (scb->timeout <= 10) - scb->dcdb.cmd_attribute |= IPS_TIMEOUT10; - else if (scb->timeout <= 60) - scb->dcdb.cmd_attribute |= IPS_TIMEOUT60; + if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ + if (!scb->sg_len) + scb->cmd.dcdb.op_code = IPS_CMD_EXTENDED_DCDB; else - scb->dcdb.cmd_attribute |= IPS_TIMEOUT20M; - } + scb->cmd.dcdb.op_code = IPS_CMD_EXTENDED_DCDB_SG; + + tapeDCDB = (IPS_DCDB_TABLE_TAPE *) &scb->dcdb; /* Use Same Data Area as Old DCDB Struct */ + tapeDCDB->device_address = ((scb->bus - 1) << 4) | scb->target_id; + tapeDCDB->cmd_attribute |= IPS_DISCONNECT_ALLOWED; + + if (scb->timeout) { + if (scb->timeout <= 10) + tapeDCDB->cmd_attribute |= IPS_TIMEOUT10; + else if (scb->timeout <= 60) + tapeDCDB->cmd_attribute |= IPS_TIMEOUT60; + else + tapeDCDB->cmd_attribute |= IPS_TIMEOUT20M; + } - if (!(scb->dcdb.cmd_attribute & IPS_TIMEOUT20M)) - scb->dcdb.cmd_attribute |= IPS_TIMEOUT20M; + if (!(tapeDCDB->cmd_attribute & IPS_TIMEOUT20M)) + tapeDCDB->cmd_attribute |= IPS_TIMEOUT20M; + + tapeDCDB->sense_length = sizeof(tapeDCDB->sense_info); + tapeDCDB->transfer_length = scb->data_len; + tapeDCDB->buffer_pointer = cpu_to_le32(scb->data_busaddr); + tapeDCDB->sg_count = scb->sg_len; + tapeDCDB->cdb_length = scb->scsi_cmd->cmd_len; + memcpy(tapeDCDB->scsi_cdb, scb->scsi_cmd->cmnd, scb->scsi_cmd->cmd_len); + } else { + scb->dcdb.device_address = ((scb->bus - 1) << 4) | scb->target_id; + scb->dcdb.cmd_attribute |= 
IPS_DISCONNECT_ALLOWED; + + if (scb->timeout) { + if (scb->timeout <= 10) + scb->dcdb.cmd_attribute |= IPS_TIMEOUT10; + else if (scb->timeout <= 60) + scb->dcdb.cmd_attribute |= IPS_TIMEOUT60; + else + scb->dcdb.cmd_attribute |= IPS_TIMEOUT20M; + } - scb->dcdb.sense_length = sizeof(scb->scsi_cmd->sense_buffer); - scb->dcdb.buffer_pointer = cpu_to_le32(scb->data_busaddr); - scb->dcdb.sg_count = scb->sg_len; - scb->dcdb.cdb_length = scb->scsi_cmd->cmd_len; - memcpy(scb->dcdb.scsi_cdb, scb->scsi_cmd->cmnd, scb->scsi_cmd->cmd_len); + if (!(scb->dcdb.cmd_attribute & IPS_TIMEOUT20M)) + scb->dcdb.cmd_attribute |= IPS_TIMEOUT20M; + + scb->dcdb.sense_length = sizeof(scb->dcdb.sense_info); + scb->dcdb.transfer_length = scb->data_len; + scb->dcdb.buffer_pointer = cpu_to_le32(scb->data_busaddr); + scb->dcdb.sg_count = scb->sg_len; + scb->dcdb.cdb_length = scb->scsi_cmd->cmd_len; + memcpy(scb->dcdb.scsi_cdb, scb->scsi_cmd->cmnd, scb->scsi_cmd->cmd_len); + } } return ((*ha->func.issue)(ha, scb)); @@ -5061,8 +4718,8 @@ static void ips_chkstatus(ips_ha_t *ha, IPS_STATUS *pstatus) { ips_scb_t *scb; ips_stat_t *sp; - u_int8_t basic_status; - u_int8_t ext_status; + uint8_t basic_status; + uint8_t ext_status; int errcode; METHOD_TRACE("ips_chkstatus", 1); @@ -5091,11 +4748,9 @@ ips_chkstatus(ips_ha_t *ha, IPS_STATUS *pstatus) { scb->target_id, scb->lun); -#ifndef NO_IPS_CMDLINE if ((scb->scsi_cmd) && (ips_is_passthru(scb->scsi_cmd))) /* passthru - just returns the raw result */ return ; -#endif errcode = DID_OK; @@ -5285,7 +4940,7 @@ ips_rdcap(ips_ha_t *ha, ips_scb_t *scb) { cap = (IPS_SCSI_CAPACITY *) scb->scsi_cmd->request_buffer; cap->lba = cpu_to_be32(le32_to_cpu(ha->adapt->logical_drive_info.drive_info[scb->target_id].sector_count) - 1); - cap->len = cpu_to_be32((u_int32_t) IPS_BLKSIZE); + cap->len = cpu_to_be32((uint32_t) IPS_BLKSIZE); return (1); } @@ -5301,9 +4956,9 @@ ips_rdcap(ips_ha_t *ha, ips_scb_t *scb) { 
/****************************************************************************/ static int ips_msense(ips_ha_t *ha, ips_scb_t *scb) { - u_int16_t heads; - u_int16_t sectors; - u_int32_t cylinders; + uint16_t heads; + uint16_t sectors; + uint32_t cylinders; IPS_SCSI_MODE_PAGE_DATA mdata; METHOD_TRACE("ips_msense", 1); @@ -5406,7 +5061,6 @@ ips_reqsen(ips_ha_t *ha, ips_scb_t *scb) { /****************************************************************************/ static void ips_free(ips_ha_t *ha) { - int i; METHOD_TRACE("ips_free", 1); @@ -5422,7 +5076,8 @@ ips_free(ips_ha_t *ha) { } if (ha->adapt) { - kfree(ha->adapt); + pci_free_consistent(ha->pcidev,sizeof(IPS_ADAPTER)+ sizeof(IPS_IO_CMD), + ha->adapt, ha->adapt->hw_status_start); ha->adapt = NULL; } @@ -5436,27 +5091,13 @@ ips_free(ips_ha_t *ha) { ha->subsys = NULL; } - if (ha->dummy) { - kfree(ha->dummy); - ha->dummy = NULL; - } - if (ha->ioctl_data) { free_pages((unsigned long) ha->ioctl_data, ha->ioctl_order); ha->ioctl_data = NULL; ha->ioctl_datasize = 0; ha->ioctl_order = 0; } - - if (ha->scbs) { - for (i = 0; i < ha->max_cmds; i++) { - if (ha->scbs[i].sg_list) - kfree(ha->scbs[i].sg_list); - } - - kfree(ha->scbs); - ha->scbs = NULL; - } /* end if */ + ips_deallocatescbs(ha, ha->max_cmds); /* free memory mapped (if applicable) */ if (ha->mem_ptr) { @@ -5465,7 +5106,7 @@ ips_free(ips_ha_t *ha) { ha->mem_ptr = NULL; } -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,17) +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) if (ha->mem_addr) release_mem_region(ha->mem_addr, ha->mem_len); #endif @@ -5473,6 +5114,26 @@ ips_free(ips_ha_t *ha) { } } +/****************************************************************************/ +/* */ +/* Routine Name: ips_deallocatescbs */ +/* */ +/* Routine Description: */ +/* */ +/* Free the command blocks */ +/* */ +/****************************************************************************/ +static int +ips_deallocatescbs(ips_ha_t *ha, int cmds) { + if (ha->scbs) { + 
pci_free_consistent(ha->pcidev,sizeof(IPS_SG_LIST) * IPS_MAX_SG * + cmds, ha->scbs->sg_list, ha->scbs->sg_busaddr); + pci_free_consistent(ha->pcidev, sizeof(ips_scb_t) * cmds, + ha->scbs, ha->scbs->scb_busaddr); + ha->scbs = NULL; + } /* end if */ +return 1; +} /****************************************************************************/ /* */ @@ -5486,25 +5147,32 @@ ips_free(ips_ha_t *ha) { static int ips_allocatescbs(ips_ha_t *ha) { ips_scb_t *scb_p; + IPS_SG_LIST* ips_sg; int i; - + dma_addr_t command_dma, sg_dma; + METHOD_TRACE("ips_allocatescbs", 1); - /* Allocate memory for the CCBs */ - ha->scbs = (ips_scb_t *) kmalloc(ha->max_cmds * sizeof(ips_scb_t), GFP_KERNEL); + /* Allocate memory for the SCBs */ + ha->scbs = pci_alloc_consistent(ha->pcidev, ha->max_cmds * sizeof(ips_scb_t), + &command_dma); if (ha->scbs == NULL) return 0; + ips_sg = pci_alloc_consistent(ha->pcidev, sizeof(IPS_SG_LIST) * IPS_MAX_SG * + ha->max_cmds, &sg_dma); + if(ips_sg == NULL){ + pci_free_consistent(ha->pcidev,ha->max_cmds * sizeof(ips_scb_t),ha->scbs, command_dma); + return 0; + } memset(ha->scbs, 0, ha->max_cmds * sizeof(ips_scb_t)); for (i = 0; i < ha->max_cmds; i++) { scb_p = &ha->scbs[i]; - - /* allocate S/G list */ - scb_p->sg_list = (IPS_SG_LIST *) kmalloc(sizeof(IPS_SG_LIST) * IPS_MAX_SG, GFP_ATOMIC); - - if (! 
scb_p->sg_list) - return (0); + scb_p->scb_busaddr = command_dma + sizeof(ips_scb_t) * i; + /* set up S/G list */ + scb_p->sg_list = ips_sg + i * IPS_MAX_SG; + scb_p->sg_busaddr = sg_dma + sizeof(IPS_SG_LIST) * IPS_MAX_SG * i; /* add to the free list */ if (i < ha->max_cmds - 1) { @@ -5529,30 +5197,34 @@ ips_allocatescbs(ips_ha_t *ha) { static void ips_init_scb(ips_ha_t *ha, ips_scb_t *scb) { IPS_SG_LIST *sg_list; - + uint32_t cmd_busaddr, sg_busaddr; METHOD_TRACE("ips_init_scb", 1); if (scb == NULL) return ; sg_list = scb->sg_list; - + cmd_busaddr = scb->scb_busaddr; + sg_busaddr = scb->sg_busaddr; /* zero fill */ memset(scb, 0, sizeof(ips_scb_t)); memset(ha->dummy, 0, sizeof(IPS_IO_CMD)); /* Initialize dummy command bucket */ ha->dummy->op_code = 0xFF; - ha->dummy->ccsar = cpu_to_le32(virt_to_bus(ha->dummy)); + ha->dummy->ccsar = cpu_to_le32(ha->adapt->hw_status_start + + sizeof(IPS_ADAPTER)); ha->dummy->command_id = IPS_MAX_CMDS; /* set bus address of scb */ - scb->scb_busaddr = virt_to_bus(scb); + scb->scb_busaddr = cmd_busaddr; + scb->sg_busaddr = sg_busaddr; scb->sg_list = sg_list; /* Neptune Fix */ - scb->cmd.basic_io.cccr = cpu_to_le32((u_int32_t) IPS_BIT_ILE); - scb->cmd.basic_io.ccsar = cpu_to_le32(virt_to_bus(ha->dummy)); + scb->cmd.basic_io.cccr = cpu_to_le32((uint32_t) IPS_BIT_ILE); + scb->cmd.basic_io.ccsar = cpu_to_le32(ha->adapt->hw_status_start + + sizeof(IPS_ADAPTER)); } /****************************************************************************/ @@ -5606,6 +5278,13 @@ ips_freescb(ips_ha_t *ha, ips_scb_t *scb) { unsigned long cpu_flags; METHOD_TRACE("ips_freescb", 1); + if(scb->flags & IPS_SCB_MAP_SG) + pci_unmap_sg(ha->pcidev, scb->scsi_cmd->request_buffer, + scb->scsi_cmd->use_sg, + IPS_DMA_DIR(scb)); + else if(scb->flags & IPS_SCB_MAP_SINGLE) + pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len, + IPS_DMA_DIR(scb)); /* check to make sure this is not our "special" scb */ if (IPS_COMMAND_ID(ha, scb) < (ha->max_cmds - 1)) { @@ 
-5622,13 +5301,13 @@ ips_freescb(ips_ha_t *ha, ips_scb_t *scb) { /* */ /* Routine Description: */ /* */ -/* Reset the controller */ +/* Is controller initialized ? */ /* */ /****************************************************************************/ static int ips_isinit_copperhead(ips_ha_t *ha) { - u_int8_t scpr; - u_int8_t isr; + uint8_t scpr; + uint8_t isr; METHOD_TRACE("ips_isinit_copperhead", 1); @@ -5647,13 +5326,13 @@ ips_isinit_copperhead(ips_ha_t *ha) { /* */ /* Routine Description: */ /* */ -/* Reset the controller */ +/* Is controller initialized ? */ /* */ /****************************************************************************/ static int ips_isinit_copperhead_memio(ips_ha_t *ha) { - u_int8_t isr=0; - u_int8_t scpr; + uint8_t isr=0; + uint8_t scpr; METHOD_TRACE("ips_is_init_copperhead_memio", 1); @@ -5672,18 +5351,18 @@ ips_isinit_copperhead_memio(ips_ha_t *ha) { /* */ /* Routine Description: */ /* */ -/* Reset the controller */ +/* Is controller initialized ? */ /* */ /****************************************************************************/ static int ips_isinit_morpheus(ips_ha_t *ha) { - u_int32_t post; - u_int32_t bits; + uint32_t post; + uint32_t bits; METHOD_TRACE("ips_is_init_morpheus", 1); - post = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I960_MSG0)); - bits = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I2O_HIR)); + post = readl(ha->mem_ptr + IPS_REG_I960_MSG0); + bits = readl(ha->mem_ptr + IPS_REG_I2O_HIR); if (post == 0) return (0); @@ -5733,13 +5412,13 @@ ips_enable_int_copperhead_memio(ips_ha_t *ha) { /****************************************************************************/ static void ips_enable_int_morpheus(ips_ha_t *ha) { - u_int32_t Oimr; + uint32_t Oimr; METHOD_TRACE("ips_enable_int_morpheus", 1); - Oimr = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I960_OIMR)); + Oimr = readl(ha->mem_ptr + IPS_REG_I960_OIMR); Oimr &= ~0x08; - writel(cpu_to_le32(Oimr), ha->mem_ptr + IPS_REG_I960_OIMR); + writel(Oimr, ha->mem_ptr + 
IPS_REG_I960_OIMR); } /****************************************************************************/ @@ -5753,11 +5432,11 @@ ips_enable_int_morpheus(ips_ha_t *ha) { /****************************************************************************/ static int ips_init_copperhead(ips_ha_t *ha) { - u_int8_t Isr; - u_int8_t Cbsp; - u_int8_t PostByte[IPS_MAX_POST_BYTES]; - u_int8_t ConfigByte[IPS_MAX_CONFIG_BYTES]; - int i, j; + uint8_t Isr; + uint8_t Cbsp; + uint8_t PostByte[IPS_MAX_POST_BYTES]; + uint8_t ConfigByte[IPS_MAX_CONFIG_BYTES]; + int i, j; METHOD_TRACE("ips_init_copperhead", 1); @@ -5767,8 +5446,9 @@ ips_init_copperhead(ips_ha_t *ha) { if (Isr & IPS_BIT_GHI) break; + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); - } + } if (j >= 45) /* error occurred */ @@ -5791,7 +5471,8 @@ ips_init_copperhead(ips_ha_t *ha) { if (Isr & IPS_BIT_GHI) break; - MDELAY(IPS_ONE_SEC); /* 1 sec */ + /* Delay for 1 Second */ + MDELAY(IPS_ONE_SEC); } if (j >= 240) @@ -5806,8 +5487,9 @@ ips_init_copperhead(ips_ha_t *ha) { Cbsp = inb(ha->io_addr + IPS_REG_CBSP); if ((Cbsp & IPS_BIT_OP) == 0) - break; + break; + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); } @@ -5842,11 +5524,11 @@ ips_init_copperhead(ips_ha_t *ha) { /****************************************************************************/ static int ips_init_copperhead_memio(ips_ha_t *ha) { - u_int8_t Isr=0; - u_int8_t Cbsp; - u_int8_t PostByte[IPS_MAX_POST_BYTES]; - u_int8_t ConfigByte[IPS_MAX_CONFIG_BYTES]; - int i, j; + uint8_t Isr=0; + uint8_t Cbsp; + uint8_t PostByte[IPS_MAX_POST_BYTES]; + uint8_t ConfigByte[IPS_MAX_CONFIG_BYTES]; + int i, j; METHOD_TRACE("ips_init_copperhead_memio", 1); @@ -5856,6 +5538,7 @@ ips_init_copperhead_memio(ips_ha_t *ha) { if (Isr & IPS_BIT_GHI) break; + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); } @@ -5880,7 +5563,8 @@ ips_init_copperhead_memio(ips_ha_t *ha) { if (Isr & IPS_BIT_GHI) break; - MDELAY(IPS_ONE_SEC); /* 100 msec */ + /* Delay for 1 Second */ + MDELAY(IPS_ONE_SEC); } if (j >= 240) @@ 
-5897,6 +5581,7 @@ ips_init_copperhead_memio(ips_ha_t *ha) { if ((Cbsp & IPS_BIT_OP) == 0) break; + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); } @@ -5905,7 +5590,7 @@ ips_init_copperhead_memio(ips_ha_t *ha) { return (0); /* setup CCCR */ - writel(cpu_to_le32(0x1010), ha->mem_ptr + IPS_REG_CCCR); + writel(0x1010, ha->mem_ptr + IPS_REG_CCCR); /* Enable busmastering */ writeb(IPS_BIT_EBM, ha->mem_ptr + IPS_REG_SCPR); @@ -5932,21 +5617,22 @@ ips_init_copperhead_memio(ips_ha_t *ha) { /****************************************************************************/ static int ips_init_morpheus(ips_ha_t *ha) { - u_int32_t Post; - u_int32_t Config; - u_int32_t Isr; - u_int32_t Oimr; + uint32_t Post; + uint32_t Config; + uint32_t Isr; + uint32_t Oimr; int i; METHOD_TRACE("ips_init_morpheus", 1); /* Wait up to 45 secs for Post */ for (i = 0; i < 45; i++) { - Isr = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I2O_HIR)); + Isr = readl(ha->mem_ptr + IPS_REG_I2O_HIR); if (Isr & IPS_BIT_I960_MSG0I) break; + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); } @@ -5958,11 +5644,11 @@ ips_init_morpheus(ips_ha_t *ha) { return (0); } - Post = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I960_MSG0)); + Post = readl(ha->mem_ptr + IPS_REG_I960_MSG0); /* Clear the interrupt bit */ - Isr = (u_int32_t) IPS_BIT_I960_MSG0I; - writel(cpu_to_le32(Isr), ha->mem_ptr + IPS_REG_I2O_HIR); + Isr = (uint32_t) IPS_BIT_I960_MSG0I; + writel(Isr, ha->mem_ptr + IPS_REG_I2O_HIR); if (Post < (IPS_GOOD_POST_STATUS << 8)) { printk(KERN_WARNING "(%s%d) reset controller fails (post status %x).\n", @@ -5973,12 +5659,13 @@ ips_init_morpheus(ips_ha_t *ha) { /* Wait up to 240 secs for config bytes */ for (i = 0; i < 240; i++) { - Isr = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I2O_HIR)); + Isr = readl(ha->mem_ptr + IPS_REG_I2O_HIR); if (Isr & IPS_BIT_I960_MSG1I) break; - MDELAY(IPS_ONE_SEC); /* 100 msec */ + /* Delay for 1 Second */ + MDELAY(IPS_ONE_SEC); } if (i >= 240) { @@ -5989,16 +5676,16 @@ ips_init_morpheus(ips_ha_t *ha) { 
return (0); } - Config = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I960_MSG1)); + Config = readl(ha->mem_ptr + IPS_REG_I960_MSG1); /* Clear interrupt bit */ - Isr = (u_int32_t) IPS_BIT_I960_MSG1I; - writel(cpu_to_le32(Isr), ha->mem_ptr + IPS_REG_I2O_HIR); + Isr = (uint32_t) IPS_BIT_I960_MSG1I; + writel(Isr, ha->mem_ptr + IPS_REG_I2O_HIR); /* Turn on the interrupts */ - Oimr = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I960_OIMR)); + Oimr = readl(ha->mem_ptr + IPS_REG_I960_OIMR); Oimr &= ~0x8; - writel(cpu_to_le32(Oimr), ha->mem_ptr + IPS_REG_I960_OIMR); + writel(Oimr, ha->mem_ptr + IPS_REG_I960_OIMR); /* if we get here then everything went OK */ return (1); @@ -6031,10 +5718,15 @@ ips_reset_copperhead(ips_ha_t *ha) { reset_counter++; outb(IPS_BIT_RST, ha->io_addr + IPS_REG_SCPR); + + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); + outb(0, ha->io_addr + IPS_REG_SCPR); - MDELAY(IPS_ONE_SEC); + /* Delay for 1 Second */ + MDELAY(IPS_ONE_SEC); + if ((*ha->func.init)(ha)) break; else if (reset_counter >= 2) { @@ -6076,10 +5768,15 @@ ips_reset_copperhead_memio(ips_ha_t *ha) { reset_counter++; writeb(IPS_BIT_RST, ha->mem_ptr + IPS_REG_SCPR); + + /* Delay for 1 Second */ MDELAY(IPS_ONE_SEC); + writeb(0, ha->mem_ptr + IPS_REG_SCPR); - MDELAY(IPS_ONE_SEC); + /* Delay for 1 Second */ + MDELAY(IPS_ONE_SEC); + if ((*ha->func.init)(ha)) break; else if (reset_counter >= 2) { @@ -6106,7 +5803,7 @@ ips_reset_copperhead_memio(ips_ha_t *ha) { static int ips_reset_morpheus(ips_ha_t *ha) { int reset_counter; - u_int8_t junk; + uint8_t junk; unsigned long cpu_flags; METHOD_TRACE("ips_reset_morpheus", 1); @@ -6121,11 +5818,11 @@ ips_reset_morpheus(ips_ha_t *ha) { while (reset_counter < 2) { reset_counter++; - writel(cpu_to_le32(0x80000000), ha->mem_ptr + IPS_REG_I960_IDR); + writel(0x80000000, ha->mem_ptr + IPS_REG_I960_IDR); - /* Delay for 5 sec */ + /* Delay for 5 Seconds */ MDELAY(5 * IPS_ONE_SEC); - + /* Do a PCI config read to wait for adapter */ pci_read_config_byte(ha->pcidev, 4, 
&junk); @@ -6154,7 +5851,7 @@ ips_reset_morpheus(ips_ha_t *ha) { /****************************************************************************/ static void ips_statinit(ips_ha_t *ha) { - u_int32_t phys_status_start; + uint32_t phys_status_start; METHOD_TRACE("ips_statinit", 1); @@ -6162,13 +5859,12 @@ ips_statinit(ips_ha_t *ha) { ha->adapt->p_status_end = ha->adapt->status + IPS_MAX_CMDS; ha->adapt->p_status_tail = ha->adapt->status; - phys_status_start = virt_to_bus(ha->adapt->status); + phys_status_start = ha->adapt->hw_status_start; outl(cpu_to_le32(phys_status_start), ha->io_addr + IPS_REG_SQSR); outl(cpu_to_le32(phys_status_start + IPS_STATUS_Q_SIZE), ha->io_addr + IPS_REG_SQER); outl(cpu_to_le32(phys_status_start + IPS_STATUS_SIZE), ha->io_addr + IPS_REG_SQHR); outl(cpu_to_le32(phys_status_start), ha->io_addr + IPS_REG_SQTR); - ha->adapt->hw_status_start = phys_status_start; ha->adapt->hw_status_tail = phys_status_start; } @@ -6183,7 +5879,7 @@ ips_statinit(ips_ha_t *ha) { /****************************************************************************/ static void ips_statinit_memio(ips_ha_t *ha) { - u_int32_t phys_status_start; + uint32_t phys_status_start; METHOD_TRACE("ips_statinit_memio", 1); @@ -6191,13 +5887,12 @@ ips_statinit_memio(ips_ha_t *ha) { ha->adapt->p_status_end = ha->adapt->status + IPS_MAX_CMDS; ha->adapt->p_status_tail = ha->adapt->status; - phys_status_start = virt_to_bus(ha->adapt->status); - writel(cpu_to_le32(phys_status_start), ha->mem_ptr + IPS_REG_SQSR); - writel(cpu_to_le32(phys_status_start + IPS_STATUS_Q_SIZE), ha->mem_ptr + IPS_REG_SQER); - writel(cpu_to_le32(phys_status_start + IPS_STATUS_SIZE), ha->mem_ptr + IPS_REG_SQHR); - writel(cpu_to_le32(phys_status_start), ha->mem_ptr + IPS_REG_SQTR); + phys_status_start = ha->adapt->hw_status_start; + writel(phys_status_start, ha->mem_ptr + IPS_REG_SQSR); + writel(phys_status_start + IPS_STATUS_Q_SIZE, ha->mem_ptr + IPS_REG_SQER); + writel(phys_status_start + IPS_STATUS_SIZE, ha->mem_ptr 
+ IPS_REG_SQHR); + writel(phys_status_start, ha->mem_ptr + IPS_REG_SQTR); - ha->adapt->hw_status_start = phys_status_start; ha->adapt->hw_status_tail = phys_status_start; } @@ -6210,7 +5905,7 @@ ips_statinit_memio(ips_ha_t *ha) { /* Remove an element from the status queue */ /* */ /****************************************************************************/ -static u_int32_t +static uint32_t ips_statupd_copperhead(ips_ha_t *ha) { METHOD_TRACE("ips_statupd_copperhead", 1); @@ -6236,7 +5931,7 @@ ips_statupd_copperhead(ips_ha_t *ha) { /* Remove an element from the status queue */ /* */ /****************************************************************************/ -static u_int32_t +static uint32_t ips_statupd_copperhead_memio(ips_ha_t *ha) { METHOD_TRACE("ips_statupd_copperhead_memio", 1); @@ -6248,7 +5943,7 @@ ips_statupd_copperhead_memio(ips_ha_t *ha) { ha->adapt->hw_status_tail = ha->adapt->hw_status_start; } - writel(cpu_to_le32(ha->adapt->hw_status_tail), ha->mem_ptr + IPS_REG_SQTR); + writel(ha->adapt->hw_status_tail, ha->mem_ptr + IPS_REG_SQTR); return (ha->adapt->p_status_tail->value); } @@ -6262,13 +5957,13 @@ ips_statupd_copperhead_memio(ips_ha_t *ha) { /* Remove an element from the status queue */ /* */ /****************************************************************************/ -static u_int32_t +static uint32_t ips_statupd_morpheus(ips_ha_t *ha) { - u_int32_t val; + uint32_t val; METHOD_TRACE("ips_statupd_morpheus", 1); - val = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I2O_OUTMSGQ)); + val = readl(ha->mem_ptr + IPS_REG_I2O_OUTMSGQ); return (val); } @@ -6284,8 +5979,8 @@ ips_statupd_morpheus(ips_ha_t *ha) { /****************************************************************************/ static int ips_issue_copperhead(ips_ha_t *ha, ips_scb_t *scb) { - u_int32_t TimeOut; - u_int32_t val; + uint32_t TimeOut; + uint32_t val; unsigned long cpu_flags; METHOD_TRACE("ips_issue_copperhead", 1); @@ -6347,8 +6042,8 @@ ips_issue_copperhead(ips_ha_t *ha, ips_scb_t 
*scb) { /****************************************************************************/ static int ips_issue_copperhead_memio(ips_ha_t *ha, ips_scb_t *scb) { - u_int32_t TimeOut; - u_int32_t val; + uint32_t TimeOut; + uint32_t val; unsigned long cpu_flags; METHOD_TRACE("ips_issue_copperhead_memio", 1); @@ -6373,7 +6068,7 @@ ips_issue_copperhead_memio(ips_ha_t *ha, ips_scb_t *scb) { TimeOut = 0; - while ((val = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_CCCR))) & IPS_BIT_SEM) { + while ((val = readl(ha->mem_ptr + IPS_REG_CCCR)) & IPS_BIT_SEM) { udelay(1000); if (++TimeOut >= IPS_SEM_TIMEOUT) { @@ -6391,8 +6086,8 @@ ips_issue_copperhead_memio(ips_ha_t *ha, ips_scb_t *scb) { } /* end if */ } /* end while */ - writel(cpu_to_le32(scb->scb_busaddr), ha->mem_ptr + IPS_REG_CCSAR); - writel(cpu_to_le32(IPS_BIT_START_CMD), ha->mem_ptr + IPS_REG_CCCR); + writel(scb->scb_busaddr, ha->mem_ptr + IPS_REG_CCSAR); + writel(IPS_BIT_START_CMD, ha->mem_ptr + IPS_REG_CCCR); IPS_HA_UNLOCK(cpu_flags); @@ -6472,7 +6167,7 @@ ips_issue_i2o_memio(ips_ha_t *ha, ips_scb_t *scb) { IPS_HA_LOCK(cpu_flags); - writel(cpu_to_le32(scb->scb_busaddr), ha->mem_ptr + IPS_REG_I2O_INMSGQ); + writel(scb->scb_busaddr, ha->mem_ptr + IPS_REG_I2O_INMSGQ); IPS_HA_UNLOCK(cpu_flags); @@ -6490,7 +6185,7 @@ ips_issue_i2o_memio(ips_ha_t *ha, ips_scb_t *scb) { /****************************************************************************/ static int ips_isintr_copperhead(ips_ha_t *ha) { - u_int8_t Isr; + uint8_t Isr; METHOD_TRACE("ips_isintr_copperhead", 2); @@ -6522,7 +6217,7 @@ ips_isintr_copperhead(ips_ha_t *ha) { /****************************************************************************/ static int ips_isintr_copperhead_memio(ips_ha_t *ha) { - u_int8_t Isr; + uint8_t Isr; METHOD_TRACE("ips_isintr_memio", 2); @@ -6554,11 +6249,11 @@ ips_isintr_copperhead_memio(ips_ha_t *ha) { /****************************************************************************/ static int ips_isintr_morpheus(ips_ha_t *ha) { - u_int32_t Isr; 
+ uint32_t Isr; METHOD_TRACE("ips_isintr_morpheus", 2); - Isr = le32_to_cpu(readl(ha->mem_ptr + IPS_REG_I2O_HIR)); + Isr = readl(ha->mem_ptr + IPS_REG_I2O_HIR); if (Isr & IPS_BIT_I2O_OPQI) return (1); @@ -6577,15 +6272,15 @@ ips_isintr_morpheus(ips_ha_t *ha) { /****************************************************************************/ static int ips_wait(ips_ha_t *ha, int time, int intr) { - int ret; - u_int8_t done; + int ret; + int done; METHOD_TRACE("ips_wait", 1); ret = IPS_FAILURE; done = FALSE; - time *= IPS_ONE_SEC; /* convert seconds to milliseconds */ + time *= IPS_ONE_SEC; /* convert seconds */ while ((time > 0) && (!done)) { if (intr == IPS_INTR_ON) { @@ -6618,39 +6313,10 @@ ips_wait(ips_ha_t *ha, int time, int intr) { (*ha->func.intr)(ha); clear_bit(IPS_IN_INTR, &ha->flags); - } else if (intr == IPS_INTR_HAL) { - struct Scsi_Host *host = ips_sh[ha->host_num]; + } - if (ha->waitflag == FALSE) { - /* - * controller generated an interrupt to - * acknowledge completion of the command - * and ips_intr() has serviced the interrupt. - */ - ret = IPS_SUCCESS; - done = TRUE; - break; - } - - /* - * NOTE: since we were not called with the iorequest lock - * we must obtain it before we can call the interrupt handler. - * We were called under the HA lock so we can assume that interrupts - * are masked. 
- */ - spin_lock(host->host_lock); - - while (test_and_set_bit(IPS_IN_INTR, &ha->flags)) - udelay(1000); - - (*ha->func.intr)(ha); - - clear_bit(IPS_IN_INTR, &ha->flags); - - spin_unlock(host->host_lock); - } - - udelay(1000); /* 1 milisecond */ + /* This looks like a very evil loop, but it only does this during start-up */ + udelay(1000); time--; } @@ -6704,6 +6370,8 @@ ips_write_driver_status(ips_ha_t *ha, int intr) { strncpy((char *) ha->nvram->bios_high, ha->bios_version, 4); strncpy((char *) ha->nvram->bios_low, ha->bios_version + 4, 4); + ips_version_check(ha, intr); /* Check BIOS/FW/Driver Versions */ + /* now update the page */ if (!ips_readwrite_page5(ha, TRUE, intr)) { printk(KERN_WARNING "(%s%d) unable to write NVRAM page 5.\n", @@ -6745,11 +6413,15 @@ ips_read_adapter_status(ips_ha_t *ha, int intr) { scb->cmd.basic_io.op_code = IPS_CMD_ENQUIRY; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); scb->cmd.basic_io.sg_count = 0; - scb->cmd.basic_io.sg_addr = cpu_to_le32(virt_to_bus(ha->enq)); scb->cmd.basic_io.lba = 0; scb->cmd.basic_io.sector_count = 0; scb->cmd.basic_io.log_drv = 0; scb->cmd.basic_io.reserved = 0; + scb->data_len = sizeof(*ha->enq); + scb->data_busaddr = pci_map_single(ha->pcidev, ha->enq, scb->data_len, + IPS_DMA_DIR(scb)); + scb->cmd.basic_io.sg_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; /* send command */ if (((ret = ips_send_wait(ha, scb, ips_cmd_timeout, intr)) == IPS_FAILURE) || @@ -6786,11 +6458,15 @@ ips_read_subsystem_parameters(ips_ha_t *ha, int intr) { scb->cmd.basic_io.op_code = IPS_CMD_GET_SUBSYS; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); scb->cmd.basic_io.sg_count = 0; - scb->cmd.basic_io.sg_addr = cpu_to_le32(virt_to_bus(ha->subsys)); scb->cmd.basic_io.lba = 0; scb->cmd.basic_io.sector_count = 0; scb->cmd.basic_io.log_drv = 0; scb->cmd.basic_io.reserved = 0; + scb->data_len = sizeof(*ha->subsys); + scb->data_busaddr = pci_map_single(ha->pcidev, ha->subsys, + scb->data_len, 
IPS_DMA_DIR(scb)); + scb->cmd.basic_io.sg_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; /* send command */ if (((ret = ips_send_wait(ha, scb, ips_cmd_timeout, intr)) == IPS_FAILURE) || @@ -6831,7 +6507,11 @@ ips_read_config(ips_ha_t *ha, int intr) { scb->cmd.basic_io.op_code = IPS_CMD_READ_CONF; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); - scb->cmd.basic_io.sg_addr = cpu_to_le32(virt_to_bus(ha->conf)); + scb->data_len = sizeof(*ha->conf); + scb->data_busaddr = pci_map_single(ha->pcidev, ha->conf, + scb->data_len, IPS_DMA_DIR(scb)); + scb->cmd.basic_io.sg_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; /* send command */ if (((ret = ips_send_wait(ha, scb, ips_cmd_timeout, intr)) == IPS_FAILURE) || @@ -6844,6 +6524,10 @@ ips_read_config(ips_ha_t *ha, int intr) { for (i = 0; i < 4; i++) ha->conf->init_id[i] = 7; + /* Allow Completed with Errors, so JCRM can access the Adapter to fix the problems */ + if ((scb->basic_status & IPS_GSC_STATUS_MASK) == IPS_CMD_CMPLT_WERROR) + return (1); + return (0); } @@ -6877,9 +6561,13 @@ ips_readwrite_page5(ips_ha_t *ha, int write, int intr) { scb->cmd.nvram.command_id = IPS_COMMAND_ID(ha, scb); scb->cmd.nvram.page = 5; scb->cmd.nvram.write = write; - scb->cmd.nvram.buffer_addr = cpu_to_le32(virt_to_bus(ha->nvram)); scb->cmd.nvram.reserved = 0; scb->cmd.nvram.reserved2 = 0; + scb->data_len = sizeof(*ha->nvram); + scb->data_busaddr = pci_map_single(ha->pcidev, ha->nvram, + scb->data_len, IPS_DMA_DIR(scb)); + scb->cmd.nvram.buffer_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; /* issue the command */ if (((ret = ips_send_wait(ha, scb, ips_cmd_timeout, intr)) == IPS_FAILURE) || @@ -6997,7 +6685,7 @@ ips_ffdc_reset(ips_ha_t *ha, int intr) { /* */ /****************************************************************************/ static void -ips_ffdc_time(ips_ha_t *ha, int intr) { +ips_ffdc_time(ips_ha_t *ha) { ips_scb_t *scb; METHOD_TRACE("ips_ffdc_time", 1); @@ -7020,7 +6708,7 @@ 
ips_ffdc_time(ips_ha_t *ha, int intr) { ips_fix_ffdc_time(ha, scb, ha->last_ffdc); /* issue command */ - ips_send_wait(ha, scb, ips_cmd_timeout, intr); + ips_send_wait(ha, scb, ips_cmd_timeout, IPS_FFDC); } /****************************************************************************/ @@ -7100,7 +6788,7 @@ ips_fix_ffdc_time(ips_ha_t *ha, ips_scb_t *scb, time_t current_time) { static int ips_erase_bios(ips_ha_t *ha) { int timeout; - u_int8_t status=0; + uint8_t status=0; METHOD_TRACE("ips_erase_bios", 1); @@ -7109,33 +6797,33 @@ ips_erase_bios(ips_ha_t *ha) { /* Clear the status register */ outl(0, ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ outb(0x50, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* Erase Setup */ outb(0x20, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* Erase Confirm */ outb(0xD0, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* Erase Status */ outb(0x70, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ timeout = 80000; /* 80 seconds */ while (timeout > 0) { if (ha->revision_id == IPS_REVID_TROMBONE64) { outl(0, ha->io_addr + IPS_REG_FLAP); - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ } status = inb(ha->io_addr + IPS_REG_FLDP); @@ -7154,14 +6842,14 @@ ips_erase_bios(ips_ha_t *ha) { /* try to suspend the erase */ outb(0xB0, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* wait for 10 seconds */ timeout = 10000; while (timeout > 0) { if (ha->revision_id == IPS_REVID_TROMBONE64) { outl(0, ha->io_addr + IPS_REG_FLAP); - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ } status = 
inb(ha->io_addr + IPS_REG_FLDP); @@ -7190,12 +6878,12 @@ ips_erase_bios(ips_ha_t *ha) { /* clear status */ outb(0x50, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* enable reads */ outb(0xFF, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (0); } @@ -7211,7 +6899,7 @@ ips_erase_bios(ips_ha_t *ha) { static int ips_erase_bios_memio(ips_ha_t *ha) { int timeout; - u_int8_t status; + uint8_t status; METHOD_TRACE("ips_erase_bios_memio", 1); @@ -7220,33 +6908,33 @@ ips_erase_bios_memio(ips_ha_t *ha) { /* Clear the status register */ writel(0, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ writeb(0x50, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* Erase Setup */ writeb(0x20, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* Erase Confirm */ writeb(0xD0, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* Erase Status */ writeb(0x70, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ timeout = 80000; /* 80 seconds */ while (timeout > 0) { if (ha->revision_id == IPS_REVID_TROMBONE64) { writel(0, ha->mem_ptr + IPS_REG_FLAP); - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ } status = readb(ha->mem_ptr + IPS_REG_FLDP); @@ -7265,14 +6953,14 @@ ips_erase_bios_memio(ips_ha_t *ha) { /* try to suspend the erase */ writeb(0xB0, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* wait for 10 seconds */ timeout = 10000; while (timeout > 0) { if (ha->revision_id == 
IPS_REVID_TROMBONE64) { writel(0, ha->mem_ptr + IPS_REG_FLAP); - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ } status = readb(ha->mem_ptr + IPS_REG_FLDP); @@ -7301,12 +6989,12 @@ ips_erase_bios_memio(ips_ha_t *ha) { /* clear status */ writeb(0x50, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* enable reads */ writeb(0xFF, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (0); } @@ -7320,10 +7008,10 @@ ips_erase_bios_memio(ips_ha_t *ha) { /* */ /****************************************************************************/ static int -ips_program_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offset) { +ips_program_bios(ips_ha_t *ha, char *buffer, uint32_t buffersize, uint32_t offset) { int i; int timeout; - u_int8_t status=0; + uint8_t status=0; METHOD_TRACE("ips_program_bios", 1); @@ -7333,22 +7021,22 @@ ips_program_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t off /* write a byte */ outl(cpu_to_le32(i + offset), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ outb(0x40, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ outb(buffer[i], ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* wait up to one second */ timeout = 1000; while (timeout > 0) { if (ha->revision_id == IPS_REVID_TROMBONE64) { outl(0, ha->io_addr + IPS_REG_FLAP); - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ } status = inb(ha->io_addr + IPS_REG_FLDP); @@ -7364,11 +7052,11 @@ ips_program_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t off /* timeout error */ outl(0, ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us 
*/ + udelay(25); /* 25 us */ outb(0xFF, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (1); } @@ -7378,11 +7066,11 @@ ips_program_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t off /* programming error */ outl(0, ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ outb(0xFF, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (1); } @@ -7391,11 +7079,11 @@ ips_program_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t off /* Enable reading */ outl(0, ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ outb(0xFF, ha->io_addr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (0); } @@ -7409,10 +7097,10 @@ ips_program_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t off /* */ /****************************************************************************/ static int -ips_program_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offset) { +ips_program_bios_memio(ips_ha_t *ha, char *buffer, uint32_t buffersize, uint32_t offset) { int i; int timeout; - u_int8_t status=0; + uint8_t status=0; METHOD_TRACE("ips_program_bios_memio", 1); @@ -7420,24 +7108,24 @@ ips_program_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32 for (i = 0; i < buffersize; i++) { /* write a byte */ - writel(cpu_to_le32(i + offset), ha->mem_ptr + IPS_REG_FLAP); + writel(i + offset, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ writeb(0x40, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ 
writeb(buffer[i], ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ /* wait up to one second */ timeout = 1000; while (timeout > 0) { if (ha->revision_id == IPS_REVID_TROMBONE64) { writel(0, ha->mem_ptr + IPS_REG_FLAP); - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ } status = readb(ha->mem_ptr + IPS_REG_FLDP); @@ -7453,11 +7141,11 @@ ips_program_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32 /* timeout error */ writel(0, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ writeb(0xFF, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (1); } @@ -7467,11 +7155,11 @@ ips_program_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32 /* programming error */ writel(0, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ writeb(0xFF, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (1); } @@ -7480,11 +7168,11 @@ ips_program_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32 /* Enable reading */ writel(0, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ writeb(0xFF, ha->mem_ptr + IPS_REG_FLDP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ return (0); } @@ -7498,8 +7186,8 @@ ips_program_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32 /* */ /****************************************************************************/ static int -ips_verify_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offset) { - u_int8_t checksum; +ips_verify_bios(ips_ha_t *ha, char *buffer, uint32_t buffersize, 
uint32_t offset) { + uint8_t checksum; int i; METHOD_TRACE("ips_verify_bios", 1); @@ -7507,14 +7195,14 @@ ips_verify_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offs /* test 1st byte */ outl(0, ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (inb(ha->io_addr + IPS_REG_FLDP) != 0x55) return (1); outl(cpu_to_le32(1), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (inb(ha->io_addr + IPS_REG_FLDP) != 0xAA) return (1); @@ -7523,9 +7211,9 @@ ips_verify_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offs outl(cpu_to_le32(i + offset), ha->io_addr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ - checksum = (u_int8_t) checksum + inb(ha->io_addr + IPS_REG_FLDP); + checksum = (uint8_t) checksum + inb(ha->io_addr + IPS_REG_FLDP); } if (checksum != 0) @@ -7545,8 +7233,8 @@ ips_verify_bios(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offs /* */ /****************************************************************************/ static int -ips_verify_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_t offset) { - u_int8_t checksum; +ips_verify_bios_memio(ips_ha_t *ha, char *buffer, uint32_t buffersize, uint32_t offset) { + uint8_t checksum; int i; METHOD_TRACE("ips_verify_bios_memio", 1); @@ -7554,25 +7242,25 @@ ips_verify_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_ /* test 1st byte */ writel(0, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (readb(ha->mem_ptr + IPS_REG_FLDP) != 0x55) return (1); writel(1, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ if (readb(ha->mem_ptr + IPS_REG_FLDP) != 0xAA) return (1); 
checksum = 0xff; for (i = 2; i < buffersize; i++) { - writel(cpu_to_le32(i + offset), ha->mem_ptr + IPS_REG_FLAP); + writel(i + offset, ha->mem_ptr + IPS_REG_FLAP); if (ha->revision_id == IPS_REVID_TROMBONE64) - udelay(5); /* 5 us */ + udelay(25); /* 25 us */ - checksum = (u_int8_t) checksum + readb(ha->mem_ptr + IPS_REG_FLDP); + checksum = (uint8_t) checksum + readb(ha->mem_ptr + IPS_REG_FLDP); } if (checksum != 0) @@ -7583,11 +7271,589 @@ ips_verify_bios_memio(ips_ha_t *ha, char *buffer, u_int32_t buffersize, u_int32_ return (0); } +/*---------------------------------------------------------------------------*/ +/* Routine Name: ips_version_check */ +/* */ +/* Dependencies: */ +/* Assumes that ips_read_adapter_status() is called first filling in */ +/* the data for SubSystem Parameters. */ +/* Called from ips_write_driver_status() so it also assumes NVRAM Page 5 */ +/* Data is availaible. */ +/* */ +/*---------------------------------------------------------------------------*/ +static void ips_version_check(ips_ha_t *ha, int intr) { + IPS_VERSION_DATA VersionInfo; + uint8_t FirmwareVersion[ IPS_COMPAT_ID_LENGTH + 1 ]; + uint8_t BiosVersion[ IPS_COMPAT_ID_LENGTH + 1]; + int MatchError; + int rc; + + METHOD_TRACE("ips_version_check", 1); + + memset(FirmwareVersion, 0, IPS_COMPAT_ID_LENGTH + 1); + memset(BiosVersion, 0, IPS_COMPAT_ID_LENGTH + 1); + + /* Get the Compatible BIOS Version from NVRAM Page 5 */ + memcpy(BiosVersion, ha->nvram->BiosCompatibilityID, IPS_COMPAT_ID_LENGTH); + + rc = IPS_FAILURE; + if (ha->subsys->param[4] & IPS_GET_VERSION_SUPPORT) /* If Versioning is Supported */ + { + /* Get the Version Info with a Get Version Command */ + rc = ips_get_version_info(ha, &VersionInfo, intr); + if (rc == IPS_SUCCESS) + memcpy(FirmwareVersion, VersionInfo.compatibilityId, IPS_COMPAT_ID_LENGTH); + } + + if (rc != IPS_SUCCESS) /* If Data Not Obtainable from a GetVersion Command */ + { + /* Get the Firmware Version from Enquiry Data */ + memcpy(FirmwareVersion, 
ha->enq->CodeBlkVersion, IPS_COMPAT_ID_LENGTH); + } + + /* printk(KERN_WARNING "Adapter's BIOS Version = %s\n", BiosVersion); */ + /* printk(KERN_WARNING "BIOS Compatible Version = %s\n", IPS_COMPAT_BIOS); */ + /* printk(KERN_WARNING "Adapter's Firmware Version = %s\n", FirmwareVersion); */ + /* printk(KERN_WARNING "Firmware Compatible Version = %s \n", Compatable[ ha->nvram->adapter_type ]); */ + + MatchError = 0; + + if (strncmp(FirmwareVersion, Compatable[ ha->nvram->adapter_type ], IPS_COMPAT_ID_LENGTH) != 0) + { + if (ips_cd_boot == 0) + printk(KERN_WARNING "Warning: Adapter %d Firmware Compatible Version is %s, but should be %s\n", + ha->host_num, FirmwareVersion, Compatable[ ha->nvram->adapter_type ]); + MatchError = 1; + } + + if (strncmp(BiosVersion, IPS_COMPAT_BIOS, IPS_COMPAT_ID_LENGTH) != 0) + { + if (ips_cd_boot == 0) + printk(KERN_WARNING "Warning: Adapter %d BIOS Compatible Version is %s, but should be %s\n", + ha->host_num, BiosVersion, IPS_COMPAT_BIOS); + MatchError = 1; + } + + ha->nvram->versioning = 1; /* Indicate the Driver Supports Versioning */ + + if (MatchError) + { + ha->nvram->version_mismatch = 1; + if (ips_cd_boot == 0) + printk(KERN_WARNING "Warning ! ! ! 
ServeRAID Version Mismatch\n"); + } + else + { + ha->nvram->version_mismatch = 0; + } + + return; +} + +/*---------------------------------------------------------------------------*/ +/* Routine Name: ips_get_version_info */ +/* */ +/* Routine Description: */ +/* Issue an internal GETVERSION ServeRAID Command */ +/* */ +/* Return Value: */ +/* 0 if Successful, else non-zero */ +/*---------------------------------------------------------------------------*/ +static int ips_get_version_info(ips_ha_t *ha, IPS_VERSION_DATA *Buffer, int intr ) { + ips_scb_t *scb; + int rc; + + METHOD_TRACE("ips_get_version_info", 1); + + memset(Buffer, 0, sizeof(IPS_VERSION_DATA)); + scb = &ha->scbs[ha->max_cmds-1]; + + ips_init_scb(ha, scb); + + scb->timeout = ips_cmd_timeout; + scb->cdb[0] = IPS_CMD_GET_VERSION_INFO; + scb->cmd.version_info.op_code = IPS_CMD_GET_VERSION_INFO; + scb->cmd.version_info.command_id = IPS_COMMAND_ID(ha, scb); + scb->cmd.version_info.reserved = 0; + scb->cmd.version_info.count = sizeof( IPS_VERSION_DATA); + scb->cmd.version_info.reserved2 = 0; + scb->data_len = sizeof(*Buffer); + scb->data_busaddr = pci_map_single(ha->pcidev, Buffer, + scb->data_len, IPS_DMA_DIR(scb)); + scb->cmd.version_info.buffer_addr = scb->data_busaddr; + scb->flags |= IPS_SCB_MAP_SINGLE; + + /* issue command */ + rc = ips_send_wait(ha, scb, ips_cmd_timeout, intr); + return( rc ); +} + + + #if defined (MODULE) || (LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0)) static Scsi_Host_Template driver_template = IPS; #include "scsi_module.c" #endif + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) + +/*---------------------------------------------------------------------------*/ +/* Routine Name: ips_remove_device */ +/* */ +/* Routine Description: */ +/* Remove one Adapter ( Hot Plugging ) */ +/*---------------------------------------------------------------------------*/ +static void __devexit ips_remove_device(struct pci_dev *pci_dev) +{ + int i; + struct Scsi_Host *sh; + ips_ha_t *ha; + 
+ for (i = 0; i < IPS_MAX_ADAPTERS; i++) { + ha = ips_ha[i]; + if (ha) { + if ( (pci_dev->bus->number == ha->pcidev->bus->number) && + (pci_dev->devfn == ha->pcidev->devfn)) { + sh = ips_sh[i]; + ips_release(sh); + } + } + } +} + + +/*---------------------------------------------------------------------------*/ +/* Routine Name: ips_insert_device */ +/* */ +/* Routine Description: */ +/* Add One Adapter ( Hot Plug ) */ +/* */ +/* Return Value: */ +/* 0 if Successful, else non-zero */ +/*---------------------------------------------------------------------------*/ +static int __devinit ips_insert_device(struct pci_dev *pci_dev, const struct pci_device_id *ent) +{ + int index; + int rc; + + METHOD_TRACE("ips_insert_device", 1); + if (pci_enable_device(pci_dev)) + return -1; + + rc = ips_init_phase1(pci_dev, &index); + if (rc == SUCCESS) + rc = ips_init_phase2(index); + + if (rc == SUCCESS) + ips_num_controllers++; + + ips_next_controller = ips_num_controllers; + return rc; +} + + +/*---------------------------------------------------------------------------*/ +/* Routine Name: ips_init_phase1 */ +/* */ +/* Routine Description: */ +/* Adapter Initialization */ +/* */ +/* Return Value: */ +/* 0 if Successful, else non-zero */ +/*---------------------------------------------------------------------------*/ +static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) +{ + struct Scsi_Host *sh; + ips_ha_t *ha; + uint32_t io_addr; + uint32_t mem_addr; + uint32_t io_len; + uint32_t mem_len; + uint8_t revision_id; + uint8_t bus; + uint8_t func; + uint8_t irq; + uint16_t subdevice_id; + int j; + int index; + uint32_t count; + dma_addr_t dma_address; + char *ioremap_ptr; + char *mem_ptr; + + METHOD_TRACE("ips_init_phase1", 1); + index = IPS_MAX_ADAPTERS; + for (j = 0; j < IPS_MAX_ADAPTERS; j++) { + if (ips_ha[j] ==0) { + index = j; + break; + } + } + + if (index >= IPS_MAX_ADAPTERS) + return -1; + + /* stuff that we get in dev */ + irq = pci_dev->irq; + bus = 
pci_dev->bus->number; + func = pci_dev->devfn; + + /* Init MEM/IO addresses to 0 */ + mem_addr = 0; + io_addr = 0; + mem_len = 0; + io_len = 0; + + for (j = 0; j < 2; j++) { + if (!pci_resource_start(pci_dev, j)) + break; + + if (pci_resource_flags(pci_dev, j) & IORESOURCE_IO) { + io_addr = pci_resource_start(pci_dev, j); + io_len = pci_resource_len(pci_dev, j); + } else { + mem_addr = pci_resource_start(pci_dev, j); + mem_len = pci_resource_len(pci_dev, j); + } + } + + /* setup memory mapped area (if applicable) */ + if (mem_addr) { + uint32_t base; + uint32_t offs; + + if (check_mem_region(mem_addr, mem_len)) { + printk(KERN_WARNING "Couldn't allocate IO Memory space %x len %d.\n", mem_addr, mem_len); + return -1; + } + + request_mem_region(mem_addr, mem_len, "ips"); + base = mem_addr & PAGE_MASK; + offs = mem_addr - base; + ioremap_ptr = ioremap(base, PAGE_SIZE); + mem_ptr = ioremap_ptr + offs; + } else { + ioremap_ptr = NULL; + mem_ptr = NULL; + } + + /* setup I/O mapped area (if applicable) */ + if (io_addr) { + if (check_region(io_addr, io_len)) { + printk(KERN_WARNING "Couldn't allocate IO space %x len %d.\n", io_addr, io_len); + return -1; + } + request_region(io_addr, io_len, "ips"); + } + + /* get the revision ID */ + if (pci_read_config_byte(pci_dev, PCI_REVISION_ID, &revision_id)) { + printk(KERN_WARNING "Can't get revision id.\n" ); + return -1; + } + + subdevice_id = pci_dev->subsystem_device; + + /* found a controller */ + sh = scsi_register(&driver_template, sizeof(ips_ha_t)); +#if LINUX_VERSION_CODE > LinuxVersionCode(2,5,0) + pci_set_dma_mask(pci_dev, (u64)0xffffffff); + scsi_set_pci_device(sh, pci_dev); +#endif + if (sh == NULL) { + printk(KERN_WARNING "Unable to register controller with SCSI subsystem\n" ); + return -1; + } + + ha = IPS_HA(sh); + memset(ha, 0, sizeof(ips_ha_t)); + + /* Initialize spin lock */ + spin_lock_init(&ha->scb_lock); + spin_lock_init(&ha->copp_lock); + spin_lock_init(&ha->ips_lock); + 
spin_lock_init(&ha->copp_waitlist.lock); + spin_lock_init(&ha->scb_waitlist.lock); + spin_lock_init(&ha->scb_activelist.lock); + + ips_sh[index] = sh; + ips_ha[index] = ha; + ha->active = 1; + + ha->enq = kmalloc(sizeof(IPS_ENQ), GFP_KERNEL); + + if (!ha->enq) { + printk(KERN_WARNING "Unable to allocate host inquiry structure\n" ); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + + ha->adapt = pci_alloc_consistent(ha->pcidev, sizeof(IPS_ADAPTER) + + sizeof(IPS_IO_CMD), &dma_address); + if (!ha->adapt) { + printk(KERN_WARNING "Unable to allocate host adapt & dummy structures\n"); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + ha->adapt->hw_status_start = dma_address; + ha->dummy = (void *)(ha->adapt + 1); + + ha->conf = kmalloc(sizeof(IPS_CONF), GFP_KERNEL); + + if (!ha->conf) { + printk(KERN_WARNING "Unable to allocate host conf structure\n" ); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + + ha->nvram = kmalloc(sizeof(IPS_NVRAM_P5), GFP_KERNEL); + + if (!ha->nvram) { + printk(KERN_WARNING "Unable to allocate host NVRAM structure\n" ); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + + ha->subsys = kmalloc(sizeof(IPS_SUBSYS), GFP_KERNEL); + + if (!ha->subsys) { + printk(KERN_WARNING "Unable to allocate host subsystem structure\n" ); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + + for (count = PAGE_SIZE, ha->ioctl_order = 0; + count < ips_ioctlsize; + ha->ioctl_order++, count <<= 1); + + ha->ioctl_data = (char *) __get_free_pages(GFP_KERNEL, ha->ioctl_order); + ha->ioctl_datasize = count; + + if (!ha->ioctl_data) { + printk(KERN_WARNING "Unable to allocate IOCTL data\n" ); + ha->ioctl_data = NULL; + ha->ioctl_order = 0; 
+ ha->ioctl_datasize = 0; + } + + /* Store away needed values for later use */ + sh->io_port = io_addr; + sh->n_io_port = io_addr ? 255 : 0; + sh->unique_id = (io_addr) ? io_addr : mem_addr; + sh->irq = irq; + sh->select_queue_depths = ips_select_queue_depth; + sh->sg_tablesize = sh->hostt->sg_tablesize; + sh->can_queue = sh->hostt->can_queue; + sh->cmd_per_lun = sh->hostt->cmd_per_lun; + sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma; + sh->use_clustering = sh->hostt->use_clustering; + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,7) + sh->max_sectors = 128; +#endif + + /* Store info in HA structure */ + ha->irq = irq; + ha->io_addr = io_addr; + ha->io_len = io_len; + ha->mem_addr = mem_addr; + ha->mem_len = mem_len; + ha->mem_ptr = mem_ptr; + ha->ioremap_ptr = ioremap_ptr; + ha->host_num = ( uint32_t) index; + ha->revision_id = revision_id; + ha->slot_num = PCI_SLOT(pci_dev->devfn); + ha->device_id = pci_dev->device; + ha->subdevice_id = subdevice_id; + ha->pcidev = pci_dev; + + /* + * Setup Functions + */ + if (IPS_IS_MORPHEUS(ha)) { + /* morpheus */ + ha->func.isintr = ips_isintr_morpheus; + ha->func.isinit = ips_isinit_morpheus; + ha->func.issue = ips_issue_i2o_memio; + ha->func.init = ips_init_morpheus; + ha->func.statupd = ips_statupd_morpheus; + ha->func.reset = ips_reset_morpheus; + ha->func.intr = ips_intr_morpheus; + ha->func.enableint = ips_enable_int_morpheus; + } else if (IPS_USE_MEMIO(ha)) { + /* copperhead w/MEMIO */ + ha->func.isintr = ips_isintr_copperhead_memio; + ha->func.isinit = ips_isinit_copperhead_memio; + ha->func.init = ips_init_copperhead_memio; + ha->func.statupd = ips_statupd_copperhead_memio; + ha->func.statinit = ips_statinit_memio; + ha->func.reset = ips_reset_copperhead_memio; + ha->func.intr = ips_intr_copperhead; + ha->func.erasebios = ips_erase_bios_memio; + ha->func.programbios = ips_program_bios_memio; + ha->func.verifybios = ips_verify_bios_memio; + ha->func.enableint = ips_enable_int_copperhead_memio; + + if 
(IPS_USE_I2O_DELIVER(ha)) + ha->func.issue = ips_issue_i2o_memio; + else + ha->func.issue = ips_issue_copperhead_memio; + } else { + /* copperhead */ + ha->func.isintr = ips_isintr_copperhead; + ha->func.isinit = ips_isinit_copperhead; + ha->func.init = ips_init_copperhead; + ha->func.statupd = ips_statupd_copperhead; + ha->func.statinit = ips_statinit; + ha->func.reset = ips_reset_copperhead; + ha->func.intr = ips_intr_copperhead; + ha->func.erasebios = ips_erase_bios; + ha->func.programbios = ips_program_bios; + ha->func.verifybios = ips_verify_bios; + ha->func.enableint = ips_enable_int_copperhead; + + if (IPS_USE_I2O_DELIVER(ha)) + ha->func.issue = ips_issue_i2o; + else + ha->func.issue = ips_issue_copperhead; + } + + /* + * Initialize the card if it isn't already + */ + + if (!(*ha->func.isinit)(ha)) { + if (!(*ha->func.init)(ha)) { + /* + * Initialization failed + */ + printk(KERN_WARNING "Unable to initialize controller\n" ); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + } + + /* Install the interrupt handler */ + if (request_irq(irq, do_ipsintr, SA_SHIRQ, ips_name, ha)) { + printk(KERN_WARNING "Unable to install interrupt handler\n" ); + ha->active = 0; + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + + /* + * Allocate a temporary SCB for initialization + */ + ha->max_cmds = 1; + if (!ips_allocatescbs(ha)) { + printk(KERN_WARNING "Unable to allocate a CCB\n" ); + ha->active = 0; + free_irq(ha->irq, ha); + ips_free(ha); + scsi_unregister(sh); + ips_ha[index] = 0; + ips_sh[index] = 0; + return -1; + } + + *indexPtr = index; + return SUCCESS; +} + +#endif + +/*---------------------------------------------------------------------------*/ +/* Routine Name: ips_init_phase2 */ +/* */ +/* Routine Description: */ +/* Adapter Initialization Phase 2 */ +/* */ +/* Return Value: */ +/* 0 if Successful, else non-zero */ 
+/*---------------------------------------------------------------------------*/ +static int ips_init_phase2( int index ) +{ + struct Scsi_Host *sh; + ips_ha_t *ha; + + ha = ips_ha[index]; + sh = ips_sh[index]; + + METHOD_TRACE("ips_init_phase2", 1); + if (!ha->active) { + scsi_unregister(sh); + ips_ha[index] = NULL; + ips_sh[index] = NULL; + return -1;; + } + + if (!ips_hainit(ha)) { + printk(KERN_WARNING "Unable to initialize controller\n" ); + ha->active = 0; + ips_free(ha); + free_irq(ha->irq, ha); + scsi_unregister(sh); + ips_ha[index] = NULL; + ips_sh[index] = NULL; + return -1; + } + /* Free the temporary SCB */ + ips_deallocatescbs(ha, 1); + + /* allocate CCBs */ + if (!ips_allocatescbs(ha)) { + printk(KERN_WARNING "Unable to allocate CCBs\n" ); + ha->active = 0; + ips_free(ha); + free_irq(ha->irq, ha); + scsi_unregister(sh); + ips_ha[index] = NULL; + ips_sh[index] = NULL; + return -1; + } + + /* finish setting values */ + sh->max_id = ha->ntargets; + sh->max_lun = ha->nlun; + sh->max_channel = ha->nbus - 1; + sh->can_queue = ha->max_cmds-1; + + return SUCCESS; +} + + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,9) +MODULE_LICENSE("GPL"); +#endif + /* * Overrides for Emacs so that we almost follow Linus's tabbing style. 
* Emacs will notice this stuff at the end of the file and automatically diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index 4d569e1b86fc..febbd953eb00 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -50,12 +50,6 @@ #include <asm/uaccess.h> #include <asm/io.h> - /* type definitions */ - #define u_int8_t uint8_t - #define u_int16_t uint16_t - #define u_int32_t uint32_t - #define u_int64_t uint64_t - /* Prototypes */ extern int ips_detect(Scsi_Host_Template *); extern int ips_release(struct Scsi_Host *); @@ -64,7 +58,7 @@ extern int ips_queue(Scsi_Cmnd *, void (*) (Scsi_Cmnd *)); extern int ips_biosparam(Disk *, kdev_t, int *); extern const char * ips_info(struct Scsi_Host *); - extern void do_ipsintr(int, void *, struct pt_regs *); + extern void do_ips(int, void *, struct pt_regs *); /* * Some handy macros @@ -91,11 +85,7 @@ (ips_force_memio))) ? 1 : 0) #ifndef VIRT_TO_BUS - #define VIRT_TO_BUS(x) (unsigned int)virt_to_bus((void *) x) - #endif - - #ifndef UDELAY - #define UDELAY udelay + #define VIRT_TO_BUS(x) (uint32_t) virt_to_bus((void *) x) #endif #ifndef MDELAY @@ -178,9 +168,14 @@ #define IPS_CMD_WRITE_SG 0x83 #define IPS_CMD_DCDB 0x04 #define IPS_CMD_DCDB_SG 0x84 + #define IPS_CMD_EXTENDED_DCDB 0x95 + #define IPS_CMD_EXTENDED_DCDB_SG 0x96 #define IPS_CMD_CONFIG_SYNC 0x58 #define IPS_CMD_ERROR_TABLE 0x17 + #define IPS_CMD_DOWNLOAD 0x20 #define IPS_CMD_RW_BIOSFW 0x22 + #define IPS_CMD_GET_VERSION_INFO 0xC6 + #define IPS_CMD_RESET_CHANNEL 0x1A /* * Adapter Equates @@ -206,10 +201,9 @@ #define IPS_GOOD_POST_STATUS 0x80 #define IPS_SEM_TIMEOUT 2000 #define IPS_IOCTL_COMMAND 0x0D - #define IPS_IOCTL_NEW_COMMAND 0x81 #define IPS_INTR_ON 0 #define IPS_INTR_IORL 1 - #define IPS_INTR_HAL 2 + #define IPS_FFDC 99 #define IPS_ADAPTER_ID 0xF #define IPS_VENDORID 0x1014 #define IPS_DEVICEID_COPPERHEAD 0x002E @@ -218,6 +212,8 @@ #define IPS_SUBDEVICEID_4L 0x01BF #define IPS_SUBDEVICEID_4MX 0x0208 #define IPS_SUBDEVICEID_4LX 0x020E + #define 
IPS_SUBDEVICEID_5I2 0x0259 + #define IPS_SUBDEVICEID_5I1 0x0258 #define IPS_IOCTL_SIZE 8192 #define IPS_STATUS_SIZE 4 #define IPS_STATUS_Q_SIZE (IPS_MAX_CMDS+1) * IPS_STATUS_SIZE @@ -298,6 +294,8 @@ #define IPS_ADTYPE_SERVERAID4L 0x09 #define IPS_ADTYPE_SERVERAID4MX 0x0A #define IPS_ADTYPE_SERVERAID4LX 0x0B + #define IPS_ADTYPE_SERVERAID5I2 0x0C + #define IPS_ADTYPE_SERVERAID5I1 0x0D /* * Adapter Command/Status Packet Definitions @@ -390,6 +388,8 @@ */ #define IPS_SCB_ACTIVE 0x00001 #define IPS_SCB_WAITING 0x00002 + #define IPS_SCB_MAP_SG 0x00008 + #define IPS_SCB_MAP_SINGLE 0X00010 /* * Passthru stuff @@ -400,6 +400,14 @@ #define IPS_CTRLINFO (('C'<<8) | 69) #define IPS_FLASHBIOS (('C'<<8) | 70) + /* flashing defines */ + #define IPS_FW_IMAGE 0x00 + #define IPS_BIOS_IMAGE 0x01 + #define IPS_WRITE_FW 0x01 + #define IPS_WRITE_BIOS 0x02 + #define IPS_ERASE_BIOS 0x03 + #define IPS_BIOS_HEADER 0xC0 + /* time oriented stuff */ #define IPS_IS_LEAP_YEAR(y) (((y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))) ? 
1 : 0) #define IPS_NUM_LEAP_YEARS_THROUGH(y) ((y) / 4 - (y) / 100 + (y) / 400) @@ -415,7 +423,7 @@ /* * Scsi_Host Template */ -#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,27) +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) #define IPS { \ next : NULL, \ module : NULL, \ @@ -443,8 +451,36 @@ present : 0, \ unchecked_isa_dma : 0, \ use_clustering : ENABLE_CLUSTERING, \ - use_new_eh_code : 1, \ - highmem_io : 1 \ + use_new_eh_code : 1 \ +} +#elif LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) + #define IPS { \ + next : NULL, \ + module : NULL, \ + proc_info : NULL, \ + name : NULL, \ + detect : ips_detect, \ + release : ips_release, \ + info : ips_info, \ + command : NULL, \ + queuecommand : ips_queue, \ + eh_strategy_handler : NULL, \ + eh_abort_handler : ips_eh_abort, \ + eh_device_reset_handler : NULL, \ + eh_bus_reset_handler : NULL, \ + eh_host_reset_handler : ips_eh_reset, \ + abort : NULL, \ + reset : NULL, \ + slave_attach : NULL, \ + bios_param : ips_biosparam, \ + can_queue : 0, \ + this_id: -1, \ + sg_tablesize : IPS_MAX_SG, \ + cmd_per_lun: 16, \ + present : 0, \ + unchecked_isa_dma : 0, \ + use_clustering : ENABLE_CLUSTERING, \ + use_new_eh_code : 1 \ } #else #define IPS { \ @@ -473,6 +509,7 @@ present : 0, \ unchecked_isa_dma : 0, \ use_clustering : ENABLE_CLUSTERING, \ + highmem_io : 1 \ } #endif @@ -480,201 +517,247 @@ * IBM PCI Raid Command Formats */ typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t log_drv; - u_int8_t sg_count; - u_int32_t lba; - u_int32_t sg_addr; - u_int16_t sector_count; - u_int16_t reserved; - u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t log_drv; + uint8_t sg_count; + uint32_t lba; + uint32_t sg_addr; + uint16_t sector_count; + uint16_t reserved; + uint32_t ccsar; + uint32_t cccr; } IPS_IO_CMD, *PIPS_IO_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int16_t reserved; - u_int32_t reserved2; - u_int32_t buffer_addr; - u_int32_t reserved3; - 
u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint16_t reserved; + uint32_t reserved2; + uint32_t buffer_addr; + uint32_t reserved3; + uint32_t ccsar; + uint32_t cccr; } IPS_LD_CMD, *PIPS_LD_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t reserved; - u_int8_t reserved2; - u_int32_t reserved3; - u_int32_t buffer_addr; - u_int32_t reserved4; -} IPS_IOCTL_CMD, *PIPS_IOCTL_CMD; + uint8_t op_code; + uint8_t command_id; + uint8_t reserved; + uint8_t reserved2; + uint32_t reserved3; + uint32_t buffer_addr; + uint32_t reserved4; +} IPS_IOCTL_CMD, *PIPS_IOCTL_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int16_t reserved; - u_int32_t reserved2; - u_int32_t dcdb_address; - u_int32_t reserved3; - u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t channel; + uint8_t reserved3; + uint8_t reserved4; + uint8_t reserved5; + uint8_t reserved6; + uint8_t reserved7; + uint8_t reserved8; + uint8_t reserved9; + uint8_t reserved10; + uint8_t reserved11; + uint8_t reserved12; + uint8_t reserved13; + uint8_t reserved14; + uint8_t adapter_flag; +} IPS_RESET_CMD, *PIPS_RESET_CMD; + +typedef struct { + uint8_t op_code; + uint8_t command_id; + uint16_t reserved; + uint32_t reserved2; + uint32_t dcdb_address; + uint32_t reserved3; + uint32_t ccsar; + uint32_t cccr; } IPS_DCDB_CMD, *PIPS_DCDB_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t channel; - u_int8_t source_target; - u_int32_t reserved; - u_int32_t reserved2; - u_int32_t reserved3; - u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t channel; + uint8_t source_target; + uint32_t reserved; + uint32_t reserved2; + uint32_t reserved3; + uint32_t ccsar; + uint32_t cccr; } IPS_CS_CMD, *PIPS_CS_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t log_drv; - u_int8_t control; - u_int32_t reserved; - u_int32_t reserved2; - u_int32_t reserved3; 
- u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t log_drv; + uint8_t control; + uint32_t reserved; + uint32_t reserved2; + uint32_t reserved3; + uint32_t ccsar; + uint32_t cccr; } IPS_US_CMD, *PIPS_US_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t reserved; - u_int8_t state; - u_int32_t reserved2; - u_int32_t reserved3; - u_int32_t reserved4; - u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t reserved; + uint8_t state; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t ccsar; + uint32_t cccr; } IPS_FC_CMD, *PIPS_FC_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t reserved; - u_int8_t desc; - u_int32_t reserved2; - u_int32_t buffer_addr; - u_int32_t reserved3; - u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t reserved; + uint8_t desc; + uint32_t reserved2; + uint32_t buffer_addr; + uint32_t reserved3; + uint32_t ccsar; + uint32_t cccr; } IPS_STATUS_CMD, *PIPS_STATUS_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t page; - u_int8_t write; - u_int32_t reserved; - u_int32_t buffer_addr; - u_int32_t reserved2; - u_int32_t ccsar; - u_int32_t cccr; + uint8_t op_code; + uint8_t command_id; + uint8_t page; + uint8_t write; + uint32_t reserved; + uint32_t buffer_addr; + uint32_t reserved2; + uint32_t ccsar; + uint32_t cccr; } IPS_NVRAM_CMD, *PIPS_NVRAM_CMD; +typedef struct +{ + uint8_t op_code; + uint8_t command_id; + uint16_t reserved; + uint32_t count; + uint32_t buffer_addr; + uint32_t reserved2; +} IPS_VERSION_INFO, *PIPS_VERSION_INFO; + typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t reset_count; - u_int8_t reset_type; - u_int8_t second; - u_int8_t minute; - u_int8_t hour; - u_int8_t day; - u_int8_t reserved1[4]; - u_int8_t month; - u_int8_t yearH; - u_int8_t yearL; - u_int8_t reserved2; + uint8_t op_code; + uint8_t command_id; + 
uint8_t reset_count; + uint8_t reset_type; + uint8_t second; + uint8_t minute; + uint8_t hour; + uint8_t day; + uint8_t reserved1[4]; + uint8_t month; + uint8_t yearH; + uint8_t yearL; + uint8_t reserved2; } IPS_FFDC_CMD, *PIPS_FFDC_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t type; - u_int8_t direction; - u_int32_t count; - u_int32_t buffer_addr; - u_int8_t total_packets; - u_int8_t packet_num; - u_int16_t reserved; + uint8_t op_code; + uint8_t command_id; + uint8_t type; + uint8_t direction; + uint32_t count; + uint32_t buffer_addr; + uint8_t total_packets; + uint8_t packet_num; + uint16_t reserved; } IPS_FLASHFW_CMD, *PIPS_FLASHFW_CMD; typedef struct { - u_int8_t op_code; - u_int8_t command_id; - u_int8_t type; - u_int8_t direction; - u_int32_t count; - u_int32_t buffer_addr; - u_int32_t offset; + uint8_t op_code; + uint8_t command_id; + uint8_t type; + uint8_t direction; + uint32_t count; + uint32_t buffer_addr; + uint32_t offset; } IPS_FLASHBIOS_CMD, *PIPS_FLASHBIOS_CMD; typedef union { - IPS_IO_CMD basic_io; - IPS_LD_CMD logical_info; - IPS_IOCTL_CMD ioctl_info; - IPS_DCDB_CMD dcdb; - IPS_CS_CMD config_sync; - IPS_US_CMD unlock_stripe; - IPS_FC_CMD flush_cache; - IPS_STATUS_CMD status; - IPS_NVRAM_CMD nvram; - IPS_FFDC_CMD ffdc; + IPS_IO_CMD basic_io; + IPS_LD_CMD logical_info; + IPS_IOCTL_CMD ioctl_info; + IPS_DCDB_CMD dcdb; + IPS_CS_CMD config_sync; + IPS_US_CMD unlock_stripe; + IPS_FC_CMD flush_cache; + IPS_STATUS_CMD status; + IPS_NVRAM_CMD nvram; + IPS_FFDC_CMD ffdc; IPS_FLASHFW_CMD flashfw; IPS_FLASHBIOS_CMD flashbios; + IPS_VERSION_INFO version_info; + IPS_RESET_CMD reset; } IPS_HOST_COMMAND, *PIPS_HOST_COMMAND; typedef struct { - u_int8_t logical_id; - u_int8_t reserved; - u_int8_t raid_level; - u_int8_t state; - u_int32_t sector_count; + uint8_t logical_id; + uint8_t reserved; + uint8_t raid_level; + uint8_t state; + uint32_t sector_count; } IPS_DRIVE_INFO, *PIPS_DRIVE_INFO; typedef struct { - u_int8_t no_of_log_drive; 
- u_int8_t reserved[3]; + uint8_t no_of_log_drive; + uint8_t reserved[3]; IPS_DRIVE_INFO drive_info[IPS_MAX_LD]; } IPS_LD_INFO, *PIPS_LD_INFO; typedef struct { - u_int8_t device_address; - u_int8_t cmd_attribute; - u_int16_t transfer_length; - u_int32_t buffer_pointer; - u_int8_t cdb_length; - u_int8_t sense_length; - u_int8_t sg_count; - u_int8_t reserved; - u_int8_t scsi_cdb[12]; - u_int8_t sense_info[64]; - u_int8_t scsi_status; - u_int8_t reserved2[3]; + uint8_t device_address; + uint8_t cmd_attribute; + uint16_t transfer_length; + uint32_t buffer_pointer; + uint8_t cdb_length; + uint8_t sense_length; + uint8_t sg_count; + uint8_t reserved; + uint8_t scsi_cdb[12]; + uint8_t sense_info[64]; + uint8_t scsi_status; + uint8_t reserved2[3]; } IPS_DCDB_TABLE, *PIPS_DCDB_TABLE; +typedef struct { + uint8_t device_address; + uint8_t cmd_attribute; + uint8_t cdb_length; + uint8_t reserved_for_LUN; + uint32_t transfer_length; + uint32_t buffer_pointer; + uint16_t sg_count; + uint8_t sense_length; + uint8_t scsi_status; + uint32_t reserved; + uint8_t scsi_cdb[16]; + uint8_t sense_info[56]; +} IPS_DCDB_TABLE_TAPE, *PIPS_DCDB_TABLE_TAPE; + typedef union { struct { - volatile u_int8_t reserved; - volatile u_int8_t command_id; - volatile u_int8_t basic_status; - volatile u_int8_t extended_status; + volatile uint8_t reserved; + volatile uint8_t command_id; + volatile uint8_t basic_status; + volatile uint8_t extended_status; } fields; - volatile u_int32_t value; + volatile uint32_t value; } IPS_STATUS, *PIPS_STATUS; typedef struct { @@ -682,110 +765,134 @@ typedef struct { volatile PIPS_STATUS p_status_start; volatile PIPS_STATUS p_status_end; volatile PIPS_STATUS p_status_tail; - volatile u_int32_t hw_status_start; - volatile u_int32_t hw_status_tail; + volatile uint32_t hw_status_start; + volatile uint32_t hw_status_tail; IPS_LD_INFO logical_drive_info; } IPS_ADAPTER, *PIPS_ADAPTER; typedef struct { - u_int8_t ucLogDriveCount; - u_int8_t ucMiscFlag; - u_int8_t ucSLTFlag; - 
u_int8_t ucBSTFlag; - u_int8_t ucPwrChgCnt; - u_int8_t ucWrongAdrCnt; - u_int8_t ucUnidentCnt; - u_int8_t ucNVramDevChgCnt; - u_int8_t CodeBlkVersion[8]; - u_int8_t BootBlkVersion[8]; - u_int32_t ulDriveSize[IPS_MAX_LD]; - u_int8_t ucConcurrentCmdCount; - u_int8_t ucMaxPhysicalDevices; - u_int16_t usFlashRepgmCount; - u_int8_t ucDefunctDiskCount; - u_int8_t ucRebuildFlag; - u_int8_t ucOfflineLogDrvCount; - u_int8_t ucCriticalDrvCount; - u_int16_t usConfigUpdateCount; - u_int8_t ucBlkFlag; - u_int8_t reserved; - u_int16_t usAddrDeadDisk[IPS_MAX_CHANNELS * IPS_MAX_TARGETS]; + uint8_t ucLogDriveCount; + uint8_t ucMiscFlag; + uint8_t ucSLTFlag; + uint8_t ucBSTFlag; + uint8_t ucPwrChgCnt; + uint8_t ucWrongAdrCnt; + uint8_t ucUnidentCnt; + uint8_t ucNVramDevChgCnt; + uint8_t CodeBlkVersion[8]; + uint8_t BootBlkVersion[8]; + uint32_t ulDriveSize[IPS_MAX_LD]; + uint8_t ucConcurrentCmdCount; + uint8_t ucMaxPhysicalDevices; + uint16_t usFlashRepgmCount; + uint8_t ucDefunctDiskCount; + uint8_t ucRebuildFlag; + uint8_t ucOfflineLogDrvCount; + uint8_t ucCriticalDrvCount; + uint16_t usConfigUpdateCount; + uint8_t ucBlkFlag; + uint8_t reserved; + uint16_t usAddrDeadDisk[IPS_MAX_CHANNELS * IPS_MAX_TARGETS]; } IPS_ENQ, *PIPS_ENQ; typedef struct { - u_int8_t ucInitiator; - u_int8_t ucParameters; - u_int8_t ucMiscFlag; - u_int8_t ucState; - u_int32_t ulBlockCount; - u_int8_t ucDeviceId[28]; + uint8_t ucInitiator; + uint8_t ucParameters; + uint8_t ucMiscFlag; + uint8_t ucState; + uint32_t ulBlockCount; + uint8_t ucDeviceId[28]; } IPS_DEVSTATE, *PIPS_DEVSTATE; typedef struct { - u_int8_t ucChn; - u_int8_t ucTgt; - u_int16_t ucReserved; - u_int32_t ulStartSect; - u_int32_t ulNoOfSects; + uint8_t ucChn; + uint8_t ucTgt; + uint16_t ucReserved; + uint32_t ulStartSect; + uint32_t ulNoOfSects; } IPS_CHUNK, *PIPS_CHUNK; typedef struct { - u_int16_t ucUserField; - u_int8_t ucState; - u_int8_t ucRaidCacheParam; - u_int8_t ucNoOfChunkUnits; - u_int8_t ucStripeSize; - u_int8_t ucParams; - 
u_int8_t ucReserved; - u_int32_t ulLogDrvSize; + uint16_t ucUserField; + uint8_t ucState; + uint8_t ucRaidCacheParam; + uint8_t ucNoOfChunkUnits; + uint8_t ucStripeSize; + uint8_t ucParams; + uint8_t ucReserved; + uint32_t ulLogDrvSize; IPS_CHUNK chunk[IPS_MAX_CHUNKS]; } IPS_LD, *PIPS_LD; typedef struct { - u_int8_t board_disc[8]; - u_int8_t processor[8]; - u_int8_t ucNoChanType; - u_int8_t ucNoHostIntType; - u_int8_t ucCompression; - u_int8_t ucNvramType; - u_int32_t ulNvramSize; + uint8_t board_disc[8]; + uint8_t processor[8]; + uint8_t ucNoChanType; + uint8_t ucNoHostIntType; + uint8_t ucCompression; + uint8_t ucNvramType; + uint32_t ulNvramSize; } IPS_HARDWARE, *PIPS_HARDWARE; typedef struct { - u_int8_t ucLogDriveCount; - u_int8_t ucDateD; - u_int8_t ucDateM; - u_int8_t ucDateY; - u_int8_t init_id[4]; - u_int8_t host_id[12]; - u_int8_t time_sign[8]; - u_int32_t UserOpt; - u_int16_t user_field; - u_int8_t ucRebuildRate; - u_int8_t ucReserve; + uint8_t ucLogDriveCount; + uint8_t ucDateD; + uint8_t ucDateM; + uint8_t ucDateY; + uint8_t init_id[4]; + uint8_t host_id[12]; + uint8_t time_sign[8]; + uint32_t UserOpt; + uint16_t user_field; + uint8_t ucRebuildRate; + uint8_t ucReserve; IPS_HARDWARE hardware_disc; IPS_LD logical_drive[IPS_MAX_LD]; IPS_DEVSTATE dev[IPS_MAX_CHANNELS][IPS_MAX_TARGETS+1]; - u_int8_t reserved[512]; + uint8_t reserved[512]; } IPS_CONF, *PIPS_CONF; typedef struct { - u_int32_t signature; - u_int8_t reserved; - u_int8_t adapter_slot; - u_int16_t adapter_type; - u_int8_t bios_high[4]; - u_int8_t bios_low[4]; - u_int16_t reserved2; - u_int8_t reserved3; - u_int8_t operating_system; - u_int8_t driver_high[4]; - u_int8_t driver_low[4]; - u_int8_t reserved4[100]; + uint32_t signature; + uint8_t reserved1; + uint8_t adapter_slot; + uint16_t adapter_type; + uint8_t ctrl_bios[8]; + uint8_t versioning; /* 1 = Versioning Supported, else 0 */ + uint8_t version_mismatch; /* 1 = Versioning MisMatch, else 0 */ + uint8_t reserved2; + uint8_t 
operating_system; + uint8_t driver_high[4]; + uint8_t driver_low[4]; + uint8_t BiosCompatibilityID[8]; + uint8_t ReservedForOS2[8]; + uint8_t bios_high[4]; /* Adapter's Flashed BIOS Version */ + uint8_t bios_low[4]; + uint8_t Filler[76]; } IPS_NVRAM_P5, *PIPS_NVRAM_P5; +/*--------------------------------------------------------------------------*/ +/* Data returned from a GetVersion Command */ +/*--------------------------------------------------------------------------*/ + + /* SubSystem Parameter[4] */ +#define IPS_GET_VERSION_SUPPORT 0x00018000 /* Mask for Versioning Support */ + +typedef struct +{ + uint32_t revision; + uint8_t bootBlkVersion[32]; + uint8_t bootBlkAttributes[4]; + uint8_t codeBlkVersion[32]; + uint8_t biosVersion[32]; + uint8_t biosAttributes[4]; + uint8_t compatibilityId[32]; + uint8_t reserved[4]; +} IPS_VERSION_DATA; + + typedef struct _IPS_SUBSYS { - u_int32_t param[128]; + uint32_t param[128]; } IPS_SUBSYS, *PIPS_SUBSYS; /** @@ -796,102 +903,103 @@ typedef struct _IPS_SUBSYS { * Inquiry Data Format */ typedef struct { - u_int8_t DeviceType; - u_int8_t DeviceTypeQualifier; - u_int8_t Version; - u_int8_t ResponseDataFormat; - u_int8_t AdditionalLength; - u_int8_t Reserved; - u_int8_t Flags[2]; - char VendorId[8]; - char ProductId[16]; - char ProductRevisionLevel[4]; + uint8_t DeviceType; + uint8_t DeviceTypeQualifier; + uint8_t Version; + uint8_t ResponseDataFormat; + uint8_t AdditionalLength; + uint8_t Reserved; + uint8_t Flags[2]; + uint8_t VendorId[8]; + uint8_t ProductId[16]; + uint8_t ProductRevisionLevel[4]; + uint8_t Reserved2; /* Provides NULL terminator to name */ } IPS_SCSI_INQ_DATA, *PIPS_SCSI_INQ_DATA; /* * Read Capacity Data Format */ typedef struct { - u_int32_t lba; - u_int32_t len; + uint32_t lba; + uint32_t len; } IPS_SCSI_CAPACITY; /* * Request Sense Data Format */ typedef struct { - u_int8_t ResponseCode; - u_int8_t SegmentNumber; - u_int8_t Flags; - u_int8_t Information[4]; - u_int8_t AdditionalLength; - u_int8_t 
CommandSpecific[4]; - u_int8_t AdditionalSenseCode; - u_int8_t AdditionalSenseCodeQual; - u_int8_t FRUCode; - u_int8_t SenseKeySpecific[3]; + uint8_t ResponseCode; + uint8_t SegmentNumber; + uint8_t Flags; + uint8_t Information[4]; + uint8_t AdditionalLength; + uint8_t CommandSpecific[4]; + uint8_t AdditionalSenseCode; + uint8_t AdditionalSenseCodeQual; + uint8_t FRUCode; + uint8_t SenseKeySpecific[3]; } IPS_SCSI_REQSEN; /* * Sense Data Format - Page 3 */ typedef struct { - u_int8_t PageCode; - u_int8_t PageLength; - u_int16_t TracksPerZone; - u_int16_t AltSectorsPerZone; - u_int16_t AltTracksPerZone; - u_int16_t AltTracksPerVolume; - u_int16_t SectorsPerTrack; - u_int16_t BytesPerSector; - u_int16_t Interleave; - u_int16_t TrackSkew; - u_int16_t CylinderSkew; - u_int8_t flags; - u_int8_t reserved[3]; + uint8_t PageCode; + uint8_t PageLength; + uint16_t TracksPerZone; + uint16_t AltSectorsPerZone; + uint16_t AltTracksPerZone; + uint16_t AltTracksPerVolume; + uint16_t SectorsPerTrack; + uint16_t BytesPerSector; + uint16_t Interleave; + uint16_t TrackSkew; + uint16_t CylinderSkew; + uint8_t flags; + uint8_t reserved[3]; } IPS_SCSI_MODE_PAGE3; /* * Sense Data Format - Page 4 */ typedef struct { - u_int8_t PageCode; - u_int8_t PageLength; - u_int16_t CylindersHigh; - u_int8_t CylindersLow; - u_int8_t Heads; - u_int16_t WritePrecompHigh; - u_int8_t WritePrecompLow; - u_int16_t ReducedWriteCurrentHigh; - u_int8_t ReducedWriteCurrentLow; - u_int16_t StepRate; - u_int16_t LandingZoneHigh; - u_int8_t LandingZoneLow; - u_int8_t flags; - u_int8_t RotationalOffset; - u_int8_t Reserved; - u_int16_t MediumRotationRate; - u_int8_t Reserved2[2]; + uint8_t PageCode; + uint8_t PageLength; + uint16_t CylindersHigh; + uint8_t CylindersLow; + uint8_t Heads; + uint16_t WritePrecompHigh; + uint8_t WritePrecompLow; + uint16_t ReducedWriteCurrentHigh; + uint8_t ReducedWriteCurrentLow; + uint16_t StepRate; + uint16_t LandingZoneHigh; + uint8_t LandingZoneLow; + uint8_t flags; + uint8_t 
RotationalOffset; + uint8_t Reserved; + uint16_t MediumRotationRate; + uint8_t Reserved2[2]; } IPS_SCSI_MODE_PAGE4; /* * Sense Data Format - Block Descriptor (DASD) */ typedef struct { - u_int32_t NumberOfBlocks; - u_int8_t DensityCode; - u_int16_t BlockLengthHigh; - u_int8_t BlockLengthLow; + uint32_t NumberOfBlocks; + uint8_t DensityCode; + uint16_t BlockLengthHigh; + uint8_t BlockLengthLow; } IPS_SCSI_MODE_PAGE_BLKDESC; /* * Sense Data Format - Mode Page Header */ typedef struct { - u_int8_t DataLength; - u_int8_t MediumType; - u_int8_t Reserved; - u_int8_t BlockDescLength; + uint8_t DataLength; + uint8_t MediumType; + uint8_t Reserved; + uint8_t BlockDescLength; } IPS_SCSI_MODE_PAGE_HEADER; typedef struct { @@ -908,8 +1016,8 @@ typedef struct { * Scatter Gather list format */ typedef struct ips_sglist { - u_int32_t address; - u_int32_t length; + uint32_t address; + uint32_t length; } IPS_SG_LIST, *PIPS_SG_LIST; typedef struct _IPS_INFOSTR { @@ -928,24 +1036,24 @@ typedef struct { typedef struct { void *userbuffer; - u_int32_t usersize; + uint32_t usersize; void *kernbuffer; - u_int32_t kernsize; + uint32_t kernsize; void *ha; void *SC; void *pt; struct semaphore *sem; - u_int32_t offset; - u_int32_t retcode; + uint32_t offset; + uint32_t retcode; } IPS_FLASH_DATA; /* * Status Info */ typedef struct ips_stat { - u_int32_t residue_len; + uint32_t residue_len; void *scb_addr; - u_int8_t padding[12 - sizeof(void *)]; + uint8_t padding[12 - sizeof(void *)]; } ips_stat_t; /* @@ -954,8 +1062,8 @@ typedef struct ips_stat { typedef struct ips_scb_queue { struct ips_scb *head; struct ips_scb *tail; - u_int32_t count; - u_int32_t cpu_flags; + int count; + unsigned long cpu_flags; spinlock_t lock; } ips_scb_queue_t; @@ -965,8 +1073,8 @@ typedef struct ips_scb_queue { typedef struct ips_wait_queue { Scsi_Cmnd *head; Scsi_Cmnd *tail; - u_int32_t count; - u_int32_t cpu_flags; + int count; + unsigned long cpu_flags; spinlock_t lock; } ips_wait_queue_t; @@ -979,8 +1087,8 @@ 
typedef struct ips_copp_wait_item { typedef struct ips_copp_queue { struct ips_copp_wait_item *head; struct ips_copp_wait_item *tail; - u_int32_t count; - u_int32_t cpu_flags; + int count; + unsigned long cpu_flags; spinlock_t lock; } ips_copp_queue_t; @@ -994,27 +1102,27 @@ typedef struct { int (*isintr)(struct ips_ha *); int (*init)(struct ips_ha *); int (*erasebios)(struct ips_ha *); - int (*programbios)(struct ips_ha *, char *, u_int32_t, u_int32_t); - int (*verifybios)(struct ips_ha *, char *, u_int32_t, u_int32_t); + int (*programbios)(struct ips_ha *, char *, uint32_t, uint32_t); + int (*verifybios)(struct ips_ha *, char *, uint32_t, uint32_t); void (*statinit)(struct ips_ha *); void (*intr)(struct ips_ha *); void (*enableint)(struct ips_ha *); - u_int32_t (*statupd)(struct ips_ha *); + uint32_t (*statupd)(struct ips_ha *); } ips_hw_func_t; typedef struct ips_ha { - u_int8_t ha_id[IPS_MAX_CHANNELS+1]; - u_int32_t dcdb_active[IPS_MAX_CHANNELS]; - u_int32_t io_addr; /* Base I/O address */ - u_int8_t irq; /* IRQ for adapter */ - u_int8_t ntargets; /* Number of targets */ - u_int8_t nbus; /* Number of buses */ - u_int8_t nlun; /* Number of Luns */ - u_int16_t ad_type; /* Adapter type */ - u_int16_t host_num; /* Adapter number */ - u_int32_t max_xfer; /* Maximum Xfer size */ - u_int32_t max_cmds; /* Max concurrent commands */ - u_int32_t num_ioctl; /* Number of Ioctls */ + uint8_t ha_id[IPS_MAX_CHANNELS+1]; + uint32_t dcdb_active[IPS_MAX_CHANNELS]; + uint32_t io_addr; /* Base I/O address */ + uint8_t irq; /* IRQ for adapter */ + uint8_t ntargets; /* Number of targets */ + uint8_t nbus; /* Number of buses */ + uint8_t nlun; /* Number of Luns */ + uint16_t ad_type; /* Adapter type */ + uint16_t host_num; /* Adapter number */ + uint32_t max_xfer; /* Maximum Xfer size */ + uint32_t max_cmds; /* Max concurrent commands */ + uint32_t num_ioctl; /* Number of Ioctls */ ips_stat_t sp; /* Status packer pointer */ struct ips_scb *scbs; /* Array of all CCBS */ struct ips_scb 
*scb_freelist; /* SCB free list */ @@ -1028,23 +1136,24 @@ typedef struct ips_ha { IPS_NVRAM_P5 *nvram; /* NVRAM page 5 data */ IPS_SUBSYS *subsys; /* Subsystem parameters */ char *ioctl_data; /* IOCTL data area */ - u_int32_t ioctl_datasize; /* IOCTL data size */ - u_int32_t cmd_in_progress; /* Current command in progress*/ + uint32_t ioctl_datasize; /* IOCTL data size */ + uint32_t cmd_in_progress; /* Current command in progress*/ unsigned long flags; /* HA flags */ - u_int8_t waitflag; /* are we waiting for cmd */ - u_int8_t active; - u_int16_t reset_count; /* number of resets */ - u_int32_t last_ffdc; /* last time we sent ffdc info*/ - u_int8_t revision_id; /* Revision level */ - u_int16_t device_id; /* PCI device ID */ - u_int8_t slot_num; /* PCI Slot Number */ - u_int16_t subdevice_id; /* Subsystem device ID */ - u_int8_t ioctl_order; /* Number of pages in ioctl */ - u_int8_t reserved2; /* Empty */ - u_int8_t bios_version[8]; /* BIOS Revision */ - u_int32_t mem_addr; /* Memory mapped address */ - u_int32_t io_len; /* Size of IO Address */ - u_int32_t mem_len; /* Size of memory address */ + uint8_t waitflag; /* are we waiting for cmd */ + uint8_t active; + int ioctl_reset; /* IOCTL Requested Reset Flag */ + uint16_t reset_count; /* number of resets */ + time_t last_ffdc; /* last time we sent ffdc info*/ + uint8_t revision_id; /* Revision level */ + uint16_t device_id; /* PCI device ID */ + uint8_t slot_num; /* PCI Slot Number */ + uint16_t subdevice_id; /* Subsystem device ID */ + uint8_t ioctl_order; /* Number of pages in ioctl */ + uint8_t reserved2; /* Empty */ + uint8_t bios_version[8]; /* BIOS Revision */ + uint32_t mem_addr; /* Memory mapped address */ + uint32_t io_len; /* Size of IO Address */ + uint32_t mem_len; /* Size of memory address */ char *mem_ptr; /* Memory mapped Ptr */ char *ioremap_ptr; /* ioremapped memory pointer */ ips_hw_func_t func; /* hw function pointers */ @@ -1054,9 +1163,9 @@ typedef struct ips_ha { spinlock_t ips_lock; struct 
semaphore ioctl_sem; /* Semaphore for new IOCTL's */ struct semaphore flash_ioctl_sem; /* Semaphore for Flashing */ - char *save_ioctl_data; /* Save Area for ioctl_data */ - u8 save_ioctl_order; /* Save Area for ioctl_order */ - u32 save_ioctl_datasize;/* Save Area for ioctl_datasize */ + char *flash_data; /* Save Area for flash data */ + u8 flash_order; /* Save Area for flash size order */ + u32 flash_datasize; /* Save Area for flash data size */ } ips_ha_t; typedef void (*ips_scb_callback) (ips_ha_t *, struct ips_scb *); @@ -1067,45 +1176,47 @@ typedef void (*ips_scb_callback) (ips_ha_t *, struct ips_scb *); typedef struct ips_scb { IPS_HOST_COMMAND cmd; IPS_DCDB_TABLE dcdb; - u_int8_t target_id; - u_int8_t bus; - u_int8_t lun; - u_int8_t cdb[12]; - u_int32_t scb_busaddr; - u_int32_t data_busaddr; - u_int32_t timeout; - u_int8_t basic_status; - u_int8_t extended_status; - u_int8_t breakup; - u_int8_t sg_break; - u_int32_t data_len; - u_int32_t sg_len; - u_int32_t flags; - u_int32_t op_code; + uint8_t target_id; + uint8_t bus; + uint8_t lun; + uint8_t cdb[12]; + uint32_t scb_busaddr; + uint32_t data_busaddr; + uint32_t timeout; + uint8_t basic_status; + uint8_t extended_status; + uint8_t breakup; + uint8_t sg_break; + uint32_t data_len; + uint32_t sg_len; + uint32_t flags; + uint32_t op_code; IPS_SG_LIST *sg_list; Scsi_Cmnd *scsi_cmd; struct ips_scb *q_next; ips_scb_callback callback; struct semaphore *sem; + uint32_t sg_busaddr; + int sg_count; } ips_scb_t; typedef struct ips_scb_pt { IPS_HOST_COMMAND cmd; IPS_DCDB_TABLE dcdb; - u_int8_t target_id; - u_int8_t bus; - u_int8_t lun; - u_int8_t cdb[12]; - u_int32_t scb_busaddr; - u_int32_t data_busaddr; - u_int32_t timeout; - u_int8_t basic_status; - u_int8_t extended_status; - u_int16_t breakup; - u_int32_t data_len; - u_int32_t sg_len; - u_int32_t flags; - u_int32_t op_code; + uint8_t target_id; + uint8_t bus; + uint8_t lun; + uint8_t cdb[12]; + uint32_t scb_busaddr; + uint32_t data_busaddr; + uint32_t timeout; + 
uint8_t basic_status; + uint8_t extended_status; + uint16_t breakup; + uint32_t data_len; + uint32_t sg_len; + uint32_t flags; + uint32_t op_code; IPS_SG_LIST *sg_list; Scsi_Cmnd *scsi_cmd; struct ips_scb *q_next; @@ -1116,20 +1227,87 @@ typedef struct ips_scb_pt { * Passthru Command Format */ typedef struct { - u_int8_t CoppID[4]; - u_int32_t CoppCmd; - u_int32_t PtBuffer; - u_int8_t *CmdBuffer; - u_int32_t CmdBSize; + uint8_t CoppID[4]; + uint32_t CoppCmd; + uint32_t PtBuffer; + uint8_t *CmdBuffer; + uint32_t CmdBSize; ips_scb_pt_t CoppCP; - u_int32_t TimeOut; - u_int8_t BasicStatus; - u_int8_t ExtendedStatus; - u_int16_t reserved; + uint32_t TimeOut; + uint8_t BasicStatus; + uint8_t ExtendedStatus; + uint8_t AdapterType; + uint8_t reserved; } ips_passthru_t; #endif +/* The Version Information below gets created by SED during the build process. */ +/* Do not modify the next line; it's what SED is looking for to do the insert. */ +/* Version Info */ +/************************************************************************* +* +* VERSION.H -- version numbers and copyright notices in various formats +* +*************************************************************************/ + +#define IPS_VER_MAJOR 5 +#define IPS_VER_MAJOR_STRING "5" +#define IPS_VER_MINOR 10 +#define IPS_VER_MINOR_STRING "10" +#define IPS_VER_BUILD_STRING "13" +#define IPS_VER_STRING "5.10.13-BETA" +#define IPS_LEGALCOPYRIGHT_STRING "(C) Copyright IBM Corp. 1994, 2002. All Rights Reserved." +#define IPS_NT_LEGALCOPYRIGHT_STRING "(C) Copyright IBM Corp. 1994, 2002." 
+ +/* Version numbers for various adapters */ +#define IPS_VER_SERVERAID1 "2.25.01" +#define IPS_VER_SERVERAID2 "2.88.13" +#define IPS_VER_NAVAJO "2.88.13" +#define IPS_VER_SERVERAID3 "5.10.01" +#define IPS_VER_SERVERAID4H "5.10.01" +#define IPS_VER_SERVERAID4MLx "5.10.01" +#define IPS_VER_SARASOTA "5.10.05" + +/* Compatability IDs for various adapters */ +#define IPS_COMPAT_UNKNOWN "" +#define IPS_COMPAT_CURRENT "SA510" +#define IPS_COMPAT_SERVERAID1 "2.25.01" +#define IPS_COMPAT_SERVERAID2 "2.88.13" +#define IPS_COMPAT_NAVAJO "2.88.13" +#define IPS_COMPAT_KIOWA "2.88.13" +#define IPS_COMPAT_SERVERAID3H "5.10.01" +#define IPS_COMPAT_SERVERAID3L "5.10.01" +#define IPS_COMPAT_SERVERAID4H "5.10.01" +#define IPS_COMPAT_SERVERAID4M "SA510" +#define IPS_COMPAT_SERVERAID4L "SA510" +#define IPS_COMPAT_SERVERAID4Mx "SA510" +#define IPS_COMPAT_SERVERAID4Lx "SA510" +#define IPS_COMPAT_SARASOTA "SA510" +#define IPS_COMPAT_BIOS "SA510" + +#define IPS_COMPAT_MAX_ADAPTER_TYPE 14 +#define IPS_COMPAT_ID_LENGTH 8 + +#define IPS_DEFINE_COMPAT_TABLE(tablename) \ + char tablename[IPS_COMPAT_MAX_ADAPTER_TYPE] [IPS_COMPAT_ID_LENGTH] = { \ + IPS_COMPAT_UNKNOWN, \ + IPS_COMPAT_SERVERAID1, \ + IPS_COMPAT_SERVERAID2, \ + IPS_COMPAT_NAVAJO, \ + IPS_COMPAT_KIOWA, \ + IPS_COMPAT_SERVERAID3H, \ + IPS_COMPAT_SERVERAID3L, \ + IPS_COMPAT_SERVERAID4H, \ + IPS_COMPAT_SERVERAID4M, \ + IPS_COMPAT_SERVERAID4L, \ + IPS_COMPAT_SERVERAID4Mx, \ + IPS_COMPAT_SERVERAID4Lx, \ + IPS_COMPAT_SARASOTA, \ + IPS_COMPAT_SARASOTA \ + } + + /* * Overrides for Emacs so that we almost follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 72296081bfbd..3f74662114de 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -61,7 +61,7 @@ #include <linux/genhd.h> -static char sd_version_str[] = "Version: 2.0.3 (20020417)"; +/* static char sd_version_str[] = "Version: 2.0.3 (20020417)"; */ #define SD_MAJOR(i) (!(i) ? 
SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) diff --git a/fs/Config.help b/fs/Config.help index 2b5b7758f2d4..58eee1f8ea80 100644 --- a/fs/Config.help +++ b/fs/Config.help @@ -1,11 +1,38 @@ CONFIG_QUOTA If you say Y here, you will be able to set per user limits for disk - usage (also called disk quotas). Currently, it works only for the - ext2 file system. You need additional software in order to use quota - support; for details, read the Quota mini-HOWTO, available from + usage (also called disk quotas). Currently, it works for the + ext2, ext3, and reiserfs file system. You need additional software + in order to use quota support (you can download sources from + <http://www.sf.net/projects/linuxquota/>). For further details, read + the Quota mini-HOWTO, available from <http://www.linuxdoc.org/docs.html#howto>. Probably the quota support is only useful for multi user systems. If unsure, say N. +CONFIG_QFMT_V1 + This quota format was (is) used by kernels earlier than 2.4.??. If + you have quota working and you don't want to convert to new quota + format say Y here. + +CONFIG_QFMT_V2 + This quota format allows using quotas with 32-bit UIDs/GIDs. If you + need this functionality say Y here. Note that you will need latest + quota utilities for new quota format with this kernel. + +CONFIG_QIFACE_COMPAT + This option will enable old quota interface in kernel. + If you have old quota tools (version <= 3.04) and you don't want to + upgrade them say Y here. + +CONFIG_QIFACE_V1 + This is the oldest quota interface. It was used for old quota format. + If you have old quota tools and you use old quota format choose this + interface (if unsure, this interface is the best one to choose). + +CONFIG_QIFACE_V2 + This quota interface was used by VFS v0 quota format. If you need + support for VFS v0 quota format (eg. you're using quota on ReiserFS) + and you don't want to upgrade quota tools, choose this interface. 
+ CONFIG_MINIX_FS Minix is a simple operating system used in many classes about OS's. The minix file system (method to organize files on a hard disk diff --git a/fs/Config.in b/fs/Config.in index ef117fa232c4..e66a3b7e8472 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -5,6 +5,14 @@ mainmenu_option next_comment comment 'File systems' bool 'Quota support' CONFIG_QUOTA +dep_tristate ' Old quota format support' CONFIG_QFMT_V1 $CONFIG_QUOTA +dep_tristate ' VFS v0 quota format support' CONFIG_QFMT_V2 $CONFIG_QUOTA +dep_mbool ' Compatible quota interfaces' CONFIG_QIFACE_COMPAT $CONFIG_QUOTA +if [ "$CONFIG_QUOTA" = "y" -a "$CONFIG_QIFACE_COMPAT" = "y" ]; then + choice ' Compatible quota interfaces' \ + "Original CONFIG_QIFACE_V1 \ + VFSv0 CONFIG_QIFACE_V2" Original +fi tristate 'Kernel automounter support' CONFIG_AUTOFS_FS tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS diff --git a/fs/Makefile b/fs/Makefile index 2449b05e367a..daf8174867ac 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -7,7 +7,7 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o buffer.o bio.o +export-objs := filesystems.o open.o dcache.o buffer.o bio.o inode.o dquot.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ @@ -15,7 +15,7 @@ obj-y := open.o read_write.o devices.o file_table.o buffer.o \ namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o namespace.o seq_file.o xattr.o libfs.o \ - fs-writeback.o + fs-writeback.o quota.o ifneq ($(CONFIG_NFSD),n) ifneq ($(CONFIG_NFSD),) @@ -82,6 +82,8 @@ obj-y += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o obj-$(CONFIG_QUOTA) += dquot.o +obj-$(CONFIG_QFMT_V1) += quota_v1.o +obj-$(CONFIG_QFMT_V2) += quota_v2.o # persistent filesystems obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 8f7403772d0c..fd736d937020 100644 --- 
a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -14,7 +14,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/mm.h> #include <linux/smp_lock.h> #include <linux/module.h> diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 55309e14720c..8f59cf69efbf 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -16,7 +16,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/init.h> #include <asm/bitops.h> diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index bc229488cfbf..fb4545696be9 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -13,7 +13,6 @@ #include <linux/time.h> #include <linux/affs_fs.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/mm.h> #include <linux/amigaffs.h> diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 2ff2854b0d74..b22cadd6e86f 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -13,7 +13,6 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/bitops.h> #include <linux/amigaffs.h> diff --git a/fs/affs/file.c b/fs/affs/file.c index 86a98ea9d4bf..3d35848490c7 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -22,7 +22,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/stat.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/dirent.h> #include <linux/fs.h> diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 39f18dc29465..e831d12f1aeb 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/genhd.h> #include <linux/amigaffs.h> #include <linux/major.h> diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 80578e97be18..63bcbb7f8162 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -14,7 +14,6 @@ #include <linux/string.h> #include <linux/stat.h> 
#include <linux/fcntl.h> -#include <linux/locks.h> #include <linux/amigaffs.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> diff --git a/fs/affs/super.c b/fs/affs/super.c index 01041d693028..68af4188327c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/genhd.h> #include <linux/amigaffs.h> #include <linux/major.h> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 16f434801863..5a83e2ce5ad1 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -14,7 +14,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/file.h> -#include <linux/locks.h> #include <asm/bitops.h> #include "autofs_i.h" #define __NO_VERSION__ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 86f73230fa29..c17dcb637608 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -13,7 +13,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/file.h> -#include <linux/locks.h> +#include <linux/pagemap.h> #include <asm/bitops.h> #include "autofs_i.h" #define __NO_VERSION__ diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index c36aff10c3c2..5913276d8d07 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -7,7 +7,6 @@ #include <linux/time.h> #include <linux/string.h> #include <linux/bfs_fs.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include "bfs_defs.h" diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 6413652035c2..313e5e4009ab 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -5,7 +5,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include <linux/bfs_fs.h> #include <linux/smp_lock.h> #include "bfs_defs.h" diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 31823866a9c2..df4cf556785a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -9,7 +9,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/init.h> -#include <linux/locks.h> #include <linux/bfs_fs.h> #include <linux/smp_lock.h> diff --git 
a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 8f1a2752feef..d651e875b01e 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -12,7 +12,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/binfmts.h> #include <linux/elf.h> diff --git a/fs/block_dev.c b/fs/block_dev.c index f9326d65a756..654d98a256b0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -8,7 +8,6 @@ #include <linux/config.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/locks.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <linux/kmod.h> @@ -331,7 +330,7 @@ struct block_device *bdget(dev_t dev) inode->i_bdev = new_bdev; inode->i_data.a_ops = &def_blk_aops; inode->i_data.gfp_mask = GFP_USER; - inode->i_data.ra_pages = &default_ra_pages; + inode->i_data.backing_dev_info = &default_backing_dev_info; spin_lock(&bdev_lock); bdev = bdfind(dev, head); if (!bdev) { @@ -594,11 +593,12 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * } } } - if (bdev->bd_inode->i_data.ra_pages == &default_ra_pages) { - unsigned long *ra_pages = blk_get_ra_pages(bdev); - if (ra_pages == NULL) - ra_pages = &default_ra_pages; - inode->i_data.ra_pages = ra_pages; + if (bdev->bd_inode->i_data.backing_dev_info == + &default_backing_dev_info) { + struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) + bdi = &default_backing_dev_info; + inode->i_data.backing_dev_info = bdi; } if (bdev->bd_op->open) { ret = bdev->bd_op->open(inode, file); @@ -624,7 +624,7 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * out2: if (!bdev->bd_openers) { bdev->bd_op = NULL; - bdev->bd_inode->i_data.ra_pages = &default_ra_pages; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) { blkdev_put(bdev->bd_contains, BDEV_RAW); bdev->bd_contains = NULL; @@ -698,7 +698,7 @@ int blkdev_put(struct 
block_device *bdev, int kind) __MOD_DEC_USE_COUNT(bdev->bd_op->owner); if (!bdev->bd_openers) { bdev->bd_op = NULL; - bdev->bd_inode->i_data.ra_pages = &default_ra_pages; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) { blkdev_put(bdev->bd_contains, BDEV_RAW); bdev->bd_contains = NULL; diff --git a/fs/buffer.c b/fs/buffer.c index e1ea3dca4687..904fec39dd60 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -23,7 +23,6 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/blkdev.h> #include <linux/file.h> @@ -35,7 +34,7 @@ #include <linux/hash.h> #include <asm/bitops.h> -#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers) +#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) /* This is used by some architectures to estimate available memory. */ atomic_t buffermem_pages = ATOMIC_INIT(0); @@ -179,8 +178,8 @@ __clear_page_buffers(struct page *page) static void buffer_io_error(struct buffer_head *bh) { - printk(KERN_ERR "Buffer I/O error on device %s, logical block %ld\n", - bdevname(bh->b_bdev), bh->b_blocknr); + printk(KERN_ERR "Buffer I/O error on device %s, logical block %Ld\n", + bdevname(bh->b_bdev), (u64)bh->b_blocknr); } /* @@ -189,12 +188,12 @@ static void buffer_io_error(struct buffer_head *bh) */ void end_buffer_io_sync(struct buffer_head *bh, int uptodate) { - if (!uptodate) - buffer_io_error(bh); - if (uptodate) + if (uptodate) { set_buffer_uptodate(bh); - else + } else { + buffer_io_error(bh); clear_buffer_uptodate(bh); + } unlock_buffer(bh); put_bh(bh); } @@ -210,10 +209,7 @@ int sync_blockdev(struct block_device *bdev) if (bdev) { int err; - ret = filemap_fdatawait(bdev->bd_inode->i_mapping); - err = filemap_fdatawrite(bdev->bd_inode->i_mapping); - if (!ret) - ret = err; + ret = filemap_fdatawrite(bdev->bd_inode->i_mapping); err = 
filemap_fdatawait(bdev->bd_inode->i_mapping); if (!ret) ret = err; @@ -229,12 +225,14 @@ EXPORT_SYMBOL(sync_blockdev); */ int fsync_super(struct super_block *sb) { - sync_inodes_sb(sb); /* All the inodes */ + sync_inodes_sb(sb, 0); DQUOT_SYNC(sb); lock_super(sb); if (sb->s_dirt && sb->s_op && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); + sync_blockdev(sb->s_bdev); + sync_inodes_sb(sb, 1); return sync_blockdev(sb->s_bdev); } @@ -276,10 +274,10 @@ int fsync_dev(kdev_t dev) */ asmlinkage long sys_sync(void) { - sync_inodes(); /* All mappings and inodes, including block devices */ + sync_inodes(0); /* All mappings and inodes, including block devices */ DQUOT_SYNC(NULL); sync_supers(); /* Write the superblocks */ - sync_inodes(); /* All the mappings and inodes, again. */ + sync_inodes(1); /* All the mappings and inodes, again. */ return 0; } @@ -392,30 +390,31 @@ out: /* * Various filesystems appear to want __get_hash_table to be non-blocking. * But it's the page lock which protects the buffers. To get around this, - * we get exclusion from try_to_free_buffers with the inode's - * i_bufferlist_lock. + * we get exclusion from try_to_free_buffers with the blockdev mapping's + * private_lock. * * Hack idea: for the blockdev mapping, i_bufferlist_lock contention * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take i_bufferlist_lock. (But if - * i_bufferlist_lock is contended then so is mapping->page_lock). + * succeeds, there is no need to take private_lock. (But if + * private_lock is contended then so is mapping->page_lock). 
*/ struct buffer_head * __get_hash_table(struct block_device *bdev, sector_t block, int unused) { - struct inode * const inode = bdev->bd_inode; + struct inode *bd_inode = bdev->bd_inode; + struct address_space *bd_mapping = bd_inode->i_mapping; struct buffer_head *ret = NULL; unsigned long index; struct buffer_head *bh; struct buffer_head *head; struct page *page; - index = block >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - page = find_get_page(inode->i_mapping, index); + index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); + page = find_get_page(bd_mapping, index); if (!page) goto out; - spin_lock(&inode->i_bufferlist_lock); + spin_lock(&bd_mapping->private_lock); if (!page_has_buffers(page)) goto out_unlock; head = page_buffers(page); @@ -430,40 +429,12 @@ __get_hash_table(struct block_device *bdev, sector_t block, int unused) } while (bh != head); buffer_error(); out_unlock: - spin_unlock(&inode->i_bufferlist_lock); + spin_unlock(&bd_mapping->private_lock); page_cache_release(page); out: return ret; } -void buffer_insert_list(spinlock_t *lock, - struct buffer_head *bh, struct list_head *list) -{ - spin_lock(lock); - list_del(&bh->b_inode_buffers); - list_add(&bh->b_inode_buffers, list); - spin_unlock(lock); -} - -/* - * i_bufferlist_lock must be held - */ -static inline void __remove_inode_queue(struct buffer_head *bh) -{ - list_del_init(&bh->b_inode_buffers); -} - -int inode_has_buffers(struct inode *inode) -{ - int ret; - - spin_lock(&inode->i_bufferlist_lock); - ret = !list_empty(&inode->i_dirty_buffers); - spin_unlock(&inode->i_bufferlist_lock); - - return ret; -} - /* If invalidate_buffers() will trash dirty buffers, it means some kind of fs corruption is going on. 
Trashing dirty data always imply losing information that was supposed to be just stored on the physical layer @@ -546,14 +517,12 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) BUG_ON(!buffer_async_read(bh)); - if (!uptodate) - buffer_io_error(bh); - page = bh->b_page; if (uptodate) { set_buffer_uptodate(bh); } else { clear_buffer_uptodate(bh); + buffer_io_error(bh); SetPageError(page); } @@ -606,13 +575,11 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) BUG_ON(!buffer_async_write(bh)); - if (!uptodate) - buffer_io_error(bh); - page = bh->b_page; if (uptodate) { set_buffer_uptodate(bh); } else { + buffer_io_error(bh); clear_buffer_uptodate(bh); SetPageError(page); } @@ -674,6 +641,78 @@ inline void mark_buffer_async_write(struct buffer_head *bh) } EXPORT_SYMBOL(mark_buffer_async_write); + +/* + * fs/buffer.c contains helper functions for buffer-backed address space's + * fsync functions. A common requirement for buffer-based filesystems is + * that certain data from the backing blockdev needs to be written out for + * a successful fsync(). For example, ext2 indirect blocks need to be + * written back and waited upon before fsync() returns. + * + * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(), + * inode_has_buffers() and invalidate_inode_buffers() are provided for the + * management of a list of dependent buffers at ->i_mapping->private_list. + * + * Locking is a little subtle: try_to_free_buffers() will remove buffers + * from their controlling inode's queue when they are being freed. But + * try_to_free_buffers() will be operating against the *blockdev* mapping + * at the time, not against the S_ISREG file which depends on those buffers. + * So the locking for private_list is via the private_lock in the address_space + * which backs the buffers. Which is different from the address_space + * against which the buffers are listed. 
So for a particular address_space, + * mapping->private_lock does *not* protect mapping->private_list! In fact, + * mapping->private_list will always be protected by the backing blockdev's + * ->private_lock. + * + * Which introduces a requirement: all buffers on an address_space's + * ->private_list must be from the same address_space: the blockdev's. + * + * address_spaces which do not place buffers at ->private_list via these + * utility functions are free to use private_lock and private_list for + * whatever they want. The only requirement is that list_empty(private_list) + * be true at clear_inode() time. + * + * FIXME: clear_inode should not call invalidate_inode_buffers(). The + * filesystems should do that. invalidate_inode_buffers() should just go + * BUG_ON(!list_empty). + * + * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should + * take an address_space, not an inode. And it should be called + * mark_buffer_dirty_fsync() to clearly define why those buffers are being + * queued up. + * + * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the + * list if it is already on a list. Because if the buffer is on a list, + * it *must* already be on the right one. If not, the filesystem is being + * silly. This will save a ton of locking. But first we have to ensure + * that buffers are taken *off* the old inode's list when they are freed + * (presumably in truncate). That requires careful auditing of all + * filesystems (do it inside bforget()). It could also be done by bringing + * b_inode back. 
+ */ + +void buffer_insert_list(spinlock_t *lock, + struct buffer_head *bh, struct list_head *list) +{ + spin_lock(lock); + list_del(&bh->b_assoc_buffers); + list_add(&bh->b_assoc_buffers, list); + spin_unlock(lock); +} + +/* + * The buffer's backing address_space's private_lock must be held + */ +static inline void __remove_assoc_queue(struct buffer_head *bh) +{ + list_del_init(&bh->b_assoc_buffers); +} + +int inode_has_buffers(struct inode *inode) +{ + return !list_empty(&inode->i_mapping->private_list); +} + /* * osync is designed to support O_SYNC io. It waits synchronously for * all already-submitted IO to complete, but does not queue any new @@ -709,8 +748,124 @@ repeat: return err; } +/** + * sync_mapping_buffers - write out and wait upon a mapping's "associated" + * buffers + * @buffer_mapping - the mapping which backs the buffers' data + * @mapping - the mapping which wants those buffers written + * + * Starts I/O against the buffers at mapping->private_list, and waits upon + * that I/O. + * + * Basically, this is a convenience function for fsync(). @buffer_mapping is + * the blockdev which "owns" the buffers and @mapping is a file or directory + * which needs those buffers to be written for a successful fsync(). + */ +int sync_mapping_buffers(struct address_space *mapping) +{ + struct address_space *buffer_mapping = mapping->assoc_mapping; + + if (buffer_mapping == NULL || list_empty(&mapping->private_list)) + return 0; + + return fsync_buffers_list(&buffer_mapping->private_lock, + &mapping->private_list); +} +EXPORT_SYMBOL(sync_mapping_buffers); + +/** + * write_mapping_buffers - Start writeout of a mapping's "associated" buffers. + * @mapping - the mapping which wants those buffers written. + * + * Starts I/O against dirty buffers which are on @mapping->private_list. + * Those buffers must be backed by @mapping->assoc_mapping. + * + * The private_list buffers generally contain filesystem indirect blocks. 
+ * The idea is that the filesystem can start I/O against the indirects at + * the same time as running generic_writeback_mapping(), so the indirect's + * I/O will be merged with the data. + * + * We sneakliy write the buffers in probable tail-to-head order. This is + * because generic_writeback_mapping writes in probable head-to-tail + * order. If the file is so huge that the data or the indirects overflow + * the request queue we will at least get some merging this way. + * + * Any clean+unlocked buffers are de-listed. clean/locked buffers must be + * left on the list for an fsync() to wait on. + * + * Couldn't think of a smart way of avoiding livelock, so chose the dumb + * way instead. + * + * FIXME: duplicates fsync_inode_buffers() functionality a bit. + */ +int write_mapping_buffers(struct address_space *mapping) +{ + spinlock_t *lock; + struct address_space *buffer_mapping; + unsigned nr_to_write; /* livelock avoidance */ + struct list_head *lh; + int ret = 0; + + if (list_empty(&mapping->private_list)) + goto out; + + buffer_mapping = mapping->assoc_mapping; + lock = &buffer_mapping->private_lock; + spin_lock(lock); + nr_to_write = 0; + lh = mapping->private_list.next; + while (lh != &mapping->private_list) { + lh = lh->next; + nr_to_write++; + } + nr_to_write *= 2; /* Allow for some late additions */ + + while (nr_to_write-- && !list_empty(&mapping->private_list)) { + struct buffer_head *bh; + + bh = BH_ENTRY(mapping->private_list.prev); + list_del_init(&bh->b_assoc_buffers); + if (!buffer_dirty(bh) && !buffer_locked(bh)) + continue; + /* Stick it on the far end of the list. Order is preserved. 
*/ + list_add(&bh->b_assoc_buffers, &mapping->private_list); + if (test_set_buffer_locked(bh)) + continue; + get_bh(bh); + spin_unlock(lock); + if (test_clear_buffer_dirty(bh)) { + bh->b_end_io = end_buffer_io_sync; + submit_bh(WRITE, bh); + } else { + unlock_buffer(bh); + put_bh(bh); + } + spin_lock(lock); + } + spin_unlock(lock); +out: + return ret; +} + +void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) +{ + struct address_space *mapping = inode->i_mapping; + struct address_space *buffer_mapping = bh->b_page->mapping; + + mark_buffer_dirty(bh); + if (!mapping->assoc_mapping) { + mapping->assoc_mapping = buffer_mapping; + } else { + if (mapping->assoc_mapping != buffer_mapping) + BUG(); + } + buffer_insert_list(&buffer_mapping->private_lock, + bh, &mapping->private_list); +} +EXPORT_SYMBOL(mark_buffer_dirty_inode); + /* - * Synchronise all the inode's dirty buffers to the disk. + * Write out and wait upon a list of buffers. * * We have conflicting pressures: we want to make sure that all * initially dirty buffers get waited on, but that any subsequently @@ -739,9 +894,9 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_lock(lock); while (!list_empty(list)) { bh = BH_ENTRY(list->next); - list_del_init(&bh->b_inode_buffers); + list_del_init(&bh->b_assoc_buffers); if (buffer_dirty(bh) || buffer_locked(bh)) { - list_add(&bh->b_inode_buffers, &tmp); + list_add(&bh->b_assoc_buffers, &tmp); if (buffer_dirty(bh)) { get_bh(bh); spin_unlock(lock); @@ -754,7 +909,7 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list) while (!list_empty(&tmp)) { bh = BH_ENTRY(tmp.prev); - __remove_inode_queue(bh); + __remove_assoc_queue(bh); get_bh(bh); spin_unlock(lock); wait_on_buffer(bh); @@ -776,16 +931,23 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * Invalidate any and all dirty buffers on a given inode. We are * probably unmounting the fs, but that doesn't mean we have already * done a sync(). 
Just drop the buffers from the inode list. + * + * NOTE: we take the inode's blockdev's mapping's private_lock. Which + * assumes that all the buffers are against the blockdev. Not true + * for reiserfs. */ void invalidate_inode_buffers(struct inode *inode) { - struct list_head * entry; - - spin_lock(&inode->i_bufferlist_lock); - while ((entry = inode->i_dirty_buffers.next) != - &inode->i_dirty_buffers) - __remove_inode_queue(BH_ENTRY(entry)); - spin_unlock(&inode->i_bufferlist_lock); + if (inode_has_buffers(inode)) { + struct address_space *mapping = inode->i_mapping; + struct list_head *list = &mapping->private_list; + struct address_space *buffer_mapping = mapping->assoc_mapping; + + spin_lock(&buffer_mapping->private_lock); + while (!list_empty(list)) + __remove_assoc_queue(BH_ENTRY(list->next)); + spin_unlock(&buffer_mapping->private_lock); + } } /* @@ -813,6 +975,7 @@ try_again: bh->b_bdev = NULL; bh->b_this_page = head; + bh->b_blocknr = -1; head = bh; bh->b_state = 0; @@ -939,10 +1102,10 @@ grow_dev_page(struct block_device *bdev, unsigned long block, * lock to be atomic wrt __get_hash_table(), which does not * run under the page lock. */ - spin_lock(&inode->i_bufferlist_lock); + spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); init_page_buffers(page, bdev, block, size); - spin_unlock(&inode->i_bufferlist_lock); + spin_unlock(&inode->i_mapping->private_lock); return page; failed: @@ -1051,11 +1214,13 @@ __getblk(struct block_device *bdev, sector_t block, int size) * address_space's dirty_pages list and then attach the address_space's * inode to its superblock's dirty inode list. * - * mark_buffer_dirty() is atomic. It takes inode->i_bufferlist_lock, + * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, * mapping->page_lock and the global inode_lock. 
*/ void mark_buffer_dirty(struct buffer_head *bh) { + if (!buffer_uptodate(bh)) + buffer_error(); if (!test_set_buffer_dirty(bh)) __set_page_dirty_nobuffers(bh->b_page); } @@ -1235,7 +1400,7 @@ EXPORT_SYMBOL(block_flushpage); /* * We attach and possibly dirty the buffers atomically wrt - * __set_page_dirty_buffers() via i_bufferlist_lock. try_to_free_buffers + * __set_page_dirty_buffers() via private_lock. try_to_free_buffers * is already excluded via the page lock. */ void create_empty_buffers(struct page *page, @@ -1253,7 +1418,7 @@ void create_empty_buffers(struct page *page, } while (bh); tail->b_this_page = head; - spin_lock(&page->mapping->host->i_bufferlist_lock); + spin_lock(&page->mapping->private_lock); if (PageUptodate(page) || PageDirty(page)) { bh = head; do { @@ -1265,7 +1430,7 @@ void create_empty_buffers(struct page *page, } while (bh != head); } __set_page_buffers(page, head); - spin_unlock(&page->mapping->host->i_bufferlist_lock); + spin_unlock(&page->mapping->private_lock); } EXPORT_SYMBOL(create_empty_buffers); @@ -1279,6 +1444,11 @@ EXPORT_SYMBOL(create_empty_buffers); * unmap_buffer() for such invalidation, but that was wrong. We definitely * don't want to mark the alias unmapped, for example - it would confuse * anyone who might pick it with bread() afterwards... + * + * Also.. Note that bforget() doesn't lock the buffer. So there can + * be writeout I/O going on against recently-freed buffers. We don't + * wait on that I/O in bforget() - it's more efficient to wait on the I/O + * only if we really need to. That happens here. */ static void unmap_underlying_metadata(struct buffer_head *bh) { @@ -2207,7 +2377,7 @@ static void check_ttfb_buffer(struct page *page, struct buffer_head *bh) * are unused, and releases them if so. * * Exclusion against try_to_free_buffers may be obtained by either - * locking the page or by holding its inode's i_bufferlist_lock. + * locking the page or by holding its mapping's private_lock. 
* * If the page is dirty but all the buffers are clean then we need to * be sure to mark the page clean as well. This is because the page @@ -2218,7 +2388,7 @@ static void check_ttfb_buffer(struct page *page, struct buffer_head *bh) * The same applies to regular filesystem pages: if all the buffers are * clean then we set the page clean and proceed. To do that, we require * total exclusion from __set_page_dirty_buffers(). That is obtained with - * i_bufferlist_lock. + * private_lock. * * try_to_free_buffers() is non-blocking. */ @@ -2250,7 +2420,8 @@ static /*inline*/ int drop_buffers(struct page *page) do { struct buffer_head *next = bh->b_this_page; - __remove_inode_queue(bh); + if (!list_empty(&bh->b_assoc_buffers)) + __remove_assoc_queue(bh); free_buffer_head(bh); bh = next; } while (bh != head); @@ -2262,18 +2433,17 @@ failed: int try_to_free_buffers(struct page *page) { - struct inode *inode; + struct address_space * const mapping = page->mapping; int ret = 0; BUG_ON(!PageLocked(page)); if (PageWriteback(page)) return 0; - if (page->mapping == NULL) /* swapped-in anon page */ + if (mapping == NULL) /* swapped-in anon page */ return drop_buffers(page); - inode = page->mapping->host; - spin_lock(&inode->i_bufferlist_lock); + spin_lock(&mapping->private_lock); ret = drop_buffers(page); if (ret && !PageSwapCache(page)) { /* @@ -2286,7 +2456,7 @@ int try_to_free_buffers(struct page *page) */ ClearPageDirty(page); } - spin_unlock(&inode->i_bufferlist_lock); + spin_unlock(&mapping->private_lock); return ret; } EXPORT_SYMBOL(try_to_free_buffers); @@ -2310,11 +2480,6 @@ asmlinkage long sys_bdflush(int func, long data) return 0; } -void wakeup_bdflush(void) -{ - pdflush_flush(0); -} - /* * Buffer-head allocation */ @@ -2329,7 +2494,7 @@ EXPORT_SYMBOL(alloc_buffer_head); void free_buffer_head(struct buffer_head *bh) { - BUG_ON(!list_empty(&bh->b_inode_buffers)); + BUG_ON(!list_empty(&bh->b_assoc_buffers)); mempool_free(bh, bh_mempool); } EXPORT_SYMBOL(free_buffer_head); 
@@ -2341,8 +2506,7 @@ static void init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long fla struct buffer_head * bh = (struct buffer_head *)data; memset(bh, 0, sizeof(*bh)); - bh->b_blocknr = -1; - INIT_LIST_HEAD(&bh->b_inode_buffers); + INIT_LIST_HEAD(&bh->b_assoc_buffers); } } diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 47f8ebae639e..5e526d018e23 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -13,7 +13,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/list.h> diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 6d260b30d551..60ee649aacc5 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -25,11 +25,6 @@ inline int coda_isnullfid(ViceFid *fid) return 1; } -static int coda_inocmp(struct inode *inode, unsigned long ino, void *opaque) -{ - return (coda_fideq((ViceFid *)opaque, &(ITOC(inode)->c_fid))); -} - static struct inode_operations coda_symlink_inode_operations = { readlink: page_readlink, follow_link: page_follow_link, @@ -55,26 +50,44 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr) init_special_inode(inode, inode->i_mode, attr->va_rdev); } +static int coda_test_inode(struct inode *inode, void *data) +{ + ViceFid *fid = (ViceFid *)data; + return coda_fideq(&(ITOC(inode)->c_fid), fid); +} + +static int coda_set_inode(struct inode *inode, void *data) +{ + ViceFid *fid = (ViceFid *)data; + ITOC(inode)->c_fid = *fid; + return 0; +} + +static int coda_fail_inode(struct inode *inode, void *data) +{ + return -1; +} + struct inode * coda_iget(struct super_block * sb, ViceFid * fid, struct coda_vattr * attr) { struct inode *inode; struct coda_inode_info *cii; - ino_t ino = coda_f2i(fid); + struct coda_sb_info *sbi = coda_sbp(sb); + unsigned long hash = coda_f2i(fid); - inode = iget4(sb, ino, coda_inocmp, fid); + inode = iget5_locked(sb, hash, coda_test_inode, coda_set_inode, fid); if (!inode) 
return ERR_PTR(-ENOMEM); - /* check if the inode is already initialized */ - cii = ITOC(inode); - if (coda_isnullfid(&cii->c_fid)) - /* new, empty inode found... initializing */ - cii->c_fid = *fid; - - /* we shouldnt see inode collisions anymore */ - if (!coda_fideq(fid, &cii->c_fid)) BUG(); + if (inode->i_state & I_NEW) { + cii = ITOC(inode); + /* we still need to set i_ino for things like stat(2) */ + inode->i_ino = hash; + list_add(&cii->c_cilist, &sbi->sbi_cihead); + unlock_new_inode(inode); + } /* always replace the attributes, type might have changed */ coda_fill_inode(inode, attr); @@ -112,6 +125,7 @@ void coda_replace_fid(struct inode *inode, struct ViceFid *oldfid, struct ViceFid *newfid) { struct coda_inode_info *cii; + unsigned long hash = coda_f2i(newfid); cii = ITOC(inode); @@ -122,60 +136,46 @@ void coda_replace_fid(struct inode *inode, struct ViceFid *oldfid, /* XXX we probably need to hold some lock here! */ remove_inode_hash(inode); cii->c_fid = *newfid; - inode->i_ino = coda_f2i(newfid); - insert_inode_hash(inode); + inode->i_ino = hash; + __insert_inode_hash(inode, hash); } /* convert a fid to an inode. 
*/ struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) { - ino_t nr; struct inode *inode; - struct coda_inode_info *cii; + unsigned long hash = coda_f2i(fid); if ( !sb ) { printk("coda_fid_to_inode: no sb!\n"); return NULL; } - nr = coda_f2i(fid); - inode = iget4(sb, nr, coda_inocmp, fid); - if ( !inode ) { - printk("coda_fid_to_inode: null from iget, sb %p, nr %ld.\n", - sb, (long)nr); + inode = iget5_locked(sb, hash, coda_test_inode, coda_fail_inode, fid); + if ( !inode ) return NULL; - } - cii = ITOC(inode); + /* we should never see newly created inodes because we intentionally + * fail in the initialization callback */ + BUG_ON(inode->i_state & I_NEW); - /* The inode could already be purged due to memory pressure */ - if (coda_isnullfid(&cii->c_fid)) { - inode->i_nlink = 0; - iput(inode); - return NULL; - } - - /* we shouldn't see inode collisions anymore */ - if ( !coda_fideq(fid, &cii->c_fid) ) BUG(); - - return inode; + return inode; } /* the CONTROL inode is made without asking attributes from Venus */ int coda_cnode_makectl(struct inode **inode, struct super_block *sb) { - int error = 0; - - *inode = iget(sb, CTL_INO); - if ( *inode ) { - (*inode)->i_op = &coda_ioctl_inode_operations; - (*inode)->i_fop = &coda_ioctl_operations; - (*inode)->i_mode = 0444; - error = 0; - } else { - error = -ENOMEM; - } - - return error; + int error = -ENOMEM; + + *inode = new_inode(sb); + if (*inode) { + (*inode)->i_ino = CTL_INO; + (*inode)->i_op = &coda_ioctl_inode_operations; + (*inode)->i_fop = &coda_ioctl_operations; + (*inode)->i_mode = 0444; + error = 0; + } + + return error; } diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index c50dae543692..ee14f574233b 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -14,7 +14,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 
d18a8ad385bd..40398f8b66c7 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -15,7 +15,6 @@ #include <linux/file.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/string.h> #include <linux/smp_lock.h> diff --git a/fs/coda/file.c b/fs/coda/file.c index f74655873fa8..7d6dd4b5de74 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -14,7 +14,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/string.h> #include <asm/uaccess.h> diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 8a68f2a13461..5066d9a04984 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -15,7 +15,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/unistd.h> #include <linux/smp_lock.h> #include <linux/file.h> @@ -33,7 +32,6 @@ #include <linux/coda_cache.h> /* VFS super_block ops */ -static void coda_read_inode(struct inode *); static void coda_clear_inode(struct inode *); static void coda_put_super(struct super_block *); static int coda_statfs(struct super_block *sb, struct statfs *buf); @@ -92,7 +90,6 @@ struct super_operations coda_super_operations = { alloc_inode: coda_alloc_inode, destroy_inode: coda_destroy_inode, - read_inode: coda_read_inode, clear_inode: coda_clear_inode, put_super: coda_put_super, statfs: coda_statfs, @@ -229,18 +226,6 @@ static void coda_put_super(struct super_block *sb) kfree(sbi); } -/* all filling in of inodes postponed until lookup */ -static void coda_read_inode(struct inode *inode) -{ - struct coda_sb_info *sbi = coda_sbp(inode->i_sb); - struct coda_inode_info *cii; - - if (!sbi) BUG(); - - cii = ITOC(inode); - list_add(&cii->c_cilist, &sbi->sbi_cihead); -} - static void coda_clear_inode(struct inode *inode) { struct coda_inode_info *cii = ITOC(inode); diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index a6a11d615fcf..edfb9aa96544 100644 --- 
a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -13,7 +13,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/string.h> #define __NO_VERSION__ #include <linux/module.h> diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index eed35ddd28a1..764a64ee8332 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -13,7 +13,7 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> +#include <linux/pagemap.h> #include <linux/smp_lock.h> #include <linux/coda.h> diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f3b8699ad5f1..72700a2dcb3c 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -26,7 +26,6 @@ #include <linux/file.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/string.h> #include <asm/uaccess.h> #include <linux/vmalloc.h> diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index c9a6374289cd..0e9e2600a6db 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -16,7 +16,6 @@ #include <linux/pagemap.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/cramfs_fs.h> #include <linux/smp_lock.h> diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 231bc91cd79d..0727e719279d 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/kernel.h> -#include <linux/locks.h> #include <linux/major.h> #include <linux/slab.h> #include <linux/stat.h> diff --git a/fs/dnotify.c b/fs/dnotify.c index 5152699a8493..a26be325d74e 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -135,7 +135,6 @@ void __inode_dir_notify(struct inode *inode, unsigned long event) } if (changed) redo_inode_mask(inode); -out: write_unlock(&dn_lock); } diff --git a/fs/dquot.c b/fs/dquot.c index 78043b2a618b..b585d30fee7a 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -35,7 +35,7 @@ * Jan Kara, 
<jack@suse.cz>, sponsored by SuSE CR, 10-11/99 * * Used struct list_head instead of own list struct - * Invalidation of dquots with dq_count > 0 no longer possible + * Invalidation of referenced dquots is no longer possible * Improved free_dquots list management * Quota and i_blocks are now updated in one place to avoid races * Warnings are now delayed so we won't block in critical section @@ -62,18 +62,50 @@ #include <linux/sysctl.h> #include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/module.h> +#include <linux/proc_fs.h> #include <asm/uaccess.h> -#define __DQUOT_VERSION__ "dquot_6.4.0" +static char *quotatypes[] = INITQFNAMES; +static struct quota_format_type *quota_formats; /* List of registered formats */ -int nr_dquots, nr_free_dquots; +int register_quota_format(struct quota_format_type *fmt) +{ + lock_kernel(); + fmt->qf_next = quota_formats; + quota_formats = fmt; + unlock_kernel(); + return 0; +} -static char *quotatypes[] = INITQFNAMES; +void unregister_quota_format(struct quota_format_type *fmt) +{ + struct quota_format_type **actqf; + + lock_kernel(); + for (actqf = "a_formats; *actqf && *actqf != fmt; actqf = &(*actqf)->qf_next); + if (*actqf) + *actqf = (*actqf)->qf_next; + unlock_kernel(); +} + +static struct quota_format_type *find_quota_format(int id) +{ + struct quota_format_type *actqf; + + lock_kernel(); + for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next); + if (actqf && !try_inc_mod_count(actqf->qf_owner)) + actqf = NULL; + unlock_kernel(); + return actqf; +} -static inline struct quota_mount_options *sb_dqopt(struct super_block *sb) +static void put_quota_format(struct quota_format_type *fmt) { - return &sb->s_dquot; + if (fmt->qf_owner) + __MOD_DEC_USE_COUNT(fmt->qf_owner); } /* @@ -116,28 +148,32 @@ static LIST_HEAD(inuse_list); static LIST_HEAD(free_dquots); static struct list_head dquot_hash[NR_DQHASH]; -static struct dqstats dqstats; +struct dqstats dqstats; static void dqput(struct dquot 
*); static struct dquot *dqduplicate(struct dquot *); -static inline char is_enabled(struct quota_mount_options *dqopt, short type) +static inline void get_dquot_ref(struct dquot *dquot) { - switch (type) { - case USRQUOTA: - return((dqopt->flags & DQUOT_USR_ENABLED) != 0); - case GRPQUOTA: - return((dqopt->flags & DQUOT_GRP_ENABLED) != 0); - } - return(0); + dquot->dq_count++; +} + +static inline void put_dquot_ref(struct dquot *dquot) +{ + dquot->dq_count--; } -static inline char sb_has_quota_enabled(struct super_block *sb, short type) +static inline void get_dquot_dup_ref(struct dquot *dquot) { - return is_enabled(sb_dqopt(sb), type); + dquot->dq_dup_ref++; } -static inline int const hashfn(struct super_block *sb, unsigned int id, short type) +static inline void put_dquot_dup_ref(struct dquot *dquot) +{ + dquot->dq_dup_ref--; +} + +static inline int const hashfn(struct super_block *sb, unsigned int id, int type) { return((HASHDEV(sb->s_dev) ^ id) * (MAXQUOTAS - type)) % NR_DQHASH; } @@ -154,7 +190,7 @@ static inline void remove_dquot_hash(struct dquot *dquot) INIT_LIST_HEAD(&dquot->dq_hash); } -static inline struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, short type) +static inline struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, int type) { struct list_head *head; struct dquot *dquot; @@ -244,6 +280,7 @@ static inline void unlock_dquot(struct dquot *dquot) wake_up(&dquot->dq_wait_lock); } +/* Wait for dquot to be unused */ static void __wait_dquot_unused(struct dquot *dquot) { DECLARE_WAITQUEUE(wait, current); @@ -259,79 +296,50 @@ repeat: current->state = TASK_RUNNING; } -/* - * We don't have to be afraid of deadlocks as we never have quotas on quota files... 
- */ -static void write_dquot(struct dquot *dquot) -{ - short type = dquot->dq_type; - struct file *filp; - mm_segment_t fs; - loff_t offset; - ssize_t ret; - struct semaphore *sem = &dquot->dq_sb->s_dquot.dqio_sem; - struct dqblk dqbuf; - - down(sem); - filp = dquot->dq_sb->s_dquot.files[type]; - offset = dqoff(dquot->dq_id); - fs = get_fs(); - set_fs(KERNEL_DS); +/* Wait for all duplicated dquot references to be dropped */ +static void __wait_dup_drop(struct dquot *dquot) +{ + DECLARE_WAITQUEUE(wait, current); - /* - * Note: clear the DQ_MOD flag unconditionally, - * so we don't loop forever on failure. - */ - memcpy(&dqbuf, &dquot->dq_dqb, sizeof(struct dqblk)); - dquot->dq_flags &= ~DQ_MOD; - ret = 0; - if (filp) - ret = filp->f_op->write(filp, (char *)&dqbuf, - sizeof(struct dqblk), &offset); - if (ret != sizeof(struct dqblk)) - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", - dquot->dq_sb->s_id); - - set_fs(fs); - up(sem); - dqstats.writes++; -} - -static void read_dquot(struct dquot *dquot) -{ - short type = dquot->dq_type; - struct file *filp; - mm_segment_t fs; - loff_t offset; - - filp = dquot->dq_sb->s_dquot.files[type]; - if (filp == (struct file *)NULL) - return; + add_wait_queue(&dquot->dq_wait_free, &wait); +repeat: + set_current_state(TASK_UNINTERRUPTIBLE); + if (dquot->dq_dup_ref) { + schedule(); + goto repeat; + } + remove_wait_queue(&dquot->dq_wait_free, &wait); + current->state = TASK_RUNNING; +} + +static int read_dqblk(struct dquot *dquot) +{ + int ret; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); lock_dquot(dquot); - if (!dquot->dq_sb) /* Invalidated quota? 
*/ - goto out_lock; - /* Now we are sure filp is valid - the dquot isn't invalidated */ - down(&dquot->dq_sb->s_dquot.dqio_sem); - offset = dqoff(dquot->dq_id); - fs = get_fs(); - set_fs(KERNEL_DS); - filp->f_op->read(filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk), &offset); - up(&dquot->dq_sb->s_dquot.dqio_sem); - set_fs(fs); - - if (dquot->dq_bhardlimit == 0 && dquot->dq_bsoftlimit == 0 && - dquot->dq_ihardlimit == 0 && dquot->dq_isoftlimit == 0) - dquot->dq_flags |= DQ_FAKE; - dqstats.reads++; -out_lock: + down(&dqopt->dqio_sem); + ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); + up(&dqopt->dqio_sem); unlock_dquot(dquot); + return ret; +} + +static int commit_dqblk(struct dquot *dquot) +{ + int ret; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + + down(&dqopt->dqio_sem); + ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + up(&dqopt->dqio_sem); + return ret; } /* Invalidate all dquots on the list, wait for all users. Note that this function is called * after quota is disabled so no new quota might be created. As we only insert to the end of * inuse list, we don't have to restart searching... */ -static void invalidate_dquots(struct super_block *sb, short type) +static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot; struct list_head *head; @@ -360,12 +368,13 @@ restart: } } -int sync_dquots(struct super_block *sb, short type) +static int vfs_quota_sync(struct super_block *sb, int type) { struct list_head *head; struct dquot *dquot; + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; - lock_kernel(); restart: list_for_each(head, &inuse_list) { dquot = list_entry(head, struct dquot, dq_inuse); @@ -375,22 +384,77 @@ restart: continue; if (!dquot->dq_sb) /* Invalidated? */ continue; - if (!(dquot->dq_flags & (DQ_MOD | DQ_LOCKED))) + if (!dquot_dirty(dquot) && !(dquot->dq_flags & DQ_LOCKED)) continue; - /* Raise use count so quota won't be invalidated. 
We can't use dqduplicate() as it does too many tests */ - dquot->dq_count++; + /* Get reference to quota so it won't be invalidated. get_dquot_ref() + * is enough since if dquot is locked/modified it can't be + * on the free list */ + get_dquot_ref(dquot); if (dquot->dq_flags & DQ_LOCKED) wait_on_dquot(dquot); - if (dquot->dq_flags & DQ_MOD) - write_dquot(dquot); + if (dquot_dirty(dquot)) + commit_dqblk(dquot); dqput(dquot); goto restart; } + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)) + dqopt->info[cnt].dqi_flags &= ~DQF_ANY_DQUOT_DIRTY; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt])) + dqopt->ops[cnt]->write_file_info(sb, cnt); dqstats.syncs++; - unlock_kernel(); return 0; } +static struct super_block *get_super_to_sync(int type) +{ + struct list_head *head; + int cnt, dirty; + +restart: + spin_lock(&sb_lock); + list_for_each(head, &super_blocks) { + struct super_block *sb = list_entry(head, struct super_block, s_list); + + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) + && sb_dqopt(sb)->info[cnt].dqi_flags & DQF_ANY_DQUOT_DIRTY) + dirty = 1; + if (!dirty) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (!sb->s_root) { + drop_super(sb); + goto restart; + } + return sb; + } + spin_unlock(&sb_lock); + return NULL; +} + +void sync_dquots(struct super_block *sb, int type) +{ + if (sb) { + lock_kernel(); + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); + unlock_kernel(); + } + else { + while ((sb = get_super_to_sync(type))) { + lock_kernel(); + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); + unlock_kernel(); + drop_super(sb); + } + } +} + /* Free unused dquots from cache */ static void prune_dqcache(int count) { @@ -433,11 +497,15 @@ int shrink_dqcache_memory(int priority, 
unsigned int gfp_mask) return 0; } -/* NOTE: If you change this function please check whether dqput_blocks() works right... */ +/* + * Put reference to dquot + * NOTE: If you change this function please check whether dqput_blocks() works right... + */ static void dqput(struct dquot *dquot) { if (!dquot) return; +#ifdef __DQUOT_PARANOIA if (!dquot->dq_count) { printk("VFS: dqput: trying to free free dquot\n"); printk("VFS: device %s, dquot of %s %d\n", @@ -446,33 +514,38 @@ static void dqput(struct dquot *dquot) dquot->dq_id); return; } +#endif dqstats.drops++; we_slept: + if (dquot->dq_dup_ref && dquot->dq_count - dquot->dq_dup_ref <= 1) { /* Last unduplicated reference? */ + __wait_dup_drop(dquot); + goto we_slept; + } if (dquot->dq_count > 1) { /* We have more than one user... We can simply decrement use count */ - dquot->dq_count--; + put_dquot_ref(dquot); return; } - if (dquot->dq_flags & DQ_MOD) { - write_dquot(dquot); + if (dquot_dirty(dquot)) { + commit_dqblk(dquot); goto we_slept; } /* sanity check */ if (!list_empty(&dquot->dq_free)) { printk(KERN_ERR "dqput: dquot already on free list??\n"); - dquot->dq_count--; /* J.K. Just decrementing use count seems safer... 
*/ + put_dquot_ref(dquot); return; } - dquot->dq_count--; + put_dquot_ref(dquot); /* If dquot is going to be invalidated invalidate_dquots() is going to free it so */ if (!(dquot->dq_flags & DQ_INVAL)) put_dquot_last(dquot); /* Place at end of LRU free queue */ wake_up(&dquot->dq_wait_free); } -static struct dquot *get_empty_dquot(void) +static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; @@ -486,6 +559,8 @@ static struct dquot *get_empty_dquot(void) INIT_LIST_HEAD(&dquot->dq_free); INIT_LIST_HEAD(&dquot->dq_inuse); INIT_LIST_HEAD(&dquot->dq_hash); + dquot->dq_sb = sb; + dquot->dq_type = type; dquot->dq_count = 1; /* all dquots go on the inuse_list */ put_inuse(dquot); @@ -493,11 +568,11 @@ static struct dquot *get_empty_dquot(void) return dquot; } -static struct dquot *dqget(struct super_block *sb, unsigned int id, short type) +static struct dquot *dqget(struct super_block *sb, unsigned int id, int type) { unsigned int hashent = hashfn(sb, id, type); struct dquot *dquot, *empty = NODQUOT; - struct quota_mount_options *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = sb_dqopt(sb); we_slept: if (!is_enabled(dqopt, type)) { @@ -508,20 +583,19 @@ we_slept: if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) { if (empty == NODQUOT) { - if ((empty = get_empty_dquot()) == NODQUOT) + if ((empty = get_empty_dquot(sb, type)) == NODQUOT) schedule(); /* Try to wait for a moment... 
*/ goto we_slept; } dquot = empty; dquot->dq_id = id; - dquot->dq_type = type; - dquot->dq_sb = sb; /* hash it first so it can be found */ insert_dquot_hash(dquot); - read_dquot(dquot); + read_dqblk(dquot); } else { - if (!dquot->dq_count++) + if (!dquot->dq_count) remove_free_dquot(dquot); + get_dquot_ref(dquot); dqstats.cache_hits++; wait_on_dquot(dquot); if (empty) @@ -539,24 +613,40 @@ we_slept: return dquot; } +/* Duplicate reference to dquot got from inode */ static struct dquot *dqduplicate(struct dquot *dquot) { if (dquot == NODQUOT) return NODQUOT; - dquot->dq_count++; + get_dquot_ref(dquot); if (!dquot->dq_sb) { printk(KERN_ERR "VFS: dqduplicate(): Invalidated quota to be duplicated!\n"); - dquot->dq_count--; + put_dquot_ref(dquot); return NODQUOT; } if (dquot->dq_flags & DQ_LOCKED) printk(KERN_ERR "VFS: dqduplicate(): Locked quota to be duplicated!\n"); + get_dquot_dup_ref(dquot); dquot->dq_referenced++; dqstats.lookups++; return dquot; } -static int dqinit_needed(struct inode *inode, short type) +/* Put duplicated reference */ +static void dqputduplicate(struct dquot *dquot) +{ + if (!dquot->dq_dup_ref) { + printk(KERN_ERR "VFS: dqputduplicate(): Duplicated dquot put without duplicate reference.\n"); + return; + } + put_dquot_dup_ref(dquot); + if (!dquot->dq_dup_ref) + wake_up(&dquot->dq_wait_free); + put_dquot_ref(dquot); + dqstats.drops++; +} + +static int dqinit_needed(struct inode *inode, int type) { int cnt; @@ -570,13 +660,10 @@ static int dqinit_needed(struct inode *inode, short type) return 0; } -static void add_dquot_ref(struct super_block *sb, short type) +static void add_dquot_ref(struct super_block *sb, int type) { struct list_head *p; - if (!sb->dq_op) - return; /* nothing to do */ - restart: file_list_lock(); list_for_each(p, &sb->s_files) { @@ -599,13 +686,15 @@ restart: /* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */ static inline int dqput_blocks(struct dquot *dquot) { - if (dquot->dq_count == 1) + 
if (dquot->dq_dup_ref && dquot->dq_count - dquot->dq_dup_ref <= 1) + return 1; + if (dquot->dq_count <= 1 && dquot->dq_flags & DQ_MOD) return 1; return 0; } /* Remove references to dquots from inode - add dquot to list for freeing if needed */ -int remove_inode_dquot_ref(struct inode *inode, short type, struct list_head *tofree_head) +int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) { struct dquot *dquot = inode->i_dquot[type]; int cnt; @@ -652,38 +741,38 @@ void put_dquot_list(struct list_head *tofree_head) static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number) { - dquot->dq_curinodes += number; - dquot->dq_flags |= DQ_MOD; + dquot->dq_dqb.dqb_curinodes += number; + mark_dquot_dirty(dquot); } -static inline void dquot_incr_blocks(struct dquot *dquot, unsigned long number) +static inline void dquot_incr_space(struct dquot *dquot, qsize_t number) { - dquot->dq_curblocks += number; - dquot->dq_flags |= DQ_MOD; + dquot->dq_dqb.dqb_curspace += number; + mark_dquot_dirty(dquot); } static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number) { - if (dquot->dq_curinodes > number) - dquot->dq_curinodes -= number; + if (dquot->dq_dqb.dqb_curinodes > number) + dquot->dq_dqb.dqb_curinodes -= number; else - dquot->dq_curinodes = 0; - if (dquot->dq_curinodes < dquot->dq_isoftlimit) - dquot->dq_itime = (time_t) 0; + dquot->dq_dqb.dqb_curinodes = 0; + if (dquot->dq_dqb.dqb_curinodes < dquot->dq_dqb.dqb_isoftlimit) + dquot->dq_dqb.dqb_itime = (time_t) 0; dquot->dq_flags &= ~DQ_INODES; - dquot->dq_flags |= DQ_MOD; + mark_dquot_dirty(dquot); } -static inline void dquot_decr_blocks(struct dquot *dquot, unsigned long number) +static inline void dquot_decr_space(struct dquot *dquot, qsize_t number) { - if (dquot->dq_curblocks > number) - dquot->dq_curblocks -= number; + if (dquot->dq_dqb.dqb_curspace > number) + dquot->dq_dqb.dqb_curspace -= number; else - dquot->dq_curblocks = 0; - if 
(dquot->dq_curblocks < dquot->dq_bsoftlimit) - dquot->dq_btime = (time_t) 0; + dquot->dq_dqb.dqb_curspace = 0; + if (toqb(dquot->dq_dqb.dqb_curspace) < dquot->dq_dqb.dqb_bsoftlimit) + dquot->dq_dqb.dqb_btime = (time_t) 0; dquot->dq_flags &= ~DQ_BLKS; - dquot->dq_flags |= DQ_MOD; + mark_dquot_dirty(dquot); } static inline int need_print_warning(struct dquot *dquot, int flag) @@ -756,7 +845,10 @@ static inline void flush_warnings(struct dquot **dquots, char *warntype) static inline char ignore_hardlimit(struct dquot *dquot) { - return capable(CAP_SYS_RESOURCE) && !dquot->dq_sb->s_dquot.rsquash[dquot->dq_type]; + struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; + + return capable(CAP_SYS_RESOURCE) && + (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH)); } static int check_idq(struct dquot *dquot, ulong inodes, char *warntype) @@ -765,60 +857,60 @@ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype) if (inodes <= 0 || dquot->dq_flags & DQ_FAKE) return QUOTA_OK; - if (dquot->dq_ihardlimit && - (dquot->dq_curinodes + inodes) > dquot->dq_ihardlimit && + if (dquot->dq_dqb.dqb_ihardlimit && + (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { *warntype = IHARDWARN; return NO_QUOTA; } - if (dquot->dq_isoftlimit && - (dquot->dq_curinodes + inodes) > dquot->dq_isoftlimit && - dquot->dq_itime && CURRENT_TIME >= dquot->dq_itime && + if (dquot->dq_dqb.dqb_isoftlimit && + (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit && + dquot->dq_dqb.dqb_itime && CURRENT_TIME >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { *warntype = ISOFTLONGWARN; return NO_QUOTA; } - if (dquot->dq_isoftlimit && - (dquot->dq_curinodes + inodes) > dquot->dq_isoftlimit && - dquot->dq_itime == 0) { + if (dquot->dq_dqb.dqb_isoftlimit && + (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit && + dquot->dq_dqb.dqb_itime == 0) { *warntype = 
ISOFTWARN; - dquot->dq_itime = CURRENT_TIME + dquot->dq_sb->s_dquot.inode_expire[dquot->dq_type]; + dquot->dq_dqb.dqb_itime = CURRENT_TIME + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; } return QUOTA_OK; } -static int check_bdq(struct dquot *dquot, ulong blocks, char prealloc, char *warntype) +static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype) { *warntype = 0; - if (blocks <= 0 || dquot->dq_flags & DQ_FAKE) + if (space <= 0 || dquot->dq_flags & DQ_FAKE) return QUOTA_OK; - if (dquot->dq_bhardlimit && - (dquot->dq_curblocks + blocks) > dquot->dq_bhardlimit && + if (dquot->dq_dqb.dqb_bhardlimit && + toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = BHARDWARN; return NO_QUOTA; } - if (dquot->dq_bsoftlimit && - (dquot->dq_curblocks + blocks) > dquot->dq_bsoftlimit && - dquot->dq_btime && CURRENT_TIME >= dquot->dq_btime && + if (dquot->dq_dqb.dqb_bsoftlimit && + toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit && + dquot->dq_dqb.dqb_btime && CURRENT_TIME >= dquot->dq_dqb.dqb_btime && !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = BSOFTLONGWARN; return NO_QUOTA; } - if (dquot->dq_bsoftlimit && - (dquot->dq_curblocks + blocks) > dquot->dq_bsoftlimit && - dquot->dq_btime == 0) { + if (dquot->dq_dqb.dqb_bsoftlimit && + toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit && + dquot->dq_dqb.dqb_btime == 0) { if (!prealloc) { *warntype = BSOFTWARN; - dquot->dq_btime = CURRENT_TIME + dquot->dq_sb->s_dquot.block_expire[dquot->dq_type]; + dquot->dq_dqb.dqb_btime = CURRENT_TIME + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; } else /* @@ -832,148 +924,15 @@ static int check_bdq(struct dquot *dquot, ulong blocks, char prealloc, char *war } /* - * Initialize a dquot-struct with new quota info. This is used by the - * system call interface functions. 
- */ -static int set_dqblk(struct super_block *sb, int id, short type, int flags, struct dqblk *dqblk) -{ - struct dquot *dquot; - int error = -EFAULT; - struct dqblk dq_dqblk; - - if (copy_from_user(&dq_dqblk, dqblk, sizeof(struct dqblk))) - return error; - - if (sb && (dquot = dqget(sb, id, type)) != NODQUOT) { - /* We can't block while changing quota structure... */ - if (id > 0 && ((flags & SET_QUOTA) || (flags & SET_QLIMIT))) { - dquot->dq_bhardlimit = dq_dqblk.dqb_bhardlimit; - dquot->dq_bsoftlimit = dq_dqblk.dqb_bsoftlimit; - dquot->dq_ihardlimit = dq_dqblk.dqb_ihardlimit; - dquot->dq_isoftlimit = dq_dqblk.dqb_isoftlimit; - } - - if ((flags & SET_QUOTA) || (flags & SET_USE)) { - if (dquot->dq_isoftlimit && - dquot->dq_curinodes < dquot->dq_isoftlimit && - dq_dqblk.dqb_curinodes >= dquot->dq_isoftlimit) - dquot->dq_itime = CURRENT_TIME + dquot->dq_sb->s_dquot.inode_expire[type]; - dquot->dq_curinodes = dq_dqblk.dqb_curinodes; - if (dquot->dq_curinodes < dquot->dq_isoftlimit) - dquot->dq_flags &= ~DQ_INODES; - if (dquot->dq_bsoftlimit && - dquot->dq_curblocks < dquot->dq_bsoftlimit && - dq_dqblk.dqb_curblocks >= dquot->dq_bsoftlimit) - dquot->dq_btime = CURRENT_TIME + dquot->dq_sb->s_dquot.block_expire[type]; - dquot->dq_curblocks = dq_dqblk.dqb_curblocks; - if (dquot->dq_curblocks < dquot->dq_bsoftlimit) - dquot->dq_flags &= ~DQ_BLKS; - } - - if (id == 0) { - dquot->dq_sb->s_dquot.block_expire[type] = dquot->dq_btime = dq_dqblk.dqb_btime; - dquot->dq_sb->s_dquot.inode_expire[type] = dquot->dq_itime = dq_dqblk.dqb_itime; - } - - if (dq_dqblk.dqb_bhardlimit == 0 && dq_dqblk.dqb_bsoftlimit == 0 && - dq_dqblk.dqb_ihardlimit == 0 && dq_dqblk.dqb_isoftlimit == 0) - dquot->dq_flags |= DQ_FAKE; - else - dquot->dq_flags &= ~DQ_FAKE; - - dquot->dq_flags |= DQ_MOD; - dqput(dquot); - } - return 0; -} - -static int get_quota(struct super_block *sb, int id, short type, struct dqblk *dqblk) -{ - struct dquot *dquot; - struct dqblk data; - int error = -ESRCH; - - if (!sb || 
!sb_has_quota_enabled(sb, type)) - goto out; - dquot = dqget(sb, id, type); - if (dquot == NODQUOT) - goto out; - - memcpy(&data, &dquot->dq_dqb, sizeof(struct dqblk)); /* We copy data to preserve them from changing */ - dqput(dquot); - error = -EFAULT; - if (dqblk && !copy_to_user(dqblk, &data, sizeof(struct dqblk))) - error = 0; -out: - return error; -} - -static int get_stats(caddr_t addr) -{ - int error = -EFAULT; - struct dqstats stats; - - dqstats.allocated_dquots = nr_dquots; - dqstats.free_dquots = nr_free_dquots; - - /* make a copy, in case we page-fault in user space */ - memcpy(&stats, &dqstats, sizeof(struct dqstats)); - if (!copy_to_user(addr, &stats, sizeof(struct dqstats))) - error = 0; - return error; -} - -static int quota_root_squash(struct super_block *sb, short type, int *addr) -{ - int new_value, error; - - if (!sb) - return(-ENODEV); - - error = -EFAULT; - if (!copy_from_user(&new_value, addr, sizeof(int))) { - sb_dqopt(sb)->rsquash[type] = new_value; - error = 0; - } - return error; -} - -#if 0 /* We are not going to support filesystems without i_blocks... */ -/* - * This is a simple algorithm that calculates the size of a file in blocks. - * This is only used on filesystems that do not have an i_blocks count. - */ -static u_long isize_to_blocks(loff_t isize, size_t blksize_bits) -{ - u_long blocks; - u_long indirect; - - if (!blksize_bits) - blksize_bits = BLOCK_SIZE_BITS; - blocks = (isize >> blksize_bits) + ((isize & ~((1 << blksize_bits)-1)) ? 1 : 0); - if (blocks > 10) { - indirect = ((blocks - 11) >> 8) + 1; /* single indirect blocks */ - if (blocks > (10 + 256)) { - indirect += ((blocks - 267) >> 16) + 1; /* double indirect blocks */ - if (blocks > (10 + 256 + (256 << 8))) - indirect++; /* triple indirect blocks */ - } - blocks += indirect; - } - return blocks; -} -#endif - -/* * Externally referenced functions through dquot_operations in inode. * * Note: this is a blocking operation. 
*/ -void dquot_initialize(struct inode *inode, short type) +void dquot_initialize(struct inode *inode, int type) { struct dquot *dquot[MAXQUOTAS]; unsigned int id = 0; - short cnt; + int cnt; if (IS_NOQUOTA(inode)) return; @@ -1019,7 +978,7 @@ void dquot_initialize(struct inode *inode, short type) void dquot_drop(struct inode *inode) { struct dquot *dquot; - short cnt; + int cnt; inode->i_flags &= ~S_QUOTA; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1034,7 +993,7 @@ void dquot_drop(struct inode *inode) /* * This operation can block, but only after everything is updated */ -int dquot_alloc_block(struct inode *inode, unsigned long number, char warn) +int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) { int cnt, ret = NO_QUOTA; struct dquot *dquot[MAXQUOTAS]; @@ -1056,16 +1015,16 @@ int dquot_alloc_block(struct inode *inode, unsigned long number, char warn) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (dquot[cnt] == NODQUOT) continue; - dquot_incr_blocks(dquot[cnt], number); + dquot_incr_space(dquot[cnt], number); } - inode->i_blocks += number << (BLOCK_SIZE_BITS - 9); + inode_add_bytes(inode, number); /* NOBLOCK End */ ret = QUOTA_OK; warn_put_all: flush_warnings(dquot, warntype); for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (dquot[cnt] != NODQUOT) - dqput(dquot[cnt]); + dqputduplicate(dquot[cnt]); unlock_kernel(); return ret; } @@ -1104,7 +1063,7 @@ warn_put_all: flush_warnings(dquot, warntype); for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (dquot[cnt] != NODQUOT) - dqput(dquot[cnt]); + dqputduplicate(dquot[cnt]); unlock_kernel(); return ret; } @@ -1112,9 +1071,9 @@ warn_put_all: /* * This is a non-blocking operation. 
*/ -void dquot_free_block(struct inode *inode, unsigned long number) +void dquot_free_space(struct inode *inode, qsize_t number) { - unsigned short cnt; + unsigned int cnt; struct dquot *dquot; /* NOBLOCK Start */ @@ -1123,10 +1082,10 @@ void dquot_free_block(struct inode *inode, unsigned long number) dquot = dqduplicate(inode->i_dquot[cnt]); if (dquot == NODQUOT) continue; - dquot_decr_blocks(dquot, number); - dqput(dquot); + dquot_decr_space(dquot, number); + dqputduplicate(dquot); } - inode->i_blocks -= number << (BLOCK_SIZE_BITS - 9); + inode_sub_bytes(inode, number); unlock_kernel(); /* NOBLOCK End */ } @@ -1136,7 +1095,7 @@ void dquot_free_block(struct inode *inode, unsigned long number) */ void dquot_free_inode(const struct inode *inode, unsigned long number) { - unsigned short cnt; + unsigned int cnt; struct dquot *dquot; /* NOBLOCK Start */ @@ -1146,7 +1105,7 @@ void dquot_free_inode(const struct inode *inode, unsigned long number) if (dquot == NODQUOT) continue; dquot_decr_inodes(dquot, number); - dqput(dquot); + dqputduplicate(dquot); } unlock_kernel(); /* NOBLOCK End */ @@ -1159,7 +1118,7 @@ void dquot_free_inode(const struct inode *inode, unsigned long number) */ int dquot_transfer(struct inode *inode, struct iattr *iattr) { - unsigned long blocks; + qsize_t space; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, @@ -1189,7 +1148,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) } } /* NOBLOCK START: From now on we shouldn't block */ - blocks = (inode->i_blocks >> 1); + space = inode_get_bytes(inode); /* Build the transfer_from list and check the limits */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* The second test can fail when quotaoff is in progress... 
*/ @@ -1199,7 +1158,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (transfer_from[cnt] == NODQUOT) /* Can happen on quotafiles (quota isn't initialized on them)... */ continue; if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || - check_bdq(transfer_to[cnt], blocks, 0, warntype+cnt) == NO_QUOTA) + check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) goto warn_put_all; } @@ -1214,10 +1173,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) continue; dquot_decr_inodes(transfer_from[cnt], 1); - dquot_decr_blocks(transfer_from[cnt], blocks); + dquot_decr_space(transfer_from[cnt], space); dquot_incr_inodes(transfer_to[cnt], 1); - dquot_incr_blocks(transfer_to[cnt], blocks); + dquot_incr_space(transfer_to[cnt], space); if (inode->i_dquot[cnt] == NODQUOT) BUG(); @@ -1233,52 +1192,29 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) warn_put_all: flush_warnings(transfer_to, warntype); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + /* First we must put duplicate - otherwise we might deadlock */ if (transfer_to[cnt] != NODQUOT) - dqput(transfer_to[cnt]); + dqputduplicate(transfer_to[cnt]); if (transfer_from[cnt] != NODQUOT) dqput(transfer_from[cnt]); } return ret; } -static ctl_table fs_table[] = { - {FS_NRDQUOT, "dquot-nr", &nr_dquots, 2*sizeof(int), - 0444, NULL, &proc_dointvec}, - {}, -}; - -static ctl_table dquot_table[] = { - {CTL_FS, "fs", NULL, 0, 0555, fs_table}, - {}, -}; - -static int __init dquot_init(void) -{ - int i; - - register_sysctl_table(dquot_table, 0); - - for (i = 0; i < NR_DQHASH; i++) - INIT_LIST_HEAD(dquot_hash + i); - printk(KERN_NOTICE "VFS: Diskquotas version %s initialized\n", __DQUOT_VERSION__); - return 0; -} -__initcall(dquot_init); - /* * Definitions of diskquota operations. 
*/ struct dquot_operations dquot_operations = { initialize: dquot_initialize, /* mandatory */ drop: dquot_drop, /* mandatory */ - alloc_block: dquot_alloc_block, + alloc_space: dquot_alloc_space, alloc_inode: dquot_alloc_inode, - free_block: dquot_free_block, + free_space: dquot_free_space, free_inode: dquot_free_inode, transfer: dquot_transfer }; -static inline void set_enable_flags(struct quota_mount_options *dqopt, short type) +static inline void set_enable_flags(struct quota_info *dqopt, int type) { switch (type) { case USRQUOTA: @@ -1290,7 +1226,7 @@ static inline void set_enable_flags(struct quota_mount_options *dqopt, short typ } } -static inline void reset_enable_flags(struct quota_mount_options *dqopt, short type) +static inline void reset_enable_flags(struct quota_info *dqopt, int type) { switch (type) { case USRQUOTA: @@ -1303,16 +1239,15 @@ static inline void reset_enable_flags(struct quota_mount_options *dqopt, short t } /* Function in inode.c - remove pointers to dquots in icache */ -extern void remove_dquot_ref(struct super_block *, short); +extern void remove_dquot_ref(struct super_block *, int); /* * Turn quota off on a device. 
type == -1 ==> quotaoff for all types (umount) */ -int quota_off(struct super_block *sb, short type) +int vfs_quota_off(struct super_block *sb, int type) { - struct file *filp; - short cnt; - struct quota_mount_options *dqopt = sb_dqopt(sb); + int cnt; + struct quota_info *dqopt = sb_dqopt(sb); lock_kernel(); if (!sb) @@ -1330,12 +1265,18 @@ int quota_off(struct super_block *sb, short type) /* Note: these are blocking operations */ remove_dquot_ref(sb, cnt); invalidate_dquots(sb, cnt); + if (info_dirty(&dqopt->info[cnt])) + dqopt->ops[cnt]->write_file_info(sb, cnt); + if (dqopt->ops[cnt]->free_file_info) + dqopt->ops[cnt]->free_file_info(sb, cnt); + put_quota_format(dqopt->info[cnt].dqi_format); - filp = dqopt->files[cnt]; + fput(dqopt->files[cnt]); dqopt->files[cnt] = (struct file *)NULL; - dqopt->inode_expire[cnt] = 0; - dqopt->block_expire[cnt] = 0; - fput(filp); + dqopt->info[cnt].dqi_flags = 0; + dqopt->info[cnt].dqi_igrace = 0; + dqopt->info[cnt].dqi_bgrace = 0; + dqopt->ops[cnt] = NULL; } up(&dqopt->dqoff_sem); out: @@ -1343,38 +1284,29 @@ out: return 0; } -static inline int check_quotafile_size(loff_t size) +int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) { - ulong blocks = size >> BLOCK_SIZE_BITS; - size_t off = size & (BLOCK_SIZE - 1); - - return !(((blocks % sizeof(struct dqblk)) * BLOCK_SIZE + off % sizeof(struct dqblk)) % sizeof(struct dqblk)); -} - -static int quota_on(struct super_block *sb, short type, char *path) -{ - struct file *f; + struct file *f = NULL; struct inode *inode; - struct dquot *dquot; - struct quota_mount_options *dqopt = sb_dqopt(sb); - char *tmp; + struct quota_info *dqopt = sb_dqopt(sb); + struct quota_format_type *fmt = find_quota_format(format_id); int error; - if (is_enabled(dqopt, type)) - return -EBUSY; + if (!fmt) + return -EINVAL; + if (is_enabled(dqopt, type)) { + error = -EBUSY; + goto out_fmt; + } down(&dqopt->dqoff_sem); - tmp = getname(path); - error = PTR_ERR(tmp); - if (IS_ERR(tmp)) - 
goto out_lock; - f = filp_open(tmp, O_RDWR, 0600); - putname(tmp); + f = filp_open(path, O_RDWR, 0600); error = PTR_ERR(f); if (IS_ERR(f)) goto out_lock; + dqopt->files[type] = f; error = -EIO; if (!f->f_op || !f->f_op->read || !f->f_op->write) goto out_f; @@ -1383,134 +1315,223 @@ static int quota_on(struct super_block *sb, short type, char *path) if (!S_ISREG(inode->i_mode)) goto out_f; error = -EINVAL; - if (inode->i_size == 0 || !check_quotafile_size(inode->i_size)) + if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_f; /* We don't want quota on quota files */ dquot_drop(inode); inode->i_flags |= S_NOQUOTA; - dqopt->files[type] = f; - sb->dq_op = &dquot_operations; + dqopt->ops[type] = fmt->qf_ops; + dqopt->info[type].dqi_format = fmt; + if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) + goto out_f; set_enable_flags(dqopt, type); - dquot = dqget(sb, 0, type); - dqopt->inode_expire[type] = (dquot != NODQUOT) ? dquot->dq_itime : MAX_IQ_TIME; - dqopt->block_expire[type] = (dquot != NODQUOT) ? dquot->dq_btime : MAX_DQ_TIME; - dqput(dquot); - add_dquot_ref(sb, type); up(&dqopt->dqoff_sem); return 0; out_f: - filp_close(f, NULL); + if (f) + filp_close(f, NULL); + dqopt->files[type] = NULL; out_lock: up(&dqopt->dqoff_sem); +out_fmt: + put_quota_format(fmt); return error; } -/* - * This is the system call interface. This communicates with - * the user-level programs. Currently this only supports diskquota - * calls. Maybe we need to add the process quotas etc. in the future, - * but we probably should use rlimits for that. 
- */ -asmlinkage long sys_quotactl(int cmd, const char *special, int id, caddr_t addr) +/* Generic routine for getting common part of quota structure */ +static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { - int cmds = 0, type = 0, flags = 0; - kdev_t dev; - struct super_block *sb = NULL; - int ret = -EINVAL; + struct mem_dqblk *dm = &dquot->dq_dqb; + + di->dqb_bhardlimit = dm->dqb_bhardlimit; + di->dqb_bsoftlimit = dm->dqb_bsoftlimit; + di->dqb_curspace = dm->dqb_curspace; + di->dqb_ihardlimit = dm->dqb_ihardlimit; + di->dqb_isoftlimit = dm->dqb_isoftlimit; + di->dqb_curinodes = dm->dqb_curinodes; + di->dqb_btime = dm->dqb_btime; + di->dqb_itime = dm->dqb_itime; + di->dqb_valid = QIF_ALL; +} - lock_kernel(); - cmds = cmd >> SUBCMDSHIFT; - type = cmd & SUBCMDMASK; +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) +{ + struct dquot *dquot = dqget(sb, id, type); - if ((u_int) type >= MAXQUOTAS) - goto out; - if (id & ~0xFFFF) - goto out; + if (!dquot) + return -EINVAL; + do_get_dqblk(dquot, di); + dqput(dquot); + return 0; +} - ret = -EPERM; - switch (cmds) { - case Q_SYNC: - case Q_GETSTATS: - break; - case Q_GETQUOTA: - if (((type == USRQUOTA && current->euid != id) || - (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) - goto out; - break; - default: - if (!capable(CAP_SYS_ADMIN)) - goto out; - } +/* Generic routine for setting common part of quota structure */ +static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) +{ + struct mem_dqblk *dm = &dquot->dq_dqb; + int check_blim = 0, check_ilim = 0; - ret = -EINVAL; - dev = NODEV; - if (special != NULL || (cmds != Q_SYNC && cmds != Q_GETSTATS)) { - mode_t mode; - struct nameidata nd; - - ret = user_path_walk(special, &nd); - if (ret) - goto out; - - dev = nd.dentry->d_inode->i_rdev; - mode = nd.dentry->d_inode->i_mode; - path_release(&nd); - - ret = -ENOTBLK; - if (!S_ISBLK(mode)) - goto out; - ret = -ENODEV; - sb = get_super(dev); - 
if (!sb) - goto out; + if (di->dqb_valid & QIF_SPACE) { + dm->dqb_curspace = di->dqb_curspace; + check_blim = 1; } - - ret = -EINVAL; - switch (cmds) { - case Q_QUOTAON: - ret = quota_on(sb, type, (char *) addr); - goto out; - case Q_QUOTAOFF: - ret = quota_off(sb, type); - goto out; - case Q_GETQUOTA: - ret = get_quota(sb, id, type, (struct dqblk *) addr); - goto out; - case Q_SETQUOTA: - flags |= SET_QUOTA; - break; - case Q_SETUSE: - flags |= SET_USE; - break; - case Q_SETQLIM: - flags |= SET_QLIMIT; - break; - case Q_SYNC: - ret = sync_dquots(sb, type); - goto out; - case Q_GETSTATS: - ret = get_stats(addr); - goto out; - case Q_RSQUASH: - ret = quota_root_squash(sb, type, (int *) addr); - goto out; - default: - goto out; + if (di->dqb_valid & QIF_BLIMITS) { + dm->dqb_bsoftlimit = di->dqb_bsoftlimit; + dm->dqb_bhardlimit = di->dqb_bhardlimit; + check_blim = 1; + } + if (di->dqb_valid & QIF_INODES) { + dm->dqb_curinodes = di->dqb_curinodes; + check_ilim = 1; + } + if (di->dqb_valid & QIF_ILIMITS) { + dm->dqb_isoftlimit = di->dqb_isoftlimit; + dm->dqb_ihardlimit = di->dqb_ihardlimit; + check_ilim = 1; + } + if (di->dqb_valid & QIF_BTIME) + dm->dqb_btime = di->dqb_btime; + if (di->dqb_valid & QIF_ITIME) + dm->dqb_itime = di->dqb_itime; + + if (check_blim) { + if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) { + dm->dqb_btime = 0; + dquot->dq_flags &= ~DQ_BLKS; + } + else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */ + dm->dqb_btime = CURRENT_TIME + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; + } + if (check_ilim) { + if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { + dm->dqb_itime = 0; + dquot->dq_flags &= ~DQ_INODES; + } + else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... 
*/ + dm->dqb_itime = CURRENT_TIME + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; } + if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) + dquot->dq_flags &= ~DQ_FAKE; + else + dquot->dq_flags |= DQ_FAKE; + dquot->dq_flags |= DQ_MOD; +} - ret = -ENODEV; - if (sb && sb_has_quota_enabled(sb, type)) - ret = set_dqblk(sb, id, type, flags, (struct dqblk *) addr); -out: - if (sb) - drop_super(sb); +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) +{ + struct dquot *dquot = dqget(sb, id, type); + + if (!dquot) + return -EINVAL; + do_set_dqblk(dquot, di); + dqput(dquot); + return 0; +} + +/* Generic routine for getting common part of quota file information */ +int vfs_get_info(struct super_block *sb, int type, struct if_dqinfo *ii) +{ + struct mem_dqinfo *mi = sb_dqopt(sb)->info + type; + + ii->dqi_bgrace = mi->dqi_bgrace; + ii->dqi_igrace = mi->dqi_igrace; + ii->dqi_flags = mi->dqi_flags & DQF_MASK; + ii->dqi_valid = IIF_ALL; + return 0; +} + +/* Generic routine for setting common part of quota file information */ +int vfs_set_info(struct super_block *sb, int type, struct if_dqinfo *ii) +{ + struct mem_dqinfo *mi = sb_dqopt(sb)->info + type; + + if (ii->dqi_valid & IIF_BGRACE) + mi->dqi_bgrace = ii->dqi_bgrace; + if (ii->dqi_valid & IIF_IGRACE) + mi->dqi_igrace = ii->dqi_igrace; + if (ii->dqi_valid & IIF_FLAGS) + mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); + mark_info_dirty(mi); + return 0; +} + +#ifdef CONFIG_PROC_FS +static int read_stats(char *buffer, char **start, off_t offset, int count, int *eof, void *data) +{ + int len; + struct quota_format_type *actqf; + + dqstats.allocated_dquots = nr_dquots; + dqstats.free_dquots = nr_free_dquots; + + len = sprintf(buffer, "Version %u\n", __DQUOT_NUM_VERSION__); + len += sprintf(buffer + len, "Formats"); + lock_kernel(); + for (actqf = quota_formats; actqf; actqf = actqf->qf_next) + len += sprintf(buffer + len, " 
%u", actqf->qf_fmt_id); unlock_kernel(); - return ret; + len += sprintf(buffer + len, "\n%u %u %u %u %u %u %u %u\n", + dqstats.lookups, dqstats.drops, + dqstats.reads, dqstats.writes, + dqstats.cache_hits, dqstats.allocated_dquots, + dqstats.free_dquots, dqstats.syncs); + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} +#endif + +struct quotactl_ops vfs_quotactl_ops = { + quota_on: vfs_quota_on, + quota_off: vfs_quota_off, + quota_sync: vfs_quota_sync, + get_info: vfs_get_info, + set_info: vfs_set_info, + get_dqblk: vfs_get_dqblk, + set_dqblk: vfs_set_dqblk +}; + +static ctl_table fs_table[] = { + {FS_NRDQUOT, "dquot-nr", &nr_dquots, 2*sizeof(int), + 0444, NULL, &proc_dointvec}, + {}, +}; + +static ctl_table dquot_table[] = { + {CTL_FS, "fs", NULL, 0, 0555, fs_table}, + {}, +}; + +static int __init dquot_init(void) +{ + int i; + + register_sysctl_table(dquot_table, 0); + for (i = 0; i < NR_DQHASH; i++) + INIT_LIST_HEAD(dquot_hash + i); + printk(KERN_NOTICE "VFS: Diskquotas version %s initialized\n", __DQUOT_VERSION__); +#ifdef CONFIG_PROC_FS + create_proc_read_entry("fs/quota", 0, 0, read_stats, NULL); +#endif + return 0; } +__initcall(dquot_init); + +EXPORT_SYMBOL(register_quota_format); +EXPORT_SYMBOL(unregister_quota_format); +EXPORT_SYMBOL(dqstats); diff --git a/fs/efs/super.c b/fs/efs/super.c index 4af82d06d5bf..51cca8ecfa9d 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -8,7 +8,6 @@ #include <linux/init.h> #include <linux/module.h> -#include <linux/locks.h> #include <linux/efs_fs.h> #include <linux/efs_vh.h> #include <linux/efs_fs_sb.h> diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 7cb0c303a6ea..4d900fbc710d 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -13,8 +13,8 @@ #include <linux/config.h> #include "ext2.h" -#include <linux/locks.h> #include <linux/quotaops.h> +#include <linux/sched.h> /* * balloc.c 
contains the blocks allocation and deallocation routines diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c index 71ac1701a3a4..4528b40c31c9 100644 --- a/fs/ext2/fsync.c +++ b/fs/ext2/fsync.c @@ -23,7 +23,6 @@ */ #include "ext2.h" -#include <linux/locks.h> #include <linux/smp_lock.h> @@ -37,7 +36,7 @@ int ext2_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; int err; - err = fsync_inode_buffers(inode); + err = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY)) return err; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index bfe4804c9549..f5daf0397000 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -14,8 +14,8 @@ #include <linux/config.h> #include "ext2.h" -#include <linux/locks.h> #include <linux/quotaops.h> +#include <linux/sched.h> /* @@ -218,6 +218,44 @@ error_return: } /* + * We perform asynchronous prereading of the new inode's inode block when + * we create the inode, in the expectation that the inode will be written + * back soon. There are two reasons: + * + * - When creating a large number of files, the async prereads will be + * nicely merged into large reads + * - When writing out a large number of inodes, we don't need to keep on + * stalling the writes while we read the inode block. + * + * FIXME: ext2_get_group_desc() needs to be simplified. 
+ */ +static void ext2_preread_inode(struct inode *inode) +{ + unsigned long block_group; + unsigned long offset; + unsigned long block; + struct buffer_head *bh; + struct ext2_group_desc * gdp; + + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + gdp = ext2_get_group_desc(inode->i_sb, block_group, &bh); + if (gdp == NULL) + return; + + /* + * Figure out the offset within the block group inode table + */ + offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) * + EXT2_INODE_SIZE(inode->i_sb); + block = le32_to_cpu(gdp->bg_inode_table) + + (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)); + bh = sb_getblk(inode->i_sb, block); + if (!buffer_uptodate(bh) && !buffer_locked(bh)) + ll_rw_block(READA, 1, &bh); + __brelse(bh); +} + +/* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both * free space and a low directory-to-inode ratio; if that fails, then of @@ -417,6 +455,7 @@ repeat: return ERR_PTR(-EDQUOT); } ext2_debug ("allocating inode %lu\n", inode->i_ino); + ext2_preread_inode(inode); return inode; fail2: diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7200da15a9bf..592db3d7937f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -23,10 +23,10 @@ */ #include "ext2.h" -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/time.h> #include <linux/highuid.h> +#include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/module.h> @@ -41,7 +41,8 @@ static int ext2_update_inode(struct inode * inode, int do_sync); */ void ext2_put_inode (struct inode * inode) { - ext2_discard_prealloc (inode); + if (atomic_read(&inode->i_count) < 2) /* final iput? 
*/ + ext2_discard_prealloc (inode); } /* @@ -583,6 +584,20 @@ static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, u { return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block); } + +static int +ext2_writeback_mapping(struct address_space *mapping, int *nr_to_write) +{ + int ret; + int err; + + ret = write_mapping_buffers(mapping); + err = generic_writeback_mapping(mapping, nr_to_write); + if (!ret) + ret = err; + return ret; +} + struct address_space_operations ext2_aops = { readpage: ext2_readpage, writepage: ext2_writepage, @@ -591,7 +606,7 @@ struct address_space_operations ext2_aops = { commit_write: generic_commit_write, bmap: ext2_bmap, direct_IO: ext2_direct_IO, - writeback_mapping: generic_writeback_mapping, + writeback_mapping: ext2_writeback_mapping, vm_writeback: generic_vm_writeback, }; @@ -860,7 +875,7 @@ do_indirects: } inode->i_mtime = inode->i_ctime = CURRENT_TIME; if (IS_SYNC(inode)) { - fsync_inode_buffers(inode); + sync_mapping_buffers(inode->i_mapping); ext2_sync_inode (inode); } else { mark_inode_dirty(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index db59722a8c30..7e162a913b34 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -22,7 +22,6 @@ #include "ext2.h" #include <linux/slab.h> #include <linux/init.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/random.h> #include <linux/smp_lock.h> diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 2da50dce0434..ea4bd4510319 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -17,7 +17,6 @@ #include <linux/jbd.h> #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> -#include <linux/locks.h> #include <linux/quotaops.h> /* diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 3ed85a1adcf8..d5040f1cdbcd 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -20,7 +20,6 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/locks.h> #include <linux/jbd.h> #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> 
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 8266f2408664..463f2981437e 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -55,13 +55,13 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) J_ASSERT(ext3_journal_current_handle() == 0); /* - * fsync_inode_buffers() just walks i_dirty_buffers and waits + * fsync_inode_buffers() just walks private_list and waits * on them. It's a no-op for full data journalling because - * i_dirty_buffers will be ampty. + * private_list will be empty. * Really, we only need to start I/O on the dirty buffers - * we'll end up waiting on them in commit. */ - ret = fsync_inode_buffers(inode); + ret = sync_mapping_buffers(inode->i_mapping); ext3_force_commit(inode->i_sb); return ret; diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index cd57f9f5757d..f190708fd710 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -19,7 +19,6 @@ #include <linux/ext3_jbd.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/quotaops.h> #include <asm/bitops.h> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 076f527e7b23..4be6e5eaa4ea 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -22,15 +22,16 @@ * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 */ +#include <linux/module.h> #include <linux/fs.h> #include <linux/time.h> #include <linux/ext3_jbd.h> #include <linux/jbd.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/highuid.h> +#include <linux/pagemap.h> #include <linux/quotaops.h> -#include <linux/module.h> +#include <linux/string.h> /* * SEARCH_FROM_ZERO forces each block allocation to search from the start @@ -1078,14 +1079,8 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh) * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from block_symlink(). 
* - * ext3 inode->i_dirty_buffers policy: If we're journalling data we - * definitely don't want them to appear on the inode at all - instead - * we need to manage them at the JBD layer and we need to intercept - * the relevant sync operations and translate them into journal operations. - * - * If we're not journalling data then we can just leave the buffers - * on ->i_dirty_buffers. If someone writes them out for us then thanks. - * Otherwise we'll do it in commit, if we're using ordered data. + * ext3 never places buffers on inode->i_mapping->private_list. metadata + * buffers are managed internally. */ static int ext3_commit_write(struct file *file, struct page *page, @@ -1331,7 +1326,7 @@ static int ext3_writepage(struct page *page) out_fail: unlock_kernel(); - SetPageDirty(page); + set_page_dirty(page); unlock_page(page); return ret; } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 58e3d8f89b61..2587c77a1d88 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -26,7 +26,6 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/quotaops.h> #include <linux/smp_lock.h> diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3ffa9fecf652..1c90e699030a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -26,7 +26,6 @@ #include <linux/ext3_jbd.h> #include <linux/slab.h> #include <linux/init.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> diff --git a/fs/fat/file.c b/fs/fat/file.c index 32a79bfaa66a..ee581867252c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -7,7 +7,6 @@ */ #include <linux/time.h> -#include <linux/locks.h> #include <linux/msdos_fs.h> #include <linux/fat_cvf.h> #include <linux/smp_lock.h> diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 03fe0dc5b3d9..81568264732c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -12,11 +12,11 @@ #include <linux/module.h> #include <linux/time.h> -#include <linux/locks.h> #include 
<linux/slab.h> #include <linux/smp_lock.h> #include <linux/msdos_fs.h> #include <linux/fat_cvf.h> +#include <linux/pagemap.h> //#include <asm/uaccess.h> #include <asm/unaligned.h> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index eadb01f85bd2..5ad90478a547 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/writeback.h> +#include <linux/backing-dev.h> /** * __mark_inode_dirty - internal function @@ -62,32 +63,43 @@ void __mark_inode_dirty(struct inode *inode, int flags) spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { + const int was_dirty = inode->i_state & I_DIRTY; + struct address_space *mapping = inode->i_mapping; + inode->i_state |= flags; + if (!was_dirty) + mapping->dirtied_when = jiffies; + /* * If the inode is locked, just update its dirty state. * The unlocker will place the inode on the appropriate * superblock list, based upon its state. */ if (inode->i_state & I_LOCK) - goto same_list; + goto out; /* * Only add valid (hashed) inode to the superblock's * dirty list. Add blockdev inodes as well. */ if (list_empty(&inode->i_hash) && !S_ISBLK(inode->i_mode)) - goto same_list; - if (inode->i_mapping->dirtied_when == 0) - inode->i_mapping->dirtied_when = jiffies; - list_del(&inode->i_list); - list_add(&inode->i_list, &sb->s_dirty); + goto out; + + /* + * If the inode was already on s_dirty, don't reposition + * it (that would break s_dirty time-ordering). 
+ */ + if (!was_dirty) { + list_del(&inode->i_list); + list_add(&inode->i_list, &sb->s_dirty); + } } -same_list: +out: spin_unlock(&inode_lock); } -static inline void write_inode(struct inode *inode, int sync) +static void write_inode(struct inode *inode, int sync) { if (inode->i_sb->s_op && inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) @@ -116,27 +128,24 @@ static inline void write_inode(struct inode *inode, int sync) static void __sync_single_inode(struct inode *inode, int wait, int *nr_to_write) { unsigned dirty; + unsigned long orig_dirtied_when; struct address_space *mapping = inode->i_mapping; + struct super_block *sb = inode->i_sb; list_del(&inode->i_list); - list_add(&inode->i_list, &inode->i_sb->s_locked_inodes); + list_add(&inode->i_list, &sb->s_locked_inodes); - if (inode->i_state & I_LOCK) - BUG(); + BUG_ON(inode->i_state & I_LOCK); /* Set I_LOCK, reset I_DIRTY */ dirty = inode->i_state & I_DIRTY; inode->i_state |= I_LOCK; inode->i_state &= ~I_DIRTY; + orig_dirtied_when = mapping->dirtied_when; + mapping->dirtied_when = 0; /* assume it's whole-file writeback */ spin_unlock(&inode_lock); - if (wait) - filemap_fdatawait(mapping); - - if (mapping->a_ops->writeback_mapping) - mapping->a_ops->writeback_mapping(mapping, nr_to_write); - else - generic_writeback_mapping(mapping, NULL); + writeback_mapping(mapping, nr_to_write); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) @@ -145,36 +154,29 @@ static void __sync_single_inode(struct inode *inode, int wait, int *nr_to_write) if (wait) filemap_fdatawait(mapping); - /* - * For non-blocking writeout (wait == 0), we still - * count the inode as being clean. - */ spin_lock(&inode_lock); - /* - * Did we write back all the pages? 
- */ - if (nr_to_write && *nr_to_write == 0) { - /* - * Maybe not - */ - if (!list_empty(&mapping->dirty_pages)) /* No lock needed */ - inode->i_state |= I_DIRTY_PAGES; - } - inode->i_state &= ~I_LOCK; if (!(inode->i_state & I_FREEING)) { - struct list_head *to; - if (inode->i_state & I_DIRTY) - to = &inode->i_sb->s_dirty; - else if (atomic_read(&inode->i_count)) - to = &inode_in_use; - else - to = &inode_unused; list_del(&inode->i_list); - list_add(&inode->i_list, to); + if (inode->i_state & I_DIRTY) { /* Redirtied */ + list_add(&inode->i_list, &sb->s_dirty); + } else { + if (!list_empty(&mapping->dirty_pages)) { + /* Not a whole-file writeback */ + mapping->dirtied_when = orig_dirtied_when; + inode->i_state |= I_DIRTY_PAGES; + list_add_tail(&inode->i_list, + &sb->s_dirty); + } else if (atomic_read(&inode->i_count)) { + list_add(&inode->i_list, &inode_in_use); + } else { + list_add(&inode->i_list, &inode_unused); + } + } } - wake_up(&inode->i_wait); + if (waitqueue_active(&inode->i_wait)) + wake_up(&inode->i_wait); } /* @@ -183,6 +185,9 @@ static void __sync_single_inode(struct inode *inode, int wait, int *nr_to_write) static void __writeback_single_inode(struct inode *inode, int sync, int *nr_to_write) { + if (current_is_pdflush() && (inode->i_state & I_LOCK)) + return; + while (inode->i_state & I_LOCK) { __iget(inode); spin_unlock(&inode_lock); @@ -193,58 +198,76 @@ __writeback_single_inode(struct inode *inode, int sync, int *nr_to_write) __sync_single_inode(inode, sync, nr_to_write); } -void writeback_single_inode(struct inode *inode, int sync, int *nr_to_write) -{ - spin_lock(&inode_lock); - __writeback_single_inode(inode, sync, nr_to_write); - spin_unlock(&inode_lock); -} - /* - * Write out a list of inodes' pages, and the inode itself. - * - * If `sync' is true, wait on writeout of the last mapping - * which we write. + * Write out a superblock's list of dirty inodes. 
A wait will be performed + * upon no inodes, all inodes or the final one, depending upon sync_mode. * * If older_than_this is non-NULL, then only write out mappings which * had their first dirtying at a time earlier than *older_than_this. * - * Called under inode_lock. + * If we're a pdlfush thread, then implement pdlfush collision avoidance + * against the entire list. * - * FIXME: putting all the inodes on a local list could introduce a - * race with umount. Bump the superblock refcount? + * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so + * that it can be located for waiting on in __writeback_single_inode(). + * + * Called under inode_lock. */ -static void __sync_list(struct list_head *head, int sync_mode, +static void sync_sb_inodes(struct super_block *sb, int sync_mode, int *nr_to_write, unsigned long *older_than_this) { - struct list_head * tmp; - LIST_HEAD(hold); /* Unready inodes go here */ + struct list_head *tmp; + struct list_head *head; + const unsigned long start = jiffies; /* livelock avoidance */ + list_splice(&sb->s_dirty, &sb->s_io); + INIT_LIST_HEAD(&sb->s_dirty); + head = &sb->s_io; while ((tmp = head->prev) != head) { struct inode *inode = list_entry(tmp, struct inode, i_list); struct address_space *mapping = inode->i_mapping; + struct backing_dev_info *bdi; int really_sync; - if (older_than_this && *older_than_this) { - if (time_after(mapping->dirtied_when, - *older_than_this)) { - list_del(&inode->i_list); - list_add(&inode->i_list, &hold); - continue; - } - } + /* Was this inode dirtied after __sync_list was called? 
*/ + if (time_after(mapping->dirtied_when, start)) + break; + + if (older_than_this && + time_after(mapping->dirtied_when, *older_than_this)) + goto out; + + bdi = mapping->backing_dev_info; + if (current_is_pdflush() && !writeback_acquire(bdi)) + break; + really_sync = (sync_mode == WB_SYNC_ALL); if ((sync_mode == WB_SYNC_LAST) && (head->prev == head)) really_sync = 1; + __writeback_single_inode(inode, really_sync, nr_to_write); + + if (sync_mode == WB_SYNC_HOLD) { + mapping->dirtied_when = jiffies; + list_del(&inode->i_list); + list_add(&inode->i_list, &inode->i_sb->s_dirty); + } + + if (current_is_pdflush()) + writeback_release(bdi); + if (nr_to_write && *nr_to_write == 0) break; } - /* - * Put the not-ready inodes back - */ - if (!list_empty(&hold)) - list_splice(&hold, head); +out: + if (!list_empty(&sb->s_io)) { + /* + * Put the rest back, in the correct order. + */ + list_splice(&sb->s_io, sb->s_dirty.prev); + INIT_LIST_HEAD(&sb->s_io); + } + return; } /* @@ -258,52 +281,25 @@ static void __sync_list(struct list_head *head, int sync_mode, * inode from superblock lists we are OK. * * If `older_than_this' is non-zero then only flush inodes which have a - * flushtime older than *older_than_this. Unless *older_than_this is - * zero. In which case we flush everything, like the old (dumb) wakeup_bdflush. + * flushtime older than *older_than_this. + * + * This is a "memory cleansing" operation, not a "data integrity" operation. */ void writeback_unlocked_inodes(int *nr_to_write, int sync_mode, unsigned long *older_than_this) { - struct super_block * sb; - static unsigned short writeback_gen; + struct super_block *sb; spin_lock(&inode_lock); spin_lock(&sb_lock); - - /* - * We could get into livelock here if someone is dirtying - * inodes fast enough. writeback_gen is used to avoid that. 
- */ - writeback_gen++; - sb = sb_entry(super_blocks.prev); for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { - if (sb->s_writeback_gen == writeback_gen) - continue; - sb->s_writeback_gen = writeback_gen; - - if (current->flags & PF_FLUSHER) { - if (sb->s_flags & MS_FLUSHING) { - /* - * There's no point in two pdflush threads - * flushing the same device. But for other - * callers, we want to perform the flush - * because the fdatasync is how we implement - * writer throttling. - */ - continue; - } - sb->s_flags |= MS_FLUSHING; - } - if (!list_empty(&sb->s_dirty)) { spin_unlock(&sb_lock); - __sync_list(&sb->s_dirty, sync_mode, - nr_to_write, older_than_this); + sync_sb_inodes(sb, sync_mode, nr_to_write, + older_than_this); spin_lock(&sb_lock); } - if (current->flags & PF_FLUSHER) - sb->s_flags &= ~MS_FLUSHING; if (nr_to_write && *nr_to_write == 0) break; } @@ -311,32 +307,6 @@ void writeback_unlocked_inodes(int *nr_to_write, int sync_mode, spin_unlock(&inode_lock); } -/* - * Called under inode_lock - */ -static int __try_to_writeback_unused_list(struct list_head *head, int nr_inodes) -{ - struct list_head *tmp = head; - struct inode *inode; - - while (nr_inodes && (tmp = tmp->prev) != head) { - inode = list_entry(tmp, struct inode, i_list); - - if (!atomic_read(&inode->i_count)) { - __sync_single_inode(inode, 0, NULL); - nr_inodes--; - - /* - * __sync_single_inode moved the inode to another list, - * so we have to start looking from the list head. - */ - tmp = head; - } - } - - return nr_inodes; -} - static void __wait_on_locked(struct list_head *head) { struct list_head * tmp; @@ -351,99 +321,95 @@ static void __wait_on_locked(struct list_head *head) } /* - * writeback and wait upon the filesystem's dirty inodes. - * We do it in two passes - one to write, and one to wait. + * writeback and wait upon the filesystem's dirty inodes. The caller will + * do this in two passes - one to write, and one to wait. 
WB_SYNC_HOLD is + * used to park the written inodes on sb->s_dirty for the wait pass. */ -void sync_inodes_sb(struct super_block *sb) +void sync_inodes_sb(struct super_block *sb, int wait) { spin_lock(&inode_lock); - while (!list_empty(&sb->s_dirty)||!list_empty(&sb->s_locked_inodes)) { - __sync_list(&sb->s_dirty, WB_SYNC_NONE, NULL, NULL); - __sync_list(&sb->s_dirty, WB_SYNC_ALL, NULL, NULL); + sync_sb_inodes(sb, wait ? WB_SYNC_ALL : WB_SYNC_HOLD, NULL, NULL); + if (wait) __wait_on_locked(&sb->s_locked_inodes); - } spin_unlock(&inode_lock); } /* - * writeback the dirty inodes for this filesystem + * Rather lame livelock avoidance. */ -void writeback_inodes_sb(struct super_block *sb) +static void set_sb_syncing(int val) { - spin_lock(&inode_lock); - while (!list_empty(&sb->s_dirty)) - __sync_list(&sb->s_dirty, WB_SYNC_NONE, NULL, NULL); - spin_unlock(&inode_lock); + struct super_block *sb; + spin_lock(&sb_lock); + sb = sb_entry(super_blocks.prev); + for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { + sb->s_syncing = val; + } + spin_unlock(&sb_lock); } /* * Find a superblock with inodes that need to be synced */ - static struct super_block *get_super_to_sync(void) { - struct list_head *p; + struct super_block *sb; restart: - spin_lock(&inode_lock); spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = list_entry(p,struct super_block,s_list); - if (list_empty(&s->s_dirty) && list_empty(&s->s_locked_inodes)) + sb = sb_entry(super_blocks.prev); + for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { + if (sb->s_syncing) continue; - s->s_count++; + sb->s_syncing = 1; + sb->s_count++; spin_unlock(&sb_lock); - spin_unlock(&inode_lock); - down_read(&s->s_umount); - if (!s->s_root) { - drop_super(s); + down_read(&sb->s_umount); + if (!sb->s_root) { + drop_super(sb); goto restart; } - return s; + return sb; } spin_unlock(&sb_lock); - spin_unlock(&inode_lock); return NULL; } /** - * sync_inodes - * @dev: 
device to sync the inodes from. + * sync_inodes * - * sync_inodes goes through the super block's dirty list, - * writes them out, waits on the writeout and puts the inodes - * back on the normal list. + * sync_inodes() goes through each super block's dirty inode list, writes the + * inodes out, waits on the writeout and puts the inodes back on the normal + * list. + * + * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle + * part of the sync functions is that the blockdev "superblock" is processed + * last. This is because the write_inode() function of a typical fs will + * perform no I/O, but will mark buffers in the blockdev mapping as dirty. + * What we want to do is to perform all that dirtying first, and then write + * back all those inode blocks via the blockdev mapping in one sweep. So the + * additional (somewhat redundant) sync_blockdev() calls here are to make + * sure that really happens. Because if we call sync_inodes_sb(wait=1) with + * outstanding dirty inodes, the writeback goes block-at-a-time within the + * filesystem's write_inode(). This is extremely slow. */ - -void sync_inodes(void) -{ - struct super_block * s; - /* - * Search the super_blocks array for the device(s) to sync. 
- */ - while ((s = get_super_to_sync()) != NULL) { - sync_inodes_sb(s); - drop_super(s); - } -} - -void try_to_writeback_unused_inodes(unsigned long pexclusive) +void sync_inodes(int wait) { - struct super_block * sb; - int nr_inodes = inodes_stat.nr_unused; + struct super_block *sb; - spin_lock(&inode_lock); - spin_lock(&sb_lock); - sb = sb_entry(super_blocks.next); - for (; nr_inodes && sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) { - if (list_empty(&sb->s_dirty)) - continue; - spin_unlock(&sb_lock); - nr_inodes = __try_to_writeback_unused_list(&sb->s_dirty, nr_inodes); - spin_lock(&sb_lock); + set_sb_syncing(0); + while ((sb = get_super_to_sync()) != NULL) { + sync_inodes_sb(sb, 0); + sync_blockdev(sb->s_bdev); + drop_super(sb); + } + if (wait) { + set_sb_syncing(0); + while ((sb = get_super_to_sync()) != NULL) { + sync_inodes_sb(sb, 1); + sync_blockdev(sb->s_bdev); + drop_super(sb); + } } - spin_unlock(&sb_lock); - spin_unlock(&inode_lock); - clear_bit(0, (unsigned long *)pexclusive); } /** @@ -467,43 +433,34 @@ void write_inode_now(struct inode *inode, int sync) /** * generic_osync_inode - flush all dirty data for a given inode to disk * @inode: inode to write - * @datasync: if set, don't bother flushing timestamps + * @what: what to write and wait upon * * This can be called by file_write functions for files which have the - * O_SYNC flag set, to flush dirty writes to disk. + * O_SYNC flag set, to flush dirty writes to disk. + * + * @what is a bitmask, specifying which part of the inode's data should be + * written and waited upon: + * + * OSYNC_DATA: i_mapping's dirty data + * OSYNC_METADATA: the buffers at i_mapping->private_list + * OSYNC_INODE: the inode itself */ int generic_osync_inode(struct inode *inode, int what) { - int err = 0, err2 = 0, need_write_inode_now = 0; - - /* - * WARNING - * - * Currently, the filesystem write path does not pass the - * filp down to the low-level write functions. 
Therefore it - * is impossible for (say) __block_commit_write to know if - * the operation is O_SYNC or not. - * - * Ideally, O_SYNC writes would have the filesystem call - * ll_rw_block as it went to kick-start the writes, and we - * could call osync_inode_buffers() here to wait only for - * those IOs which have already been submitted to the device - * driver layer. As it stands, if we did this we'd not write - * anything to disk since our writes have not been queued by - * this point: they are still on the dirty LRU. - * - * So, currently we will call fsync_inode_buffers() instead, - * to flush _all_ dirty buffers for this inode to disk on - * every O_SYNC write, not just the synchronous I/Os. --sct - */ + int err = 0; + int need_write_inode_now = 0; + int err2; if (what & OSYNC_DATA) - writeback_single_inode(inode, 0, NULL); - if (what & (OSYNC_METADATA|OSYNC_DATA)) - err = fsync_inode_buffers(inode); + err = filemap_fdatawrite(inode->i_mapping); + if (what & (OSYNC_METADATA|OSYNC_DATA)) { + err2 = sync_mapping_buffers(inode->i_mapping); + if (!err) + err = err2; + } if (what & OSYNC_DATA) { - err2 = filemap_fdatawrite(inode->i_mapping); + err2 = filemap_fdatawait(inode->i_mapping); if (!err) err = err2; } @@ -521,3 +478,40 @@ int generic_osync_inode(struct inode *inode, int what) return err; } + +/** + * writeback_acquire: attempt to get exclusive writeback access to a device + * @bdi: the device's backing_dev_info structure + * + * It is a waste of resources to have more than one pdflush thread blocked on + * a single request queue. Exclusion at the request_queue level is obtained + * via a flag in the request_queue's backing_dev_info.state. + * + * Non-request_queue-backed address_spaces will share default_backing_dev_info, + * unless they implement their own. Which is somewhat inefficient, as this + * may prevent concurrent writeback against multiple devices. 
+ */ +int writeback_acquire(struct backing_dev_info *bdi) +{ + return !test_and_set_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_in_progress: determine whether there is writeback in progress + * against a backing device. + * @bdi: the device's backing_dev_info structure. + */ +int writeback_in_progress(struct backing_dev_info *bdi) +{ + return test_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_release: relinquish exclusive writeback access against a device. + * @bdi: the device's backing_dev_info structure + */ +void writeback_release(struct backing_dev_info *bdi) +{ + BUG_ON(!writeback_in_progress(bdi)); + clear_bit(BDI_pdflush, &bdi->state); +} diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 323e66dd6b08..986e479e433b 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -9,6 +9,7 @@ #include <linux/string.h> #include <linux/time.h> #include <linux/smp_lock.h> +#include <linux/pagemap.h> #include "hpfs_fn.h" #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index a6cb5a596827..b5c09a1d66c3 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -16,7 +16,6 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/time.h> -#include <linux/locks.h> #include <linux/stat.h> #include <linux/string.h> #include <asm/bitops.h> diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 61578e25673e..08125d97bf3a 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -6,6 +6,7 @@ * inode VFS functions */ +#include <linux/fs.h> #include <linux/time.h> #include <linux/smp_lock.h> #include "hpfs_fn.h" diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 85ddc8aef4a6..2a54665058f8 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -6,6 +6,7 @@ * adding & removing files & directories */ +#include <linux/pagemap.h> #include <linux/string.h> #include "hpfs_fn.h" diff --git a/fs/inode.c b/fs/inode.c index 61e3f6678737..73e548636cb5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -12,6 +12,8 @@ #include 
<linux/quotaops.h> #include <linux/slab.h> #include <linux/writeback.h> +#include <linux/module.h> +#include <linux/backing-dev.h> /* * New inode.c implementation. @@ -83,6 +85,8 @@ static struct inode *alloc_inode(struct super_block *sb) inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL); if (inode) { + struct address_space * const mapping = &inode->i_data; + inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_blkbits = sb->s_blocksize_bits; @@ -95,20 +99,23 @@ static struct inode *alloc_inode(struct super_block *sb) atomic_set(&inode->i_writecount, 0); inode->i_size = 0; inode->i_blocks = 0; + inode->i_bytes = 0; inode->i_generation = 0; memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); inode->i_pipe = NULL; inode->i_bdev = NULL; inode->i_cdev = NULL; - inode->i_data.a_ops = &empty_aops; - inode->i_data.host = inode; - inode->i_data.gfp_mask = GFP_HIGHUSER; - inode->i_data.dirtied_when = 0; - inode->i_mapping = &inode->i_data; - inode->i_data.ra_pages = &default_ra_pages; + + mapping->a_ops = &empty_aops; + mapping->host = inode; + mapping->gfp_mask = GFP_HIGHUSER; + mapping->dirtied_when = 0; + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; if (sb->s_bdev) - inode->i_data.ra_pages = sb->s_bdev->bd_inode->i_mapping->ra_pages; + inode->i_data.backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; memset(&inode->u, 0, sizeof(inode->u)); + inode->i_mapping = mapping; } return inode; } @@ -139,13 +146,13 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_data.locked_pages); INIT_LIST_HEAD(&inode->i_data.io_pages); INIT_LIST_HEAD(&inode->i_dentry); - INIT_LIST_HEAD(&inode->i_dirty_buffers); INIT_LIST_HEAD(&inode->i_devices); sema_init(&inode->i_sem, 1); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); rwlock_init(&inode->i_data.page_lock); spin_lock_init(&inode->i_data.i_shared_lock); - spin_lock_init(&inode->i_bufferlist_lock); + 
INIT_LIST_HEAD(&inode->i_data.private_list); + spin_lock_init(&inode->i_data.private_lock); INIT_LIST_HEAD(&inode->i_data.i_mmap); INIT_LIST_HEAD(&inode->i_data.i_mmap_shared); } @@ -306,6 +313,7 @@ int invalidate_inodes(struct super_block * sb) busy = invalidate_list(&inode_in_use, sb, &throw_away); busy |= invalidate_list(&inode_unused, sb, &throw_away); busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); + busy |= invalidate_list(&sb->s_io, sb, &throw_away); busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); spin_unlock(&inode_lock); @@ -397,23 +405,8 @@ void prune_icache(int goal) spin_unlock(&inode_lock); dispose_list(freeable); - - /* - * If we didn't freed enough clean inodes schedule - * a sync of the dirty inodes, we cannot do it - * from here or we're either synchronously dogslow - * or we deadlock with oom. - */ - if (goal) { - static unsigned long exclusive; - - if (!test_and_set_bit(0, &exclusive)) { - if (pdflush_operation(try_to_writeback_unused_inodes, - (unsigned long)&exclusive)) - clear_bit(0, &exclusive); - } - } } + /* * This is called from kswapd when we think we need some * more memory, but aren't really sure how much. So we @@ -452,7 +445,32 @@ int shrink_icache_memory(int priority, int gfp_mask) * by hand after calling find_inode now! This simplifies iunique and won't * add any additional branch in the common code. 
*/ -static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) +static struct inode * find_inode(struct super_block * sb, struct list_head *head, int (*test)(struct inode *, void *), void *data) +{ + struct list_head *tmp; + struct inode * inode; + + tmp = head; + for (;;) { + tmp = tmp->next; + inode = NULL; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_hash); + if (inode->i_sb != sb) + continue; + if (!test(inode, data)) + continue; + break; + } + return inode; +} + +/* + * find_inode_fast is the fast path version of find_inode, see the comment at + * iget_locked for details. + */ +static struct inode * find_inode_fast(struct super_block * sb, struct list_head *head, unsigned long ino) { struct list_head *tmp; struct inode * inode; @@ -468,8 +486,6 @@ static struct inode * find_inode(struct super_block * sb, unsigned long ino, str continue; if (inode->i_sb != sb) continue; - if (find_actor && !find_actor(inode, ino, opaque)) - continue; break; } return inode; @@ -501,13 +517,28 @@ struct inode *new_inode(struct super_block *sb) return inode; } +void unlock_new_inode(struct inode *inode) +{ + /* + * This is special! We do not need the spinlock + * when clearing I_LOCK, because we're guaranteed + * that nobody else tries to do anything about the + * state of the inode when it is locked, as we + * just created it (so there can be no old holders + * that haven't tested I_LOCK). + */ + inode->i_state &= ~(I_LOCK|I_NEW); + wake_up(&inode->i_wait); +} + + /* * This is called without the inode lock held.. Be careful. 
* * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org */ -static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) +static struct inode * get_new_inode(struct super_block *sb, struct list_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct inode * inode; @@ -517,37 +548,68 @@ static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, s spin_lock(&inode_lock); /* We released the lock, so.. */ - old = find_inode(sb, ino, head, find_actor, opaque); + old = find_inode(sb, head, test, data); if (!old) { + if (set(inode, data)) + goto set_failed; + inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_hash, head); - inode->i_ino = ino; - inode->i_state = I_LOCK; + inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); - /* reiserfs specific hack right here. We don't - ** want this to last, and are looking for VFS changes - ** that will allow us to get rid of it. - ** -- mason@suse.com - */ - if (sb->s_op->read_inode2) { - sb->s_op->read_inode2(inode, opaque) ; - } else { - sb->s_op->read_inode(inode); - } - - /* - * This is special! We do not need the spinlock - * when clearing I_LOCK, because we're guaranteed - * that nobody else tries to do anything about the - * state of the inode when it is locked, as we - * just created it (so there can be no old holders - * that haven't tested I_LOCK). + /* Return the locked inode with I_NEW set, the + * caller is responsible for filling in the contents */ - inode->i_state &= ~I_LOCK; - wake_up(&inode->i_wait); + return inode; + } + + /* + * Uhhuh, somebody else created the same inode under + * us. Use the old inode instead of the one we just + * allocated. 
+ */ + __iget(old); + spin_unlock(&inode_lock); + destroy_inode(inode); + inode = old; + wait_on_inode(inode); + } + return inode; + +set_failed: + spin_unlock(&inode_lock); + destroy_inode(inode); + return NULL; +} + +/* + * get_new_inode_fast is the fast path version of get_new_inode, see the + * comment at iget_locked for details. + */ +static struct inode * get_new_inode_fast(struct super_block *sb, struct list_head *head, unsigned long ino) +{ + struct inode * inode; + + inode = alloc_inode(sb); + if (inode) { + struct inode * old; + spin_lock(&inode_lock); + /* We released the lock, so.. */ + old = find_inode_fast(sb, head, ino); + if (!old) { + inode->i_ino = ino; + inodes_stat.nr_inodes++; + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_hash, head); + inode->i_state = I_LOCK|I_NEW; + spin_unlock(&inode_lock); + + /* Return the locked inode with I_NEW set, the + * caller is responsible for filling in the contents + */ return inode; } @@ -565,9 +627,9 @@ static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, s return inode; } -static inline unsigned long hash(struct super_block *sb, unsigned long i_ino) +static inline unsigned long hash(struct super_block *sb, unsigned long hashval) { - unsigned long tmp = i_ino + ((unsigned long) sb / L1_CACHE_BYTES); + unsigned long tmp = hashval + ((unsigned long) sb / L1_CACHE_BYTES); tmp = tmp + (tmp >> I_HASHBITS); return tmp & I_HASHMASK; } @@ -599,7 +661,8 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) retry: if (counter > max_reserved) { head = inode_hashtable + hash(sb,counter); - inode = find_inode(sb, res = counter++, head, NULL, NULL); + res = counter++; + inode = find_inode_fast(sb, head, res); if (!inode) { spin_unlock(&inode_lock); return res; @@ -627,14 +690,18 @@ struct inode *igrab(struct inode *inode) return inode; } - -struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) +/* + * This is iget without the 
read_inode portion of get_new_inode + * the filesystem gets back a new locked and hashed inode and gets + * to fill it in before unlocking it via unlock_new_inode(). + */ +struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { - struct list_head * head = inode_hashtable + hash(sb,ino); + struct list_head * head = inode_hashtable + hash(sb, hashval); struct inode * inode; spin_lock(&inode_lock); - inode = find_inode(sb, ino, head, find_actor, opaque); + inode = find_inode(sb, head, test, data); if (inode) { __iget(inode); spin_unlock(&inode_lock); @@ -647,22 +714,57 @@ struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find * get_new_inode() will do the right thing, re-trying the search * in case it had to block at any point. */ - return get_new_inode(sb, ino, head, find_actor, opaque); + return get_new_inode(sb, head, test, set, data); +} + +/* + * Because most filesystems are based on 32-bit unique inode numbers some + * functions are duplicated to keep iget_locked as a fast path. We can avoid + * unnecessary pointer dereferences and function calls for this specific + * case. The duplicated functions (find_inode_fast and get_new_inode_fast) + * have the same pre- and post-conditions as their original counterparts. + */ +struct inode *iget_locked(struct super_block *sb, unsigned long ino) +{ + struct list_head * head = inode_hashtable + hash(sb, ino); + struct inode * inode; + + spin_lock(&inode_lock); + inode = find_inode_fast(sb, head, ino); + if (inode) { + __iget(inode); + spin_unlock(&inode_lock); + wait_on_inode(inode); + return inode; + } + spin_unlock(&inode_lock); + + /* + * get_new_inode_fast() will do the right thing, re-trying the search + * in case it had to block at any point. 
+ */ + return get_new_inode_fast(sb, head, ino); } +EXPORT_SYMBOL(iget5_locked); +EXPORT_SYMBOL(iget_locked); +EXPORT_SYMBOL(unlock_new_inode); + /** - * insert_inode_hash - hash an inode + * __insert_inode_hash - hash an inode * @inode: unhashed inode + * @hashval: unsigned long value used to locate this object in the + * inode_hashtable. * * Add an inode to the inode hash for this superblock. If the inode * has no superblock it is added to a separate anonymous chain. */ -void insert_inode_hash(struct inode *inode) +void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct list_head *head = &anon_hash_chain; if (inode->i_sb) - head = inode_hashtable + hash(inode->i_sb, inode->i_ino); + head = inode_hashtable + hash(inode->i_sb, hashval); spin_lock(&inode_lock); list_add(&inode->i_hash, head); spin_unlock(&inode_lock); @@ -877,9 +979,9 @@ void update_atime (struct inode *inode) /* Functions back in dquot.c */ void put_dquot_list(struct list_head *); -int remove_inode_dquot_ref(struct inode *, short, struct list_head *); +int remove_inode_dquot_ref(struct inode *, int, struct list_head *); -void remove_dquot_ref(struct super_block *sb, short type) +void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; struct list_head *act_head; @@ -906,6 +1008,11 @@ void remove_dquot_ref(struct super_block *sb, short type) if (IS_QUOTAINIT(inode)) remove_inode_dquot_ref(inode, type, &tofree_head); } + list_for_each(act_head, &sb->s_io) { + inode = list_entry(act_head, struct inode, i_list); + if (IS_QUOTAINIT(inode)) + remove_inode_dquot_ref(inode, type, &tofree_head); + } list_for_each(act_head, &sb->s_locked_inodes) { inode = list_entry(act_head, struct inode, i_list); if (IS_QUOTAINIT(inode)) diff --git a/fs/intermezzo/cache.c b/fs/intermezzo/cache.c index 7c3d32d4152f..93a534473ea2 100644 --- a/fs/intermezzo/cache.c +++ b/fs/intermezzo/cache.c @@ -22,7 +22,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> 
-#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/init.h> diff --git a/fs/intermezzo/dcache.c b/fs/intermezzo/dcache.c index eca114afc4fa..6e7bd681ae47 100644 --- a/fs/intermezzo/dcache.c +++ b/fs/intermezzo/dcache.c @@ -14,7 +14,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/slab.h> #include <asm/uaccess.h> #include <linux/string.h> diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c index 7617c500c35f..c8a8c1988f16 100644 --- a/fs/intermezzo/dir.c +++ b/fs/intermezzo/dir.c @@ -24,7 +24,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/init.h> #define __NO_VERSION__ diff --git a/fs/intermezzo/ext_attr.c b/fs/intermezzo/ext_attr.c index 398c6d50554a..3c317baa7911 100644 --- a/fs/intermezzo/ext_attr.c +++ b/fs/intermezzo/ext_attr.c @@ -14,7 +14,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/unistd.h> #include <asm/system.h> diff --git a/fs/intermezzo/file.c b/fs/intermezzo/file.c index 68084e55ef73..67c34b7bce4a 100644 --- a/fs/intermezzo/file.c +++ b/fs/intermezzo/file.c @@ -31,7 +31,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/init.h> #include <linux/smp_lock.h> diff --git a/fs/intermezzo/inode.c b/fs/intermezzo/inode.c index 111721845226..ace8cacad054 100644 --- a/fs/intermezzo/inode.c +++ b/fs/intermezzo/inode.c @@ -15,7 +15,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/unistd.h> #include <asm/system.h> @@ -24,7 +23,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/string.h> #include <asm/uaccess.h> #include <linux/slab.h> diff --git a/fs/intermezzo/journal.c 
b/fs/intermezzo/journal.c index df8f31533479..049760de033c 100644 --- a/fs/intermezzo/journal.c +++ b/fs/intermezzo/journal.c @@ -13,7 +13,6 @@ #include <linux/vmalloc.h> #include <linux/time.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/smp_lock.h> diff --git a/fs/intermezzo/journal_ext2.c b/fs/intermezzo/journal_ext2.c index 2a4a5d7b40f9..74888cb0c367 100644 --- a/fs/intermezzo/journal_ext2.c +++ b/fs/intermezzo/journal_ext2.c @@ -11,7 +11,6 @@ #include <linux/vmalloc.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/ext2_fs.h> diff --git a/fs/intermezzo/journal_ext3.c b/fs/intermezzo/journal_ext3.c index 58ab5f50dccc..46bebc15d30b 100644 --- a/fs/intermezzo/journal_ext3.c +++ b/fs/intermezzo/journal_ext3.c @@ -16,7 +16,6 @@ #include <linux/vmalloc.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/smp_lock.h> diff --git a/fs/intermezzo/journal_obdfs.c b/fs/intermezzo/journal_obdfs.c index c6d239b2685e..2ce2d08f73ab 100644 --- a/fs/intermezzo/journal_obdfs.c +++ b/fs/intermezzo/journal_obdfs.c @@ -16,7 +16,6 @@ #include <linux/vmalloc.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #ifdef CONFIG_OBDFS_FS diff --git a/fs/intermezzo/journal_reiserfs.c b/fs/intermezzo/journal_reiserfs.c index 6531887ad3f5..23804ec54fff 100644 --- a/fs/intermezzo/journal_reiserfs.c +++ b/fs/intermezzo/journal_reiserfs.c @@ -16,7 +16,6 @@ #include <linux/stat.h> #include <linux/errno.h> #include <linux/smp_lock.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #if 0 diff --git a/fs/intermezzo/journal_xfs.c b/fs/intermezzo/journal_xfs.c index 0ec4372e918a..70aad72b613e 100644 --- a/fs/intermezzo/journal_xfs.c +++ 
b/fs/intermezzo/journal_xfs.c @@ -11,7 +11,6 @@ #include <linux/vmalloc.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #ifdef CONFIG_FS_XFS diff --git a/fs/intermezzo/methods.c b/fs/intermezzo/methods.c index 05af7f28bdf9..276f4be455cd 100644 --- a/fs/intermezzo/methods.c +++ b/fs/intermezzo/methods.c @@ -23,7 +23,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/init.h> #define __NO_VERSION__ diff --git a/fs/intermezzo/presto.c b/fs/intermezzo/presto.c index 12243f3e199b..5c8514c2c665 100644 --- a/fs/intermezzo/presto.c +++ b/fs/intermezzo/presto.c @@ -16,7 +16,6 @@ #include <linux/errno.h> #include <linux/vmalloc.h> #include <linux/slab.h> -#include <linux/locks.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/smp_lock.h> diff --git a/fs/intermezzo/super.c b/fs/intermezzo/super.c index 4a9358f82c07..f1804c2a7860 100644 --- a/fs/intermezzo/super.c +++ b/fs/intermezzo/super.c @@ -23,7 +23,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/init.h> #define __NO_VERSION__ diff --git a/fs/intermezzo/upcall.c b/fs/intermezzo/upcall.c index 37491a4c3dd0..604705413305 100644 --- a/fs/intermezzo/upcall.c +++ b/fs/intermezzo/upcall.c @@ -32,7 +32,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/string.h> #include <asm/uaccess.h> #include <linux/vmalloc.h> diff --git a/fs/ioctl.c b/fs/ioctl.c index a36c61f18769..8d38a2f2539c 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -102,6 +102,16 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) filp->f_flags &= ~FASYNC; break; + case FIOQSIZE: + if (S_ISDIR(filp->f_dentry->d_inode->i_mode) || + S_ISREG(filp->f_dentry->d_inode->i_mode) || + 
S_ISLNK(filp->f_dentry->d_inode->i_mode)) { + loff_t res = inode_get_bytes(filp->f_dentry->d_inode); + error = copy_to_user((loff_t *)arg, &res, sizeof(res)) ? -EFAULT : 0; + } + else + error = -ENOTTY; + break; default: error = -ENOTTY; if (S_ISREG(filp->f_dentry->d_inode->i_mode)) diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 4ce2fb2a5f96..a5728e0f7d6a 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -26,7 +26,6 @@ #include <linux/major.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/cdrom.h> diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 19a643b650e6..610462dd9cc9 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -19,7 +19,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/time.h> -#include <linux/locks.h> #include <linux/config.h> #include <linux/smp_lock.h> diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 08d4a148495b..9058e9763e88 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -19,7 +19,6 @@ #include <linux/major.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/cdrom.h> diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index d809febc5abc..17a94591d021 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -22,7 +22,6 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/locks.h> extern spinlock_t journal_datalist_lock; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 59c3b994d47a..e4ce53b05a55 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -18,7 +18,6 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/smp_lock.h> extern spinlock_t journal_datalist_lock; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index baafedb4afa5..a89f7e547581 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c 
@@ -28,11 +28,11 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <linux/proc_fs.h> diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index d32693a96bea..e6a96d3c30ce 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -21,7 +21,6 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/locks.h> #endif /* @@ -482,9 +481,9 @@ static int do_one_pass(journal_t *journal, } BUFFER_TRACE(nbh, "marking dirty"); + set_buffer_uptodate(nbh); mark_buffer_dirty(nbh); BUFFER_TRACE(nbh, "marking uptodate"); - set_buffer_uptodate(nbh); ++info->nr_replays; /* ll_rw_block(WRITE, 1, &nbh); */ unlock_buffer(nbh); diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 464e828db9d1..7cecb0237988 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -65,7 +65,6 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/list.h> #include <linux/smp_lock.h> #include <linux/init.h> diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 9e868b3c390c..cf10a8ce12e3 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -22,10 +22,10 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/timer.h> #include <linux/smp_lock.h> #include <linux/mm.h> +#include <linux/highmem.h> extern spinlock_t journal_datalist_lock; @@ -518,6 +518,38 @@ void journal_unlock_updates (journal_t *journal) } /* + * Report any unexpected dirty buffers which turn up. Normally those + * indicate an error, but they can occur if the user is running (say) + * tune2fs to modify the live filesystem, so we need the option of + * continuing as gracefully as possible. 
# + * + * The caller should already hold the journal lock and + * journal_datalist_lock spinlock: most callers will need those anyway + * in order to probe the buffer's journaling state safely. + */ +static void jbd_unexpected_dirty_buffer(struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + int jlist; + + if (buffer_dirty(bh)) { + /* If this buffer is one which might reasonably be dirty + * --- ie. data, or not part of this journal --- then + * we're OK to leave it alone, but otherwise we need to + * move the dirty bit to the journal's own internal + * JBDDirty bit. */ + jlist = jh->b_jlist; + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + if (test_clear_buffer_dirty(jh2bh(jh))) { + set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); + } + } + } +} + +/* * journal_get_write_access: notify intent to modify a buffer for metadata * (not data) update. * @@ -538,72 +570,66 @@ void journal_unlock_updates (journal_t *journal) static int do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy) { + struct buffer_head *bh; transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; int error; char *frozen_buffer = NULL; int need_copy = 0; + int locked; jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); JBUFFER_TRACE(jh, "entry"); repeat: + bh = jh2bh(jh); + /* @@@ Need to check for errors here at some point. */ - /* - * AKPM: neither bdflush nor kupdate run with the BKL. There's - * nothing we can do to prevent them from starting writeout of a - * BUF_DIRTY buffer at any time. And checkpointing buffers are on - * BUF_DIRTY. So. We no longer assert that the buffer is unlocked. - * - * However. It is very wrong for us to allow ext3 to start directly - * altering the ->b_data of buffers which may at that very time be - * undergoing writeout to the client filesystem. 
This can leave - * the filesystem in an inconsistent, transient state if we crash. - * So what we do is to steal the buffer if it is in checkpoint - * mode and dirty. The journal lock will keep out checkpoint-mode - * state transitions within journal_remove_checkpoint() and the buffer - * is locked to keep bdflush/kupdate/whoever away from it as well. - * - * AKPM: we have replaced all the lock_journal_bh_wait() stuff with a - * simple lock_journal(). This code here will care for locked buffers. - */ - /* - * The buffer_locked() || buffer_dirty() tests here are simply an - * optimisation tweak. If anyone else in the system decides to - * lock this buffer later on, we'll blow up. There doesn't seem - * to be a good reason why they should do this. - */ - if (jh->b_cp_transaction && - (buffer_locked(jh2bh(jh)) || buffer_dirty(jh2bh(jh)))) { + locked = test_set_buffer_locked(bh); + if (locked) { + /* We can't reliably test the buffer state if we found + * it already locked, so just wait for the lock and + * retry. */ unlock_journal(journal); - lock_buffer(jh2bh(jh)); - spin_lock(&journal_datalist_lock); - if (jh->b_cp_transaction && buffer_dirty(jh2bh(jh))) { - /* OK, we need to steal it */ - JBUFFER_TRACE(jh, "stealing from checkpoint mode"); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - J_ASSERT_JH(jh, jh->b_frozen_data == NULL); - - J_ASSERT(handle->h_buffer_credits > 0); - handle->h_buffer_credits--; - - /* This will clear BH_Dirty and set BH_JBDDirty. */ - JBUFFER_TRACE(jh, "file as BJ_Reserved"); - __journal_file_buffer(jh, transaction, BJ_Reserved); - - /* - * The buffer is now hidden from bdflush. It is - * metadata against the current transaction. 
- */ - JBUFFER_TRACE(jh, "steal from cp mode is complete"); - } - spin_unlock(&journal_datalist_lock); - unlock_buffer(jh2bh(jh)); + wait_on_buffer(bh); lock_journal(journal); + goto repeat; } - J_ASSERT_JH(jh, !buffer_locked(jh2bh(jh))); + /* We now hold the buffer lock so it is safe to query the buffer + * state. Is the buffer dirty? + * + * If so, there are two possibilities. The buffer may be + * non-journaled, and undergoing a quite legitimate writeback. + * Otherwise, it is journaled, and we don't expect dirty buffers + * in that state (the buffers should be marked JBD_Dirty + * instead.) So either the IO is being done under our own + * control and this is a bug, or it's a third party IO such as + * dump(8) (which may leave the buffer scheduled for read --- + * ie. locked but not dirty) or tune2fs (which may actually have + * the buffer dirtied, ugh.) */ + + if (buffer_dirty(bh)) { + spin_lock(&journal_datalist_lock); + /* First question: is this buffer already part of the + * current transaction or the existing committing + * transaction? 
*/ + if (jh->b_transaction) { + J_ASSERT_JH(jh, + jh->b_transaction == transaction || + jh->b_transaction == + journal->j_committing_transaction); + if (jh->b_next_transaction) + J_ASSERT_JH(jh, jh->b_next_transaction == + transaction); + JBUFFER_TRACE(jh, "Unexpected dirty buffer"); + jbd_unexpected_dirty_buffer(jh); + } + spin_unlock(&journal_datalist_lock); + } + + unlock_buffer(bh); error = -EROFS; if (is_handle_aborted(handle)) @@ -1915,6 +1941,8 @@ void __journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { struct journal_head **list = 0; + int was_dirty = 0; + struct buffer_head *bh = jh2bh(jh); assert_spin_locked(&journal_datalist_lock); @@ -1925,13 +1953,24 @@ void __journal_file_buffer(struct journal_head *jh, J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == 0); - if (jh->b_transaction) { - if (jh->b_jlist == jlist) - return; + if (jh->b_transaction && jh->b_jlist == jlist) + return; + + /* The following list of buffer states needs to be consistent + * with __jbd_unexpected_dirty_buffer()'s handling of dirty + * state. 
*/ + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + if (test_clear_buffer_dirty(bh) || + test_clear_buffer_jbddirty(bh)) + was_dirty = 1; + } + + if (jh->b_transaction) __journal_unfile_buffer(jh); - } else { + else jh->b_transaction = transaction; - } switch (jlist) { case BJ_None: @@ -1968,12 +2007,8 @@ void __journal_file_buffer(struct journal_head *jh, __blist_add_buffer(list, jh); jh->b_jlist = jlist; - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - if (test_clear_buffer_dirty(jh2bh(jh))) { - set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); - } - } + if (was_dirty) + set_buffer_jbddirty(bh); } void journal_file_buffer(struct journal_head *jh, diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 3e11c2dd55b8..fe077a51775e 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -42,7 +42,6 @@ #include <linux/slab.h> #include <linux/jffs.h> #include <linux/fs.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/ioctl.h> #include <linux/stat.h> diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index ec26515ce2b4..cc3f7895b294 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -63,7 +63,6 @@ #include <linux/fs.h> #include <linux/stat.h> #include <linux/pagemap.h> -#include <linux/locks.h> #include <asm/semaphore.h> #include <asm/byteorder.h> #include <linux/version.h> diff --git a/fs/jfs/file.c b/fs/jfs/file.c index b926fb7ffacd..ec8ea1484ff5 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -17,7 +17,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include "jfs_incore.h" #include "jfs_txnmgr.h" #include "jfs_debug.h" diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index eb34bd53ff04..032d52434350 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -17,7 +17,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_imap.h" diff --git a/fs/jfs/jfs_dtree.c 
b/fs/jfs/jfs_dtree.c index 9742962de44f..1b223e2275ad 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -101,7 +101,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include "jfs_incore.h" #include "jfs_superblock.h" diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 9360c94d857f..becd4caa108a 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -42,7 +42,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_dinode.h" diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index a2e91a853516..219d5dbb2d18 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -59,7 +59,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 47b4b1f763f1..d7c441edbffa 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -43,7 +43,6 @@ #include <linux/fs.h> -#include <linux/locks.h> #include <linux/vmalloc.h> #include <linux/smp_lock.h> #include <linux/completion.h> diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index ffdb26471641..c09af0a01b3b 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -20,7 +20,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4653128b97ef..69beb37da170 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -17,7 +17,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_dinode.h" diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 46f0cceb3cbe..248ab7a6be33 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -17,7 +17,6 @@ */ #include <linux/fs.h> -#include <linux/locks.h> #include <linux/config.h> #include <linux/module.h> #include <linux/completion.h> diff --git a/fs/minix/file.c 
b/fs/minix/file.c index 870b602d0c39..c9ac58fe9872 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -31,7 +31,7 @@ int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; int err; - err = fsync_inode_buffers(inode); + err = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY)) return err; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 10fdf3f6973e..c8d490c5d24f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -12,7 +12,6 @@ #include <linux/module.h> #include "minix.h" #include <linux/slab.h> -#include <linux/locks.h> #include <linux/init.h> #include <linux/highuid.h> diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 83064f69675f..26216b80ab5a 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -1,5 +1,4 @@ #include "minix.h" -#include <linux/locks.h> enum {DEPTH = 3, DIRECT = 7}; /* Only double indirect */ diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index 80885cc9b437..732a878b1e4c 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -1,5 +1,4 @@ #include "minix.h" -#include <linux/locks.h> enum {DIRECT = 7, DEPTH = 4}; /* Have triple indirect */ diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 32ec4f105c24..fd44657ee616 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -20,7 +20,6 @@ #include <linux/mm.h> #include <asm/uaccess.h> #include <asm/byteorder.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/ncp_fs.h> diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 0b5f437715e2..81c224f1558f 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -15,7 +15,6 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/mm.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/smp_lock.h> diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 854b599fde90..1c19c0269f9e 100644 --- a/fs/ncpfs/inode.c +++ 
b/fs/ncpfs/inode.c @@ -21,7 +21,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/file.h> #include <linux/fcntl.h> #include <linux/slab.h> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5a105fc344eb..d5bced0cdbde 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -23,7 +23,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> @@ -47,7 +46,6 @@ static void nfs_invalidate_inode(struct inode *); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); -static void nfs_read_inode(struct inode *); static void nfs_write_inode(struct inode *,int); static void nfs_delete_inode(struct inode *); static void nfs_put_super(struct super_block *); @@ -59,7 +57,6 @@ static int nfs_show_options(struct seq_file *, struct vfsmount *); static struct super_operations nfs_sops = { alloc_inode: nfs_alloc_inode, destroy_inode: nfs_destroy_inode, - read_inode: nfs_read_inode, write_inode: nfs_write_inode, delete_inode: nfs_delete_inode, put_super: nfs_put_super, @@ -98,15 +95,6 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -/* - * The "read_inode" function doesn't actually do anything: - * the real data is filled in later in nfs_fhget. - */ -static void -nfs_read_inode(struct inode * inode) -{ -} - static void nfs_write_inode(struct inode *inode, int sync) { @@ -592,7 +580,7 @@ struct nfs_find_desc { * i_ino. 
*/ static int -nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque) +nfs_find_actor(struct inode *inode, void *opaque) { struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; struct nfs_fh *fh = desc->fh; @@ -610,6 +598,18 @@ nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque) return 1; } +static int +nfs_init_locked(struct inode *inode, void *opaque) +{ + struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_fh *fh = desc->fh; + struct nfs_fattr *fattr = desc->fattr; + + NFS_FILEID(inode) = fattr->fileid; + memcpy(NFS_FH(inode), fh, sizeof(struct nfs_fh)); + return 0; +} + /* * This is our own version of iget that looks up inodes by file handle * instead of inode number. We use this technique instead of using @@ -640,7 +640,7 @@ __nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) fattr: fattr }; struct inode *inode = NULL; - unsigned long ino; + unsigned long hash; if ((fattr->valid & NFS_ATTR_FATTR) == 0) goto out_no_inode; @@ -650,20 +650,21 @@ __nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) goto out_no_inode; } - ino = nfs_fattr_to_ino_t(fattr); + hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) + if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) goto out_no_inode; - if (NFS_NEW(inode)) { + if (inode->i_state & I_NEW) { __u64 new_size, new_mtime; loff_t new_isize; time_t new_atime; + /* We set i_ino for the few things that still rely on it, + * such as stat(2) */ + inode->i_ino = hash; + /* We can't support UPDATE_ATIME(), since the server will reset it */ - NFS_FLAGS(inode) &= ~NFS_INO_NEW; - NFS_FILEID(inode) = fattr->fileid; - memcpy(NFS_FH(inode), fh, sizeof(struct nfs_fh)); inode->i_flags |= S_NOATIME; inode->i_mode = fattr->mode; /* Why so? 
Because we want revalidate for devices/FIFOs, and @@ -711,6 +712,8 @@ __nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + + unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%s/%Ld ct=%d)\n", @@ -1231,7 +1234,7 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); if (!nfsi) return NULL; - nfsi->flags = NFS_INO_NEW; + nfsi->flags = 0; nfsi->mm_cred = NULL; return &nfsi->vfs_inode; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d0e7f24fe3fd..0e3de324ec19 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -9,7 +9,6 @@ #include <linux/linkage.h> #include <linux/time.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/fs.h> #include <linux/ext2_fs.h> #include <linux/stat.h> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index f81849743d7b..5527f0d6b5d4 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -10,7 +10,6 @@ #include <linux/linkage.h> #include <linux/time.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/fs.h> #include <linux/stat.h> #include <linux/fcntl.h> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7384bc06463e..b9d0bbfb333a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -20,7 +20,6 @@ #include <linux/string.h> #include <linux/time.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/fs.h> #include <linux/major.h> #include <linux/ext2_fs.h> diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 21e2c0095bd2..5393901948bf 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -25,7 +25,6 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/swap.h> -#include <linux/locks.h> #include "ntfs.h" diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 
b4896ab4008b..395cf7fec9d2 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -21,7 +21,6 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/locks.h> #include <linux/fs.h> #include "ntfs.h" diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index f5ff98b3c1ad..9db0fb99f901 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -20,7 +20,6 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/locks.h> #include <linux/swap.h> #include "ntfs.h" diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 049babc922de..1ee4c7b74aa9 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -23,9 +23,9 @@ #include <linux/stddef.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/spinlock.h> #include <linux/blkdev.h> /* For bdev_hardsect_size(). */ +#include <linux/backing-dev.h> #include "ntfs.h" #include "sysctl.h" @@ -1510,10 +1510,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) INIT_LIST_HEAD(&vol->mftbmp_mapping.i_mmap); INIT_LIST_HEAD(&vol->mftbmp_mapping.i_mmap_shared); spin_lock_init(&vol->mftbmp_mapping.i_shared_lock); + /* + * private_lock and private_list are unused by ntfs. But they + * are available. 
+ */ + spin_lock_init(&vol->mftbmp_mapping.private_lock); + INIT_LIST_HEAD(&vol->mftbmp_mapping.private_list); + vol->mftbmp_mapping.assoc_mapping = NULL; vol->mftbmp_mapping.dirtied_when = 0; vol->mftbmp_mapping.gfp_mask = GFP_HIGHUSER; - vol->mftbmp_mapping.ra_pages = - sb->s_bdev->bd_inode->i_mapping->ra_pages; + vol->mftbmp_mapping.backing_dev_info = + sb->s_bdev->bd_inode->i_mapping->backing_dev_info; /* * Default is group and other don't have any access to files or diff --git a/fs/open.c b/fs/open.c index e0231b191336..2ef917feadb7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/tty.h> #include <linux/iobuf.h> +#include <linux/backing-dev.h> #include <asm/uaccess.h> @@ -632,7 +633,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) goto cleanup_file; } - f->f_ra.ra_pages = *inode->i_mapping->ra_pages; + f->f_ra.ra_pages = inode->i_mapping->backing_dev_info->ra_pages; f->f_dentry = dentry; f->f_vfsmnt = mnt; f->f_pos = 0; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index c9ee76ba5d06..2c4cf1dcc5f5 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -10,7 +10,6 @@ #include <linux/string.h> #include <linux/fs.h> #include <linux/openprom_fs.h> -#include <linux/locks.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/smp_lock.h> diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 638d218a7f15..8495f3e1fe41 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -11,7 +11,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/file.h> -#include <linux/locks.h> #include <linux/limits.h> #include <linux/init.h> #define __NO_VERSION__ diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c index 56136f136665..76c4cb0d5f0f 100644 --- a/fs/qnx4/fsync.c +++ b/fs/qnx4/fsync.c @@ -15,7 +15,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/fcntl.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/fs.h> 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 82b411437b4f..a839e24bfa9b 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -20,10 +20,10 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/qnx4_fs.h> -#include <linux/locks.h> #include <linux/init.h> #include <linux/highuid.h> #include <linux/smp_lock.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c index 88d9e181565a..681bdeb50a7c 100644 --- a/fs/qnx4/truncate.c +++ b/fs/qnx4/truncate.c @@ -15,7 +15,6 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/qnx4_fs.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> diff --git a/fs/quota.c b/fs/quota.c new file mode 100644 index 000000000000..45ef892edc5c --- /dev/null +++ b/fs/quota.c @@ -0,0 +1,656 @@ +/* + * Quota code necessary even when VFS quota support is not compiled + * into the kernel. The interesting stuff is over in dquot.c, here + * we have symbols for initial quotactl(2) handling, the sysctl(2) + * variables, etc - things needed even when quota support disabled. + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <asm/current.h> +#include <asm/uaccess.h> +#include <linux/kernel.h> +#include <linux/smp_lock.h> +#ifdef CONFIG_QIFACE_COMPAT +#include <linux/quotacompat.h> +#endif + + +int nr_dquots, nr_free_dquots; + +/* Check validity of quotactl */ +static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +{ + if (type >= MAXQUOTAS) + return -EINVAL; + /* Is operation supported? 
*/ + if (!sb->s_qcop) + return -ENOSYS; + + switch (cmd) { + case Q_GETFMT: + break; + case Q_QUOTAON: + if (!sb->s_qcop->quota_on) + return -ENOSYS; + break; + case Q_QUOTAOFF: + if (!sb->s_qcop->quota_off) + return -ENOSYS; + break; + case Q_SETINFO: + if (!sb->s_qcop->set_info) + return -ENOSYS; + break; + case Q_GETINFO: + if (!sb->s_qcop->get_info) + return -ENOSYS; + break; + case Q_SETQUOTA: + if (!sb->s_qcop->set_dqblk) + return -ENOSYS; + break; + case Q_GETQUOTA: + if (!sb->s_qcop->get_dqblk) + return -ENOSYS; + break; + case Q_SYNC: + if (!sb->s_qcop->quota_sync) + return -ENOSYS; + break; + case Q_XQUOTAON: + case Q_XQUOTAOFF: + case Q_XQUOTARM: + if (!sb->s_qcop->set_xstate) + return -ENOSYS; + break; + case Q_XGETQSTAT: + if (!sb->s_qcop->get_xstate) + return -ENOSYS; + break; + case Q_XSETQLIM: + if (!sb->s_qcop->set_xquota) + return -ENOSYS; + break; + case Q_XGETQUOTA: + if (!sb->s_qcop->get_xquota) + return -ENOSYS; + break; + default: + return -EINVAL; + } + + /* Is quota turned on for commands which need it? 
*/ + switch (cmd) { + case Q_GETFMT: + case Q_GETINFO: + case Q_QUOTAOFF: + case Q_SETINFO: + case Q_SETQUOTA: + case Q_GETQUOTA: + if (!sb_has_quota_enabled(sb, type)) + return -ESRCH; + } + /* Check privileges */ + if (cmd == Q_GETQUOTA || cmd == Q_XGETQUOTA) { + if (((type == USRQUOTA && current->euid != id) || + (type == GRPQUOTA && !in_egroup_p(id))) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + } + else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO && cmd != Q_XGETQSTAT) + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + +/* Resolve device pathname to superblock */ +static struct super_block *resolve_dev(const char *path) +{ + int ret; + mode_t mode; + struct nameidata nd; + kdev_t dev; + struct super_block *sb; + + ret = user_path_walk(path, &nd); + if (ret) + goto out; + + dev = nd.dentry->d_inode->i_rdev; + mode = nd.dentry->d_inode->i_mode; + path_release(&nd); + + ret = -ENOTBLK; + if (!S_ISBLK(mode)) + goto out; + ret = -ENODEV; + sb = get_super(dev); + if (!sb) + goto out; + return sb; +out: + return ERR_PTR(ret); +} + +/* Copy parameters and call proper function */ +static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, caddr_t addr) +{ + int ret; + + switch (cmd) { + case Q_QUOTAON: { + char *pathname; + + if (IS_ERR(pathname = getname(addr))) + return PTR_ERR(pathname); + ret = sb->s_qcop->quota_on(sb, type, id, pathname); + putname(pathname); + return ret; + } + case Q_QUOTAOFF: + return sb->s_qcop->quota_off(sb, type); + + case Q_GETFMT: { + __u32 fmt; + + fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; + if (copy_to_user(addr, &fmt, sizeof(fmt))) + return -EFAULT; + return 0; + } + case Q_GETINFO: { + struct if_dqinfo info; + + if ((ret = sb->s_qcop->get_info(sb, type, &info))) + return ret; + if (copy_to_user(addr, &info, sizeof(info))) + return -EFAULT; + return 0; + } + case Q_SETINFO: { + struct if_dqinfo info; + + if (copy_from_user(&info, addr, sizeof(info))) + return -EFAULT; + return 
sb->s_qcop->set_info(sb, type, &info); + } + case Q_GETQUOTA: { + struct if_dqblk idq; + + if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq))) + return ret; + if (copy_to_user(addr, &idq, sizeof(idq))) + return -EFAULT; + return 0; + } + case Q_SETQUOTA: { + struct if_dqblk idq; + + if (copy_from_user(&idq, addr, sizeof(idq))) + return -EFAULT; + return sb->s_qcop->set_dqblk(sb, type, id, &idq); + } + case Q_SYNC: + return sb->s_qcop->quota_sync(sb, type); + + case Q_XQUOTAON: + case Q_XQUOTAOFF: + case Q_XQUOTARM: { + __u32 flags; + + if (copy_from_user(&flags, addr, sizeof(flags))) + return -EFAULT; + return sb->s_qcop->set_xstate(sb, flags, cmd); + } + case Q_XGETQSTAT: { + struct fs_quota_stat fqs; + + if ((ret = sb->s_qcop->get_xstate(sb, &fqs))) + return ret; + if (copy_to_user(addr, &fqs, sizeof(fqs))) + return -EFAULT; + return 0; + } + case Q_XSETQLIM: { + struct fs_disk_quota fdq; + + if (copy_from_user(&fdq, addr, sizeof(fdq))) + return -EFAULT; + return sb->s_qcop->set_xquota(sb, type, id, &fdq); + } + case Q_XGETQUOTA: { + struct fs_disk_quota fdq; + + if ((ret = sb->s_qcop->get_xquota(sb, type, id, &fdq))) + return ret; + if (copy_to_user(addr, &fdq, sizeof(fdq))) + return -EFAULT; + return 0; + } + /* We never reach here unless validity check is broken */ + default: + BUG(); + } + return 0; +} + +#ifdef CONFIG_QIFACE_COMPAT +static int check_compat_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +{ + if (type >= MAXQUOTAS) + return -EINVAL; + /* Is operation supported? 
*/ + /* sb==NULL for GETSTATS calls */ + if (sb && !sb->s_qcop) + return -ENOSYS; + + switch (cmd) { + case Q_COMP_QUOTAON: + if (!sb->s_qcop->quota_on) + return -ENOSYS; + break; + case Q_COMP_QUOTAOFF: + if (!sb->s_qcop->quota_off) + return -ENOSYS; + break; + case Q_COMP_SYNC: + if (!sb->s_qcop->quota_sync) + return -ENOSYS; + break; +#ifdef CONFIG_QIFACE_V2 + case Q_V2_SETFLAGS: + case Q_V2_SETGRACE: + case Q_V2_SETINFO: + if (!sb->s_qcop->set_info) + return -ENOSYS; + break; + case Q_V2_GETINFO: + if (!sb->s_qcop->get_info) + return -ENOSYS; + break; + case Q_V2_SETQLIM: + case Q_V2_SETUSE: + case Q_V2_SETQUOTA: + if (!sb->s_qcop->set_dqblk) + return -ENOSYS; + break; + case Q_V2_GETQUOTA: + if (!sb->s_qcop->get_dqblk) + return -ENOSYS; + break; + case Q_V2_GETSTATS: + return 0; /* GETSTATS need no other checks */ +#endif +#ifdef CONFIG_QIFACE_V1 + case Q_V1_SETQLIM: + case Q_V1_SETUSE: + case Q_V1_SETQUOTA: + if (!sb->s_qcop->set_dqblk) + return -ENOSYS; + break; + case Q_V1_GETQUOTA: + if (!sb->s_qcop->get_dqblk) + return -ENOSYS; + break; + case Q_V1_RSQUASH: + if (!sb->s_qcop->set_info) + return -ENOSYS; + break; + case Q_V1_GETSTATS: + return 0; /* GETSTATS need no other checks */ +#endif + default: + return -EINVAL; + } + + /* Is quota turned on for commands which need it? 
*/ + switch (cmd) { + case Q_V2_SETFLAGS: + case Q_V2_SETGRACE: + case Q_V2_SETINFO: + case Q_V2_GETINFO: + case Q_COMP_QUOTAOFF: + case Q_V1_RSQUASH: + case Q_V1_SETQUOTA: + case Q_V1_SETQLIM: + case Q_V1_SETUSE: + case Q_V2_SETQUOTA: + /* Q_V2_SETQLIM: collision with Q_V1_SETQLIM */ + case Q_V2_SETUSE: + case Q_V1_GETQUOTA: + case Q_V2_GETQUOTA: + if (!sb_has_quota_enabled(sb, type)) + return -ESRCH; + } +#ifdef CONFIG_QIFACE_V1 + if (cmd != Q_COMP_QUOTAON && cmd != Q_COMP_QUOTAOFF && cmd != Q_COMP_SYNC && sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id != QFMT_VFS_OLD) +#else + if (cmd != Q_COMP_QUOTAON && cmd != Q_COMP_QUOTAOFF && cmd != Q_COMP_SYNC && sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id != QFMT_VFS_V0) +#endif + return -ESRCH; + + /* Check privileges */ + if (cmd == Q_V1_GETQUOTA || cmd == Q_V2_GETQUOTA) { + if (((type == USRQUOTA && current->euid != id) || + (type == GRPQUOTA && !in_egroup_p(id))) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + } + else if (cmd != Q_V1_GETSTATS && cmd != Q_V2_GETSTATS && cmd != Q_V2_GETINFO && cmd != Q_COMP_SYNC) + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + +#ifdef CONFIG_QIFACE_V1 +static int v1_set_rsquash(struct super_block *sb, int type, int flag) +{ + struct if_dqinfo info; + + info.dqi_valid = IIF_FLAGS; + info.dqi_flags = flag ? 
V1_DQF_RSQUASH : 0; + return sb->s_qcop->set_info(sb, type, &info); +} + +static int v1_get_dqblk(struct super_block *sb, int type, qid_t id, struct v1c_mem_dqblk *mdq) +{ + struct if_dqblk idq; + int ret; + + if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq)) < 0) + return ret; + mdq->dqb_ihardlimit = idq.dqb_ihardlimit; + mdq->dqb_isoftlimit = idq.dqb_isoftlimit; + mdq->dqb_curinodes = idq.dqb_curinodes; + mdq->dqb_bhardlimit = idq.dqb_bhardlimit; + mdq->dqb_bsoftlimit = idq.dqb_bsoftlimit; + mdq->dqb_curblocks = toqb(idq.dqb_curspace); + mdq->dqb_itime = idq.dqb_itime; + mdq->dqb_btime = idq.dqb_btime; + if (id == 0) { /* Times for id 0 are in fact grace times */ + struct if_dqinfo info; + + if ((ret = sb->s_qcop->get_info(sb, type, &info)) < 0) + return ret; + mdq->dqb_btime = info.dqi_bgrace; + mdq->dqb_itime = info.dqi_igrace; + } + return 0; +} + +static int v1_set_dqblk(struct super_block *sb, int type, int cmd, qid_t id, struct v1c_mem_dqblk *mdq) +{ + struct if_dqblk idq; + int ret; + + idq.dqb_valid = 0; + if (cmd == Q_V1_SETQUOTA || cmd == Q_V1_SETQLIM) { + idq.dqb_ihardlimit = mdq->dqb_ihardlimit; + idq.dqb_isoftlimit = mdq->dqb_isoftlimit; + idq.dqb_bhardlimit = mdq->dqb_bhardlimit; + idq.dqb_bsoftlimit = mdq->dqb_bsoftlimit; + idq.dqb_valid |= QIF_LIMITS; + } + if (cmd == Q_V1_SETQUOTA || cmd == Q_V1_SETUSE) { + idq.dqb_curinodes = mdq->dqb_curinodes; + idq.dqb_curspace = ((qsize_t)mdq->dqb_curblocks) << QUOTABLOCK_BITS; + idq.dqb_valid |= QIF_USAGE; + } + ret = sb->s_qcop->set_dqblk(sb, type, id, &idq); + if (!ret && id == 0 && cmd == Q_V1_SETQUOTA) { /* Times for id 0 are in fact grace times */ + struct if_dqinfo info; + + info.dqi_bgrace = mdq->dqb_btime; + info.dqi_igrace = mdq->dqb_itime; + info.dqi_valid = IIF_BGRACE | IIF_IGRACE; + ret = sb->s_qcop->set_info(sb, type, &info); + } + return ret; +} + +static void v1_get_stats(struct v1c_dqstats *dst) +{ + memcpy(dst, &dqstats, sizeof(dqstats)); +} +#endif + +#ifdef CONFIG_QIFACE_V2 +static 
int v2_get_info(struct super_block *sb, int type, struct v2c_mem_dqinfo *oinfo) +{ + struct if_dqinfo info; + int ret; + + if ((ret = sb->s_qcop->get_info(sb, type, &info)) < 0) + return ret; + oinfo->dqi_bgrace = info.dqi_bgrace; + oinfo->dqi_igrace = info.dqi_igrace; + oinfo->dqi_flags = info.dqi_flags; + oinfo->dqi_blocks = sb_dqopt(sb)->info[type].u.v2_i.dqi_blocks; + oinfo->dqi_free_blk = sb_dqopt(sb)->info[type].u.v2_i.dqi_free_blk; + oinfo->dqi_free_entry = sb_dqopt(sb)->info[type].u.v2_i.dqi_free_entry; + return 0; +} + +static int v2_set_info(struct super_block *sb, int type, int cmd, struct v2c_mem_dqinfo *oinfo) +{ + struct if_dqinfo info; + + info.dqi_valid = 0; + if (cmd == Q_V2_SETGRACE || cmd == Q_V2_SETINFO) { + info.dqi_bgrace = oinfo->dqi_bgrace; + info.dqi_igrace = oinfo->dqi_igrace; + info.dqi_valid |= IIF_BGRACE | IIF_IGRACE; + } + if (cmd == Q_V2_SETFLAGS || cmd == Q_V2_SETINFO) { + info.dqi_flags = oinfo->dqi_flags; + info.dqi_valid |= IIF_FLAGS; + } + /* We don't simulate deadly effects of setting other parameters ;-) */ + return sb->s_qcop->set_info(sb, type, &info); +} + +static int v2_get_dqblk(struct super_block *sb, int type, qid_t id, struct v2c_mem_dqblk *mdq) +{ + struct if_dqblk idq; + int ret; + + if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq)) < 0) + return ret; + mdq->dqb_ihardlimit = idq.dqb_ihardlimit; + mdq->dqb_isoftlimit = idq.dqb_isoftlimit; + mdq->dqb_curinodes = idq.dqb_curinodes; + mdq->dqb_bhardlimit = idq.dqb_bhardlimit; + mdq->dqb_bsoftlimit = idq.dqb_bsoftlimit; + mdq->dqb_curspace = idq.dqb_curspace; + mdq->dqb_itime = idq.dqb_itime; + mdq->dqb_btime = idq.dqb_btime; + return 0; +} + +static int v2_set_dqblk(struct super_block *sb, int type, int cmd, qid_t id, struct v2c_mem_dqblk *mdq) +{ + struct if_dqblk idq; + + idq.dqb_valid = 0; + if (cmd == Q_V2_SETQUOTA || cmd == Q_V2_SETQLIM) { + idq.dqb_ihardlimit = mdq->dqb_ihardlimit; + idq.dqb_isoftlimit = mdq->dqb_isoftlimit; + idq.dqb_bhardlimit = 
mdq->dqb_bhardlimit; + idq.dqb_bsoftlimit = mdq->dqb_bsoftlimit; + idq.dqb_valid |= QIF_LIMITS; + } + if (cmd == Q_V2_SETQUOTA || cmd == Q_V2_SETUSE) { + idq.dqb_curinodes = mdq->dqb_curinodes; + idq.dqb_curspace = mdq->dqb_curspace; + idq.dqb_valid |= QIF_USAGE; + } + return sb->s_qcop->set_dqblk(sb, type, id, &idq); +} + +static void v2_get_stats(struct v2c_dqstats *dst) +{ + memcpy(dst, &dqstats, sizeof(dqstats)); + dst->version = __DQUOT_NUM_VERSION__; +} +#endif + +/* Handle requests to old interface */ +static int do_compat_quotactl(struct super_block *sb, int type, int cmd, qid_t id, caddr_t addr) +{ + int ret; + + switch (cmd) { + case Q_COMP_QUOTAON: { + char *pathname; + + if (IS_ERR(pathname = getname(addr))) + return PTR_ERR(pathname); +#ifdef CONFIG_QIFACE_V1 + ret = sb->s_qcop->quota_on(sb, type, QFMT_VFS_OLD, pathname); +#else + ret = sb->s_qcop->quota_on(sb, type, QFMT_VFS_V0, pathname); +#endif + putname(pathname); + return ret; + } + case Q_COMP_QUOTAOFF: + return sb->s_qcop->quota_off(sb, type); + case Q_COMP_SYNC: + return sb->s_qcop->quota_sync(sb, type); +#ifdef CONFIG_QIFACE_V1 + case Q_V1_RSQUASH: { + int flag; + + if (copy_from_user(&flag, addr, sizeof(flag))) + return -EFAULT; + return v1_set_rsquash(sb, type, flag); + } + case Q_V1_GETQUOTA: { + struct v1c_mem_dqblk mdq; + + if ((ret = v1_get_dqblk(sb, type, id, &mdq))) + return ret; + if (copy_to_user(addr, &mdq, sizeof(mdq))) + return -EFAULT; + return 0; + } + case Q_V1_SETQLIM: + case Q_V1_SETUSE: + case Q_V1_SETQUOTA: { + struct v1c_mem_dqblk mdq; + + if (copy_from_user(&mdq, addr, sizeof(mdq))) + return -EFAULT; + return v1_set_dqblk(sb, type, cmd, id, &mdq); + } + case Q_V1_GETSTATS: { + struct v1c_dqstats dst; + + v1_get_stats(&dst); + if (copy_to_user(addr, &dst, sizeof(dst))) + return -EFAULT; + return 0; + } +#endif +#ifdef CONFIG_QIFACE_V2 + case Q_V2_GETINFO: { + struct v2c_mem_dqinfo info; + + if ((ret = v2_get_info(sb, type, &info))) + return ret; + if (copy_to_user(addr, 
&info, sizeof(info))) + return -EFAULT; + return 0; + } + case Q_V2_SETFLAGS: + case Q_V2_SETGRACE: + case Q_V2_SETINFO: { + struct v2c_mem_dqinfo info; + + if (copy_from_user(&info, addr, sizeof(info))) + return -EFAULT; + + return v2_set_info(sb, type, cmd, &info); + } + case Q_V2_GETQUOTA: { + struct v2c_mem_dqblk mdq; + + if ((ret = v2_get_dqblk(sb, type, id, &mdq))) + return ret; + if (copy_to_user(addr, &mdq, sizeof(mdq))) + return -EFAULT; + return 0; + } + case Q_V2_SETUSE: + case Q_V2_SETQLIM: + case Q_V2_SETQUOTA: { + struct v2c_mem_dqblk mdq; + + if (copy_from_user(&mdq, addr, sizeof(mdq))) + return -EFAULT; + return v2_set_dqblk(sb, type, cmd, id, &mdq); + } + case Q_V2_GETSTATS: { + struct v2c_dqstats dst; + + v2_get_stats(&dst); + if (copy_to_user(addr, &dst, sizeof(dst))) + return -EFAULT; + return 0; + } +#endif + } + BUG(); + return 0; +} +#endif + +/* Macros for short-circuiting the compatibility tests */ +#define NEW_COMMAND(c) ((c) & (0x80 << 16)) +#define XQM_COMMAND(c) (((c) & ('X' << 8)) == ('X' << 8)) + +/* + * This is the system call interface. This communicates with + * the user-level programs. Currently this only supports diskquota + * calls. Maybe we need to add the process quotas etc. in the future, + * but we probably should use rlimits for that. 
+ */ +asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, caddr_t addr) +{ + uint cmds, type; + struct super_block *sb = NULL; + int ret = -EINVAL; + + lock_kernel(); + cmds = cmd >> SUBCMDSHIFT; + type = cmd & SUBCMDMASK; + +#ifdef CONFIG_QIFACE_COMPAT + if (cmds != Q_V1_GETSTATS && cmds != Q_V2_GETSTATS && IS_ERR(sb = resolve_dev(special))) { + ret = PTR_ERR(sb); + sb = NULL; + goto out; + } + if (!NEW_COMMAND(cmds) && !XQM_COMMAND(cmds)) { + if ((ret = check_compat_quotactl_valid(sb, type, cmds, id)) < 0) + goto out; + ret = do_compat_quotactl(sb, type, cmds, id, addr); + goto out; + } +#else + if (IS_ERR(sb = resolve_dev(special))) { + ret = PTR_ERR(sb); + sb = NULL; + goto out; + } +#endif + if ((ret = check_quotactl_valid(sb, type, cmds, id)) < 0) + goto out; + ret = do_quotactl(sb, type, cmds, id, addr); +out: + if (sb) + drop_super(sb); + unlock_kernel(); + return ret; +} diff --git a/fs/quota_v1.c b/fs/quota_v1.c new file mode 100644 index 000000000000..aa3b7842399d --- /dev/null +++ b/fs/quota_v1.c @@ -0,0 +1,239 @@ +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/quota.h> +#include <linux/dqblk_v1.h> +#include <linux/quotaio_v1.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> + +#include <asm/uaccess.h> +#include <asm/byteorder.h> + +static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d) +{ + m->dqb_ihardlimit = d->dqb_ihardlimit; + m->dqb_isoftlimit = d->dqb_isoftlimit; + m->dqb_curinodes = d->dqb_curinodes; + m->dqb_bhardlimit = d->dqb_bhardlimit; + m->dqb_bsoftlimit = d->dqb_bsoftlimit; + m->dqb_curspace = d->dqb_curblocks << QUOTABLOCK_BITS; + m->dqb_itime = d->dqb_itime; + m->dqb_btime = d->dqb_btime; +} + +static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m) +{ + d->dqb_ihardlimit = m->dqb_ihardlimit; + d->dqb_isoftlimit = m->dqb_isoftlimit; + d->dqb_curinodes = m->dqb_curinodes; + d->dqb_bhardlimit = m->dqb_bhardlimit; + 
d->dqb_bsoftlimit = m->dqb_bsoftlimit; + d->dqb_curblocks = toqb(m->dqb_curspace); + d->dqb_itime = m->dqb_itime; + d->dqb_btime = m->dqb_btime; +} + +static int v1_read_dqblk(struct dquot *dquot) +{ + int type = dquot->dq_type; + struct file *filp; + mm_segment_t fs; + loff_t offset; + struct v1_disk_dqblk dqblk; + + filp = sb_dqopt(dquot->dq_sb)->files[type]; + if (filp == (struct file *)NULL) + return -EINVAL; + + /* Now we are sure filp is valid */ + offset = v1_dqoff(dquot->dq_id); + fs = get_fs(); + set_fs(KERNEL_DS); + filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset); + set_fs(fs); + + v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); + if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 && + dquot->dq_dqb.dqb_ihardlimit == 0 && dquot->dq_dqb.dqb_isoftlimit == 0) + dquot->dq_flags |= DQ_FAKE; + dqstats.reads++; + return 0; +} + +static int v1_commit_dqblk(struct dquot *dquot) +{ + short type = dquot->dq_type; + struct file *filp; + mm_segment_t fs; + loff_t offset; + ssize_t ret; + struct v1_disk_dqblk dqblk; + + filp = sb_dqopt(dquot->dq_sb)->files[type]; + offset = v1_dqoff(dquot->dq_id); + fs = get_fs(); + set_fs(KERNEL_DS); + + /* + * Note: clear the DQ_MOD flag unconditionally, + * so we don't loop forever on failure. 
+ */ + v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); + dquot->dq_flags &= ~DQ_MOD; + if (dquot->dq_id == 0) { + dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; + dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace; + } + ret = 0; + if (filp) + ret = filp->f_op->write(filp, (char *)&dqblk, + sizeof(struct v1_disk_dqblk), &offset); + if (ret != sizeof(struct v1_disk_dqblk)) { + printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", + kdevname(dquot->dq_sb->s_dev)); + if (ret >= 0) + ret = -EIO; + goto out; + } + ret = 0; + +out: + set_fs(fs); + dqstats.writes++; + return ret; +} + +/* Magics of new quota format */ +#define V2_INITQMAGICS {\ + 0xd9c01f11, /* USRQUOTA */\ + 0xd9c01927 /* GRPQUOTA */\ +} + +/* Header of new quota format */ +struct v2_disk_dqheader { + __u32 dqh_magic; /* Magic number identifying file */ + __u32 dqh_version; /* File version */ +}; + +static int v1_check_quota_file(struct super_block *sb, int type) +{ + struct file *f = sb_dqopt(sb)->files[type]; + struct inode *inode = f->f_dentry->d_inode; + ulong blocks; + size_t off; + struct v2_disk_dqheader dqhead; + mm_segment_t fs; + ssize_t size; + loff_t offset = 0; + static const uint quota_magics[] = V2_INITQMAGICS; + + if (!inode->i_size) + return 0; + blocks = inode->i_size >> BLOCK_SIZE_BITS; + off = inode->i_size & (BLOCK_SIZE - 1); + if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk)) + return 0; + /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */ + fs = get_fs(); + set_fs(KERNEL_DS); + size = f->f_op->read(f, (char *)&dqhead, sizeof(struct v2_disk_dqheader), &offset); + set_fs(fs); + if (size != sizeof(struct v2_disk_dqheader)) + return 1; /* Probably not new format */ + if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type]) + return 1; /* Definitely not new format */ + printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. 
It probably contains newer quota format.\n", kdevname(sb->s_dev)); + return 0; /* Seems like a new format file -> refuse it */ +} + +static int v1_read_file_info(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + mm_segment_t fs; + loff_t offset; + struct file *filp = dqopt->files[type]; + struct v1_disk_dqblk dqblk; + int ret; + + down(&dqopt->dqio_sem); + offset = v1_dqoff(0); + fs = get_fs(); + set_fs(KERNEL_DS); + if ((ret = filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset)) != sizeof(struct v1_disk_dqblk)) { + if (ret >= 0) + ret = -EIO; + goto out; + } + ret = 0; + dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; + dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME; +out: + up(&dqopt->dqio_sem); + set_fs(fs); + return ret; +} + +static int v1_write_file_info(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + mm_segment_t fs; + struct file *filp = dqopt->files[type]; + struct v1_disk_dqblk dqblk; + loff_t offset; + int ret; + + down(&dqopt->dqio_sem); + dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY; + offset = v1_dqoff(0); + fs = get_fs(); + set_fs(KERNEL_DS); + if ((ret = filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset)) != sizeof(struct v1_disk_dqblk)) { + if (ret >= 0) + ret = -EIO; + goto out; + } + dqblk.dqb_itime = dqopt->info[type].dqi_igrace; + dqblk.dqb_btime = dqopt->info[type].dqi_bgrace; + offset = v1_dqoff(0); + ret = filp->f_op->write(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset); + if (ret == sizeof(struct v1_disk_dqblk)) + ret = 0; + else if (ret > 0) + ret = -EIO; +out: + up(&dqopt->dqio_sem); + set_fs(fs); + return ret; +} + +static struct quota_format_ops v1_format_ops = { + check_quota_file: v1_check_quota_file, + read_file_info: v1_read_file_info, + write_file_info: v1_write_file_info, + free_file_info: NULL, + read_dqblk: v1_read_dqblk, + 
commit_dqblk: v1_commit_dqblk, +}; + +static struct quota_format_type v1_quota_format = { + qf_fmt_id: QFMT_VFS_OLD, + qf_ops: &v1_format_ops, + qf_owner: THIS_MODULE +}; + +static int __init init_v1_quota_format(void) +{ + return register_quota_format(&v1_quota_format); +} + +static void __exit exit_v1_quota_format(void) +{ + unregister_quota_format(&v1_quota_format); +} + +EXPORT_NO_SYMBOLS; + +module_init(init_v1_quota_format); +module_exit(exit_v1_quota_format); + diff --git a/fs/quota_v2.c b/fs/quota_v2.c new file mode 100644 index 000000000000..e28bee8c52ab --- /dev/null +++ b/fs/quota_v2.c @@ -0,0 +1,690 @@ +/* + * vfsv0 quota IO operations on file + */ + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/dqblk_v2.h> +#include <linux/quotaio_v2.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include <asm/byteorder.h> +#include <asm/uaccess.h> + +#define __QUOTA_V2_PARANOIA + +typedef char *dqbuf_t; + +#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff) +#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader))) + +/* Check whether given file is really vfsv0 quotafile */ +static int v2_check_quota_file(struct super_block *sb, int type) +{ + struct v2_disk_dqheader dqhead; + struct file *f = sb_dqopt(sb)->files[type]; + mm_segment_t fs; + ssize_t size; + loff_t offset = 0; + static const uint quota_magics[] = V2_INITQMAGICS; + static const uint quota_versions[] = V2_INITQVERSIONS; + + fs = get_fs(); + set_fs(KERNEL_DS); + size = f->f_op->read(f, (char *)&dqhead, sizeof(struct v2_disk_dqheader), &offset); + set_fs(fs); + if (size != sizeof(struct v2_disk_dqheader)) + return 0; + if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || + le32_to_cpu(dqhead.dqh_version) != quota_versions[type]) + return 0; + return 1; +} + +/* Read information header from quota file */ +static int v2_read_file_info(struct super_block 
*sb, int type) +{ + mm_segment_t fs; + struct v2_disk_dqinfo dinfo; + struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct file *f = sb_dqopt(sb)->files[type]; + ssize_t size; + loff_t offset = V2_DQINFOOFF; + + fs = get_fs(); + set_fs(KERNEL_DS); + size = f->f_op->read(f, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), &offset); + set_fs(fs); + if (size != sizeof(struct v2_disk_dqinfo)) { + printk(KERN_WARNING "Can't read info structure on device %s.\n", + kdevname(f->f_dentry->d_sb->s_dev)); + return -1; + } + info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); + info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); + info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); + info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); + info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); + info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); + return 0; +} + +/* Write information header to quota file */ +static int v2_write_file_info(struct super_block *sb, int type) +{ + mm_segment_t fs; + struct v2_disk_dqinfo dinfo; + struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct file *f = sb_dqopt(sb)->files[type]; + ssize_t size; + loff_t offset = V2_DQINFOOFF; + + info->dqi_flags &= ~DQF_INFO_DIRTY; + dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); + dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); + dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); + dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks); + dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk); + dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry); + fs = get_fs(); + set_fs(KERNEL_DS); + size = f->f_op->write(f, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), &offset); + set_fs(fs); + if (size != sizeof(struct v2_disk_dqinfo)) { + printk(KERN_WARNING "Can't write info structure on device %s.\n", + kdevname(f->f_dentry->d_sb->s_dev)); + return -1; + } + return 0; +} + +static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d) +{ + 
m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); + m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit); + m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes); + m->dqb_itime = le64_to_cpu(d->dqb_itime); + m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit); + m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit); + m->dqb_curspace = le64_to_cpu(d->dqb_curspace); + m->dqb_btime = le64_to_cpu(d->dqb_btime); +} + +static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id) +{ + d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); + d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); + d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes); + d->dqb_itime = cpu_to_le64(m->dqb_itime); + d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit); + d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit); + d->dqb_curspace = cpu_to_le64(m->dqb_curspace); + d->dqb_btime = cpu_to_le64(m->dqb_btime); + d->dqb_id = cpu_to_le32(id); +} + +static dqbuf_t getdqbuf(void) +{ + dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_KERNEL); + if (!buf) + printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n"); + return buf; +} + +static inline void freedqbuf(dqbuf_t buf) +{ + kfree(buf); +} + +static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf) +{ + mm_segment_t fs; + ssize_t ret; + loff_t offset = blk<<V2_DQBLKSIZE_BITS; + + memset(buf, 0, V2_DQBLKSIZE); + fs = get_fs(); + set_fs(KERNEL_DS); + ret = filp->f_op->read(filp, (char *)buf, V2_DQBLKSIZE, &offset); + set_fs(fs); + return ret; +} + +static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf) +{ + mm_segment_t fs; + ssize_t ret; + loff_t offset = blk<<V2_DQBLKSIZE_BITS; + + fs = get_fs(); + set_fs(KERNEL_DS); + ret = filp->f_op->write(filp, (char *)buf, V2_DQBLKSIZE, &offset); + set_fs(fs); + return ret; + +} + +/* Remove empty block from list and return it */ +static int get_free_dqblk(struct file *filp, struct mem_dqinfo *info) +{ + dqbuf_t buf = getdqbuf(); + struct v2_disk_dqdbheader *dh = (struct 
v2_disk_dqdbheader *)buf; + int ret, blk; + + if (!buf) + return -ENOMEM; + if (info->u.v2_i.dqi_free_blk) { + blk = info->u.v2_i.dqi_free_blk; + if ((ret = read_blk(filp, blk, buf)) < 0) + goto out_buf; + info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); + } + else { + memset(buf, 0, V2_DQBLKSIZE); + if ((ret = write_blk(filp, info->u.v2_i.dqi_blocks, buf)) < 0) /* Assure block allocation... */ + goto out_buf; + blk = info->u.v2_i.dqi_blocks++; + } + mark_info_dirty(info); + ret = blk; +out_buf: + freedqbuf(buf); + return ret; +} + +/* Insert empty block to the list */ +static int put_free_dqblk(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +{ + struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; + int err; + + dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk); + dh->dqdh_prev_free = cpu_to_le32(0); + dh->dqdh_entries = cpu_to_le16(0); + info->u.v2_i.dqi_free_blk = blk; + mark_info_dirty(info); + if ((err = write_blk(filp, blk, buf)) < 0) /* Some strange block. We had better leave it... 
*/ + return err; + return 0; +} + +/* Remove given block from the list of blocks with free entries */ +static int remove_free_dqentry(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +{ + dqbuf_t tmpbuf = getdqbuf(); + struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; + uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free); + int err; + + if (!tmpbuf) + return -ENOMEM; + if (nextblk) { + if ((err = read_blk(filp, nextblk, tmpbuf)) < 0) + goto out_buf; + ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free; + if ((err = write_blk(filp, nextblk, tmpbuf)) < 0) + goto out_buf; + } + if (prevblk) { + if ((err = read_blk(filp, prevblk, tmpbuf)) < 0) + goto out_buf; + ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free; + if ((err = write_blk(filp, prevblk, tmpbuf)) < 0) + goto out_buf; + } + else { + info->u.v2_i.dqi_free_entry = nextblk; + mark_info_dirty(info); + } + freedqbuf(tmpbuf); + dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); + if (write_blk(filp, blk, buf) < 0) /* No matter whether write succeeds block is out of list */ + printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk); + return 0; +out_buf: + freedqbuf(tmpbuf); + return err; +} + +/* Insert given block to the beginning of list with free entries */ +static int insert_free_dqentry(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +{ + dqbuf_t tmpbuf = getdqbuf(); + struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; + int err; + + if (!tmpbuf) + return -ENOMEM; + dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry); + dh->dqdh_prev_free = cpu_to_le32(0); + if ((err = write_blk(filp, blk, buf)) < 0) + goto out_buf; + if (info->u.v2_i.dqi_free_entry) { + if ((err = read_blk(filp, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) + goto out_buf; + ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk); + if ((err = 
write_blk(filp, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) + goto out_buf; + } + freedqbuf(tmpbuf); + info->u.v2_i.dqi_free_entry = blk; + mark_info_dirty(info); + return 0; +out_buf: + freedqbuf(tmpbuf); + return err; +} + +/* Find space for dquot */ +static uint find_free_dqentry(struct dquot *dquot, int *err) +{ + struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info+dquot->dq_type; + uint blk, i; + struct v2_disk_dqdbheader *dh; + struct v2_disk_dqblk *ddquot; + struct v2_disk_dqblk fakedquot; + dqbuf_t buf; + + *err = 0; + if (!(buf = getdqbuf())) { + *err = -ENOMEM; + return 0; + } + dh = (struct v2_disk_dqdbheader *)buf; + ddquot = GETENTRIES(buf); + if (info->u.v2_i.dqi_free_entry) { + blk = info->u.v2_i.dqi_free_entry; + if ((*err = read_blk(filp, blk, buf)) < 0) + goto out_buf; + } + else { + blk = get_free_dqblk(filp, info); + if ((int)blk < 0) { + *err = blk; + return 0; + } + memset(buf, 0, V2_DQBLKSIZE); + info->u.v2_i.dqi_free_entry = blk; /* This is enough as block is already zeroed and entry list is empty... */ + mark_info_dirty(info); + } + if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? 
*/ + if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) { + printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); + goto out_buf; + } + dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); + memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); + /* Find free structure in block */ + for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); +#ifdef __QUOTA_V2_PARANOIA + if (i == V2_DQSTRINBLK) { + printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n"); + *err = -EIO; + goto out_buf; + } +#endif + if ((*err = write_blk(filp, blk, buf)) < 0) { + printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk); + goto out_buf; + } + dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk); + freedqbuf(buf); + return blk; +out_buf: + freedqbuf(buf); + return 0; +} + +/* Insert reference to structure into the trie */ +static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth) +{ + struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; + dqbuf_t buf; + int ret = 0, newson = 0, newact = 0; + u32 *ref; + uint newblk; + + if (!(buf = getdqbuf())) + return -ENOMEM; + if (!*treeblk) { + ret = get_free_dqblk(filp, info); + if (ret < 0) + goto out_buf; + *treeblk = ret; + memset(buf, 0, V2_DQBLKSIZE); + newact = 1; + } + else { + if ((ret = read_blk(filp, *treeblk, buf)) < 0) { + printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk); + goto out_buf; + } + } + ref = (u32 *)buf; + newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); + if (!newblk) + newson = 1; + if (depth == V2_DQTREEDEPTH-1) { +#ifdef __QUOTA_V2_PARANOIA + if (newblk) { + printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", ref[GETIDINDEX(dquot->dq_id, depth)]); + ret = 
-EIO; + goto out_buf; + } +#endif + newblk = find_free_dqentry(dquot, &ret); + } + else + ret = do_insert_tree(dquot, &newblk, depth+1); + if (newson && ret >= 0) { + ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk); + ret = write_blk(filp, *treeblk, buf); + } + else if (newact && ret < 0) + put_free_dqblk(filp, info, buf, *treeblk); +out_buf: + freedqbuf(buf); + return ret; +} + +/* Wrapper for inserting quota structure into tree */ +static inline int dq_insert_tree(struct dquot *dquot) +{ + int tmp = V2_DQTREEOFF; + return do_insert_tree(dquot, &tmp, 0); +} + +/* + * We don't have to be afraid of deadlocks as we never have quotas on quota files... + */ +static int v2_write_dquot(struct dquot *dquot) +{ + int type = dquot->dq_type; + struct file *filp; + mm_segment_t fs; + loff_t offset; + ssize_t ret; + struct v2_disk_dqblk ddquot; + + if (!dquot->dq_off) + if ((ret = dq_insert_tree(dquot)) < 0) { + printk(KERN_ERR "VFS: Error %d occured while creating quota.\n", ret); + return ret; + } + filp = sb_dqopt(dquot->dq_sb)->files[type]; + offset = dquot->dq_off; + mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); + fs = get_fs(); + set_fs(KERNEL_DS); + ret = filp->f_op->write(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset); + set_fs(fs); + if (ret != sizeof(struct v2_disk_dqblk)) { + printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", kdevname(dquot->dq_sb->s_dev)); + if (ret >= 0) + ret = -ENOSPC; + } + else + ret = 0; + dqstats.writes++; + return ret; +} + +/* Free dquot entry in data block */ +static int free_dqentry(struct dquot *dquot, uint blk) +{ + struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; + struct v2_disk_dqdbheader *dh; + dqbuf_t buf = getdqbuf(); + int ret = 0; + + if (!buf) + return -ENOMEM; + if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) { + printk(KERN_ERR "VFS: Quota structure has offset to other block (%u) than it should 
(%u).\n", blk, (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS)); + goto out_buf; + } + if ((ret = read_blk(filp, blk, buf)) < 0) { + printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); + goto out_buf; + } + dh = (struct v2_disk_dqdbheader *)buf; + dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); + if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ + if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 || + (ret = put_free_dqblk(filp, info, buf, blk)) < 0) { + printk(KERN_ERR "VFS: Can't move quota data block (%u) to free list.\n", blk); + goto out_buf; + } + } + else { + memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, sizeof(struct v2_disk_dqblk)); + if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { + /* Insert will write block itself */ + if ((ret = insert_free_dqentry(filp, info, buf, blk)) < 0) { + printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); + goto out_buf; + } + } + else + if ((ret = write_blk(filp, blk, buf)) < 0) { + printk(KERN_ERR "VFS: Can't write quota data block %u\n", blk); + goto out_buf; + } + } + dquot->dq_off = 0; /* Quota is now unattached */ +out_buf: + freedqbuf(buf); + return ret; +} + +/* Remove reference to dquot from tree */ +static int remove_tree(struct dquot *dquot, uint *blk, int depth) +{ + struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; + dqbuf_t buf = getdqbuf(); + int ret = 0; + uint newblk; + u32 *ref = (u32 *)buf; + + if (!buf) + return -ENOMEM; + if ((ret = read_blk(filp, *blk, buf)) < 0) { + printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); + goto out_buf; + } + newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); + if (depth == V2_DQTREEDEPTH-1) { + ret = free_dqentry(dquot, newblk); + newblk = 0; + } + else + ret = remove_tree(dquot, &newblk, depth+1); + if (ret >= 0 && !newblk) { + int i; + 
ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0); + for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? */ + if (i == V2_DQBLKSIZE) { + put_free_dqblk(filp, info, buf, *blk); + *blk = 0; + } + else + if ((ret = write_blk(filp, *blk, buf)) < 0) + printk(KERN_ERR "VFS: Can't write quota tree block %u.\n", *blk); + } +out_buf: + freedqbuf(buf); + return ret; +} + +/* Delete dquot from tree */ +static int v2_delete_dquot(struct dquot *dquot) +{ + uint tmp = V2_DQTREEOFF; + + if (!dquot->dq_off) /* Even not allocated? */ + return 0; + return remove_tree(dquot, &tmp, 0); +} + +/* Find entry in block */ +static loff_t find_block_dqentry(struct dquot *dquot, uint blk) +{ + struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + dqbuf_t buf = getdqbuf(); + loff_t ret = 0; + int i; + struct v2_disk_dqblk *ddquot = GETENTRIES(buf); + + if (!buf) + return -ENOMEM; + if ((ret = read_blk(filp, blk, buf)) < 0) { + printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + goto out_buf; + } + if (dquot->dq_id) + for (i = 0; i < V2_DQSTRINBLK && le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++); + else { /* ID 0 as a bit more complicated searching... 
*/ + struct v2_disk_dqblk fakedquot; + + memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); + for (i = 0; i < V2_DQSTRINBLK; i++) + if (!le32_to_cpu(ddquot[i].dqb_id) && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk))) + break; + } + if (i == V2_DQSTRINBLK) { + printk(KERN_ERR "VFS: Quota for id %u referenced but not present.\n", dquot->dq_id); + ret = -EIO; + goto out_buf; + } + else + ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk); +out_buf: + freedqbuf(buf); + return ret; +} + +/* Find entry for given id in the tree */ +static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth) +{ + struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + dqbuf_t buf = getdqbuf(); + loff_t ret = 0; + u32 *ref = (u32 *)buf; + + if (!buf) + return -ENOMEM; + if ((ret = read_blk(filp, blk, buf)) < 0) { + printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + goto out_buf; + } + ret = 0; + blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); + if (!blk) /* No reference? */ + goto out_buf; + if (depth < V2_DQTREEDEPTH-1) + ret = find_tree_dqentry(dquot, blk, depth+1); + else + ret = find_block_dqentry(dquot, blk); +out_buf: + freedqbuf(buf); + return ret; +} + +/* Find entry for given id in the tree - wrapper function */ +static inline loff_t find_dqentry(struct dquot *dquot) +{ + return find_tree_dqentry(dquot, V2_DQTREEOFF, 0); +} + +static int v2_read_dquot(struct dquot *dquot) +{ + int type = dquot->dq_type; + struct file *filp; + mm_segment_t fs; + loff_t offset; + struct v2_disk_dqblk ddquot; + int ret = 0; + + filp = sb_dqopt(dquot->dq_sb)->files[type]; + +#ifdef __QUOTA_V2_PARANOIA + if (!filp || !dquot->dq_sb) { /* Invalidated quota? */ + printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); + return -EIO; + } +#endif + offset = find_dqentry(dquot); + if (offset <= 0) { /* Entry not present? 
*/ + if (offset < 0) + printk(KERN_ERR "VFS: Can't read quota structure for id %u.\n", dquot->dq_id); + dquot->dq_off = 0; + dquot->dq_flags |= DQ_FAKE; + memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); + ret = offset; + } + else { + dquot->dq_off = offset; + fs = get_fs(); + set_fs(KERNEL_DS); + if ((ret = filp->f_op->read(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset)) != sizeof(struct v2_disk_dqblk)) { + if (ret >= 0) + ret = -EIO; + printk(KERN_ERR "VFS: Error while reading quota structure for id %u.\n", dquot->dq_id); + memset(&ddquot, 0, sizeof(struct v2_disk_dqblk)); + } + else + ret = 0; + set_fs(fs); + disk2memdqb(&dquot->dq_dqb, &ddquot); + } + dqstats.reads++; + return ret; +} + +/* Commit changes of dquot to disk - it might also mean deleting it when quota became fake one and user has no blocks... */ +static int v2_commit_dquot(struct dquot *dquot) +{ + /* We clear the flag everytime so we don't loop when there was an IO error... */ + dquot->dq_flags &= ~DQ_MOD; + if (dquot->dq_flags & DQ_FAKE && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) + return v2_delete_dquot(dquot); + else + return v2_write_dquot(dquot); +} + +static struct quota_format_ops v2_format_ops = { + check_quota_file: v2_check_quota_file, + read_file_info: v2_read_file_info, + write_file_info: v2_write_file_info, + free_file_info: NULL, + read_dqblk: v2_read_dquot, + commit_dqblk: v2_commit_dquot, +}; + +static struct quota_format_type v2_quota_format = { + qf_fmt_id: QFMT_VFS_V0, + qf_ops: &v2_format_ops, + qf_owner: THIS_MODULE +}; + +static int __init init_v2_quota_format(void) +{ + return register_quota_format(&v2_quota_format); +} + +static void __exit exit_v2_quota_format(void) +{ + unregister_quota_format(&v2_quota_format); +} + +EXPORT_NO_SYMBOLS; + +module_init(init_v2_quota_format); +module_exit(exit_v2_quota_format); diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 6d31e937127e..35ee0a1c2604 100644 --- a/fs/ramfs/inode.c +++ 
b/fs/ramfs/inode.c @@ -28,7 +28,6 @@ #include <linux/pagemap.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 6968c41a4680..95fccb745e06 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -5,7 +5,6 @@ #include <linux/config.h> #include <linux/time.h> #include <linux/reiserfs_fs.h> -#include <linux/locks.h> #include <asm/bitops.h> #include <linux/list.h> diff --git a/fs/reiserfs/buffer2.c b/fs/reiserfs/buffer2.c index b7025254a64b..62ec8424eca1 100644 --- a/fs/reiserfs/buffer2.c +++ b/fs/reiserfs/buffer2.c @@ -4,7 +4,6 @@ #include <linux/config.h> #include <linux/time.h> -#include <linux/locks.h> #include <linux/reiserfs_fs.h> #include <linux/smp_lock.h> #include <linux/kernel_stat.h> diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index f6aa41b82360..75c3a35d11a1 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -72,6 +72,12 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) { } /* Sync a reiserfs file. */ + +/* + * FIXME: sync_mapping_buffers() never has anything to sync. Can + * be removed... + */ + static int reiserfs_sync_file( struct file * p_s_filp, struct dentry * p_s_dentry, @@ -85,7 +91,7 @@ static int reiserfs_sync_file( if (!S_ISREG(p_s_inode->i_mode)) BUG (); - n_err = fsync_inode_buffers(p_s_inode) ; + n_err = sync_mapping_buffers(p_s_inode->i_mapping) ; reiserfs_commit_for_inode(p_s_inode) ; unlock_kernel() ; return ( n_err < 0 ) ? 
-EIO : 0; diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 9a5dd50cfaf7..802eefb3825d 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -38,7 +38,6 @@ #include <linux/config.h> #include <linux/time.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 5e1aa98bf4e5..4a757803c44b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -5,8 +5,8 @@ #include <linux/config.h> #include <linux/time.h> #include <linux/reiserfs_fs.h> -#include <linux/locks.h> #include <linux/smp_lock.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <asm/unaligned.h> @@ -33,7 +33,7 @@ void reiserfs_delete_inode (struct inode * inode) lock_kernel() ; /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ - if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ + if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ down (&inode->i_sem); journal_begin(&th, inode->i_sb, jbegin_count) ; @@ -886,7 +886,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, // item version directly // -// called by read_inode +// called by read_locked_inode static void init_inode (struct inode * inode, struct path * path) { struct buffer_head * bh; @@ -1117,7 +1117,7 @@ void reiserfs_update_sd (struct reiserfs_transaction_handle *th, return; } -/* reiserfs_read_inode2 is called to read the inode off disk, and it +/* reiserfs_read_locked_inode is called to read the inode off disk, and it ** does a make_bad_inode when things go wrong. 
But, we need to make sure ** and clear the key in the private portion of the inode, otherwise a ** corresponding iput might try to delete whatever object the inode last @@ -1128,32 +1128,29 @@ static void reiserfs_make_bad_inode(struct inode *inode) { make_bad_inode(inode); } -void reiserfs_read_inode(struct inode *inode) { - reiserfs_make_bad_inode(inode) ; -} - - // // initially this function was derived from minix or ext2's analog and // evolved as the prototype did // +int reiserfs_init_locked_inode (struct inode * inode, void *p) +{ + struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p ; + inode->i_ino = args->objectid; + INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); + return 0; +} + /* looks for stat data in the tree, and fills up the fields of in-core inode stat data fields */ -void reiserfs_read_inode2 (struct inode * inode, void *p) +void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args) { INITIALIZE_PATH (path_to_sd); struct cpu_key key; - struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ; unsigned long dirino; int retval; - if (!p) { - reiserfs_make_bad_inode(inode) ; - return; - } - - dirino = args->objectid ; + dirino = args->dirid ; /* set version 1, version 2 could be used too, because stat data key is the same in both versions */ @@ -1166,7 +1163,7 @@ void reiserfs_read_inode2 (struct inode * inode, void *p) /* look for the object's stat data */ retval = search_item (inode->i_sb, &key, &path_to_sd); if (retval == IO_ERROR) { - reiserfs_warning ("vs-13070: reiserfs_read_inode2: " + reiserfs_warning ("vs-13070: reiserfs_read_locked_inode: " "i/o failure occurred trying to find stat data of %K\n", &key); reiserfs_make_bad_inode(inode) ; @@ -1198,7 +1195,7 @@ void reiserfs_read_inode2 (struct inode * inode, void *p) during mount (fs/reiserfs/super.c:finish_unfinished()). */ if( ( inode -> i_nlink == 0 ) && ! 
REISERFS_SB(inode -> i_sb) -> s_is_unlinked_ok ) { - reiserfs_warning( "vs-13075: reiserfs_read_inode2: " + reiserfs_warning( "vs-13075: reiserfs_read_locked_inode: " "dead inode read from disk %K. " "This is likely to be race with knfsd. Ignore\n", &key ); @@ -1210,39 +1207,44 @@ void reiserfs_read_inode2 (struct inode * inode, void *p) } /** - * reiserfs_find_actor() - "find actor" reiserfs supplies to iget4(). + * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). * * @inode: inode from hash table to check - * @inode_no: inode number we are looking for - * @opaque: "cookie" passed to iget4(). This is &reiserfs_iget4_args. + * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. * - * This function is called by iget4() to distinguish reiserfs inodes + * This function is called by iget5_locked() to distinguish reiserfs inodes * having the same inode numbers. Such inodes can only exist due to some * error condition. One of them should be bad. Inodes with identical * inode numbers (objectids) are distinguished by parent directory ids. 
* */ -static int reiserfs_find_actor( struct inode *inode, - unsigned long inode_no, void *opaque ) +int reiserfs_find_actor( struct inode *inode, void *opaque ) { - struct reiserfs_iget4_args *args; + struct reiserfs_iget_args *args; args = opaque; /* args is already in CPU order */ - return le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args -> objectid; + return (inode->i_ino == args->objectid) && + (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); } struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key) { struct inode * inode; - struct reiserfs_iget4_args args ; + struct reiserfs_iget_args args ; - args.objectid = key->on_disk_key.k_dir_id ; - inode = iget4 (s, key->on_disk_key.k_objectid, - reiserfs_find_actor, (void *)(&args)); + args.objectid = key->on_disk_key.k_objectid ; + args.dirid = key->on_disk_key.k_dir_id ; + inode = iget5_locked (s, key->on_disk_key.k_objectid, + reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); if (!inode) return ERR_PTR(-ENOMEM) ; + if (inode->i_state & I_NEW) { + reiserfs_read_locked_inode(inode, &args); + unlock_new_inode(inode); + } + if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) { /* either due to i/o error or a stale NFS handle */ iput (inode); @@ -2105,9 +2107,47 @@ static int reiserfs_commit_write(struct file *f, struct page *page, return ret ; } +/* + * Returns 1 if the page's buffers were dropped. The page is locked. + * + * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads + * in the buffers at page_buffers(page). + * + * FIXME: Chris says the buffer list is not used with `mount -o notail', + * so in that case the fs can avoid the extra locking. Create a second + * address_space_operations with a NULL ->releasepage and install that + * into new address_spaces. 
+ */ +static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) +{ + struct inode *inode = page->mapping->host ; + struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; + struct buffer_head *head ; + struct buffer_head *bh ; + int ret = 1 ; + + spin_lock(&j->j_dirty_buffers_lock) ; + head = page_buffers(page) ; + bh = head ; + do { + if (!buffer_dirty(bh) && !buffer_locked(bh)) { + list_del_init(&bh->b_assoc_buffers) ; + } else { + ret = 0 ; + break ; + } + bh = bh->b_this_page ; + } while (bh != head) ; + if (ret) + ret = try_to_free_buffers(page) ; + spin_unlock(&j->j_dirty_buffers_lock) ; + return ret ; +} + struct address_space_operations reiserfs_address_space_operations = { writepage: reiserfs_writepage, readpage: reiserfs_readpage, + releasepage: reiserfs_releasepage, sync_page: block_sync_page, prepare_write: reiserfs_prepare_write, commit_write: reiserfs_commit_write, diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index ef41742ba491..6957b5f69ce4 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -6,8 +6,8 @@ #include <linux/reiserfs_fs.h> #include <linux/time.h> #include <asm/uaccess.h> +#include <linux/pagemap.h> #include <linux/smp_lock.h> -#include <linux/locks.h> /* ** reiserfs_ioctl - handler for ioctl for inode diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 9de865554b51..de6e7de3068f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -54,7 +54,6 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fcntl.h> -#include <linux/locks.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/smp_lock.h> diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index 503ef628aa5b..8d47a4edabd9 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c @@ -4,7 +4,6 @@ #include <linux/config.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/random.h> #include <linux/time.h> #include <linux/reiserfs_fs.h> diff --git 
a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 359f66b1351b..19d7e4f36976 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -17,7 +17,6 @@ #include <linux/reiserfs_fs.h> #include <linux/reiserfs_fs_sb.h> #include <linux/smp_lock.h> -#include <linux/locks.h> #include <linux/init.h> #include <linux/proc_fs.h> diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 9b073804f3f4..59cdfa57b354 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -10,7 +10,6 @@ #include <linux/kernel.h> #include <linux/vmalloc.h> -#include <linux/locks.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index e5271b25ae23..8f067bd52f2c 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -56,7 +56,6 @@ #include <linux/config.h> #include <linux/time.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/pagemap.h> #include <linux/reiserfs_fs.h> #include <linux/smp_lock.h> diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 46d63a4defbf..c9dd2d3b5d5a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <linux/reiserfs_fs.h> #include <linux/smp_lock.h> -#include <linux/locks.h> #include <linux/init.h> #include <linux/blkdev.h> @@ -484,8 +483,6 @@ struct super_operations reiserfs_sops = { alloc_inode: reiserfs_alloc_inode, destroy_inode: reiserfs_destroy_inode, - read_inode: reiserfs_read_inode, - read_inode2: reiserfs_read_inode2, write_inode: reiserfs_write_inode, dirty_inode: reiserfs_dirty_inode, delete_inode: reiserfs_delete_inode, @@ -1007,7 +1004,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) int old_format = 0; unsigned long blocks; int jinit_done = 0 ; - struct reiserfs_iget4_args args ; + struct reiserfs_iget_args args ; struct reiserfs_super_block * rs; char *jdev_name; struct reiserfs_sb_info *sbi; @@ -1069,13 +1066,19 @@ static 
int reiserfs_fill_super(struct super_block *s, void *data, int silent) printk("clm-7000: Detected readonly device, marking FS readonly\n") ; s->s_flags |= MS_RDONLY ; } - args.objectid = REISERFS_ROOT_PARENT_OBJECTID ; - root_inode = iget4 (s, REISERFS_ROOT_OBJECTID, 0, (void *)(&args)); + args.objectid = REISERFS_ROOT_OBJECTID ; + args.dirid = REISERFS_ROOT_PARENT_OBJECTID ; + root_inode = iget5_locked (s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); if (!root_inode) { printk ("reiserfs_fill_super: get root inode failed\n"); goto error; } + if (root_inode->i_state & I_NEW) { + reiserfs_read_locked_inode(root_inode, &args); + unlock_new_inode(root_inode); + } + s->s_root = d_alloc_root(root_inode); if (!s->s_root) { iput(root_inode); diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index defa8d3dd6b0..3393998bb7ab 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -6,7 +6,6 @@ #include <linux/time.h> #include <linux/pagemap.h> #include <linux/reiserfs_fs.h> -#include <linux/locks.h> /* access to tail : when one is going to read tail it must make sure, that is not running. 
direct2indirect and indirect2direct can not run concurrently */ diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 25d55c453f93..3beb8e661e1b 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -70,8 +70,8 @@ #include <linux/slab.h> #include <linux/romfs_fs.h> #include <linux/fs.h> -#include <linux/locks.h> #include <linux/init.h> +#include <linux/pagemap.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 6807dd38a288..22a8e371b0c3 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -15,7 +15,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/file.h> diff --git a/fs/super.c b/fs/super.c index 9a1be36c2012..13f1b7a7e34d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -22,11 +22,11 @@ #include <linux/config.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/devfs_fs_kernel.h> #include <linux/acct.h> #include <linux/blkdev.h> +#include <linux/quotaops.h> #include <asm/uaccess.h> void get_filesystem(struct file_system_type *fs); @@ -48,6 +48,7 @@ static struct super_block *alloc_super(void) if (s) { memset(s, 0, sizeof(struct super_block)); INIT_LIST_HEAD(&s->s_dirty); + INIT_LIST_HEAD(&s->s_io); INIT_LIST_HEAD(&s->s_locked_inodes); INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); @@ -61,6 +62,8 @@ static struct super_block *alloc_super(void) sema_init(&s->s_dquot.dqio_sem, 1); sema_init(&s->s_dquot.dqoff_sem, 1); s->s_maxbytes = MAX_NON_LFS; + s->dq_op = sb_dquot_ops; + s->s_qcop = sb_quotactl_ops; } return s; } @@ -154,6 +157,9 @@ static int grab_super(struct super_block *s) * * Associates superblock with fs type and puts it on per-type and global * superblocks' lists. Should be called with sb_lock held; drops it. 
+ * + * NOTE: the super_blocks ordering here is important: writeback wants + * the blockdev superblock to be at super_blocks.next. */ static void insert_super(struct super_block *s, struct file_system_type *type) { diff --git a/fs/sysv/balloc.c b/fs/sysv/balloc.c index 9194ccf7fdeb..568b6231b2ae 100644 --- a/fs/sysv/balloc.c +++ b/fs/sysv/balloc.c @@ -19,7 +19,6 @@ * This file contains code for allocating/freeing blocks. */ -#include <linux/locks.h> #include "sysv.h" /* We don't trust the value of diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 255230c20988..9dce95103718 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -36,7 +36,7 @@ int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; int err; - err = fsync_inode_buffers(inode); + err = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY)) return err; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index afda24dcf2d0..183358d83b43 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -21,9 +21,9 @@ #include <linux/kernel.h> #include <linux/stddef.h> +#include <linux/sched.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include "sysv.h" /* We don't trust the value of diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 29ab0cc3c3f1..953a70c01df4 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -21,7 +21,6 @@ * the superblock. 
*/ -#include <linux/locks.h> #include <linux/smp_lock.h> #include <linux/highuid.h> #include <linux/slab.h> diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 75beb1554f98..bc6690583a1b 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -5,7 +5,6 @@ * AV, Sep--Dec 2000 */ -#include <linux/locks.h> #include "sysv.h" enum {DIRECT = 10, DEPTH = 4}; /* Have triple indirect */ diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index eab06b2999d7..09411fa827d2 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -26,7 +26,6 @@ #include "udfdecl.h" -#include <linux/locks.h> #include <linux/quotaops.h> #include <asm/bitops.h> diff --git a/fs/udf/file.c b/fs/udf/file.c index 30e38892a753..6e5c92c5d275 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -37,8 +37,8 @@ #include <linux/kernel.h> #include <linux/string.h> /* memset */ #include <linux/errno.h> -#include <linux/locks.h> #include <linux/smp_lock.h> +#include <linux/pagemap.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c index c48ddf0d0408..e81448d0c875 100644 --- a/fs/udf/fsync.c +++ b/fs/udf/fsync.c @@ -26,7 +26,6 @@ #include "udfdecl.h" #include <linux/fs.h> -#include <linux/locks.h> #include <linux/smp_lock.h> /* @@ -44,7 +43,7 @@ int udf_fsync_inode(struct inode *inode, int datasync) { int err; - err = fsync_inode_buffers(inode); + err = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY)) return err; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 5f58afa05f8a..d22e26bed2af 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -25,7 +25,6 @@ #include "udfdecl.h" #include <linux/fs.h> -#include <linux/locks.h> #include <linux/quotaops.h> #include <linux/udf_fs.h> diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1c229f5c912b..40696bade927 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -34,10 +34,10 @@ */ #include "udfdecl.h" -#include <linux/locks.h> #include <linux/mm.h> #include 
<linux/smp_lock.h> #include <linux/module.h> +#include <linux/pagemap.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 60b9a37438e3..28db72d58da5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -33,7 +33,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/quotaops.h> -#include <linux/locks.h> #include <linux/smp_lock.h> static inline int udf_match(int len, const char * const name, struct qstr *qs) diff --git a/fs/udf/super.c b/fs/udf/super.c index bc8a0576ec45..0858d29c3418 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -50,7 +50,6 @@ #include <linux/blkdev.h> #include <linux/slab.h> #include <linux/kernel.h> -#include <linux/locks.h> #include <linux/module.h> #include <linux/stat.h> #include <linux/cdrom.h> diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 5f7102461577..6154c29324f8 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -11,7 +11,6 @@ #include <linux/stat.h> #include <linux/time.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/quotaops.h> #include <asm/bitops.h> #include <asm/byteorder.h> diff --git a/fs/ufs/cylinder.c b/fs/ufs/cylinder.c index a0729feed07f..daf11e4dcf66 100644 --- a/fs/ufs/cylinder.c +++ b/fs/ufs/cylinder.c @@ -13,7 +13,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <asm/bitops.h> #include <asm/byteorder.h> diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 9bbd31501b72..bd3c40da7d9e 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -14,7 +14,6 @@ */ #include <linux/time.h> -#include <linux/locks.h> #include <linux/fs.h> #include <linux/ufs_fs.h> #include <linux/smp_lock.h> diff --git a/fs/ufs/file.c b/fs/ufs/file.c index fd6332175401..f282ea559c80 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -32,7 +32,6 @@ #include <linux/fcntl.h> #include <linux/time.h> #include <linux/stat.h> -#include <linux/locks.h> #include <linux/mm.h> #include <linux/pagemap.h> #include 
<linux/smp_lock.h> diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index c0a435a09a26..d82fd117b869 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -25,7 +25,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/quotaops.h> #include <asm/bitops.h> #include <asm/byteorder.h> diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index e10b5a35dca9..8e5bcf749231 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -34,7 +34,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/mm.h> #include <linux/smp_lock.h> diff --git a/fs/ufs/super.c b/fs/ufs/super.c index cdf4ad6a84b2..5971709836a6 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -77,7 +77,6 @@ #include <linux/time.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/blkdev.h> #include <linux/init.h> #include <linux/smp_lock.h> diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index a2b6ed7a016c..758fc57b5574 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -35,7 +35,6 @@ #include <linux/fcntl.h> #include <linux/time.h> #include <linux/stat.h> -#include <linux/locks.h> #include <linux/string.h> #include <linux/smp_lock.h> diff --git a/fs/ufs/util.c b/fs/ufs/util.c index b40e7ab8524d..6e859dc6afef 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -8,7 +8,6 @@ #include <linux/string.h> #include <linux/slab.h> -#include <linux/locks.h> #include <linux/ufs_fs.h> #include "swab.h" diff --git a/include/asm-alpha/ioctls.h b/include/asm-alpha/ioctls.h index 2cad3d5a8ec2..a363c50ce6d7 100644 --- a/include/asm-alpha/ioctls.h +++ b/include/asm-alpha/ioctls.h @@ -9,6 +9,7 @@ #define FIONBIO _IOW('f', 126, int) #define FIONREAD _IOR('f', 127, int) #define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) #define TIOCGETP _IOR('t', 8, struct sgttyb) #define TIOCSETP _IOW('t', 9, struct sgttyb) diff --git a/include/asm-cris/ioctls.h 
b/include/asm-cris/ioctls.h index 634628728762..21957ec44d7e 100644 --- a/include/asm-cris/ioctls.h +++ b/include/asm-cris/ioctls.h @@ -69,6 +69,7 @@ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 /* Used for packet mode */ #define TIOCPKT_DATA 0 diff --git a/include/asm-i386/ioctls.h b/include/asm-i386/ioctls.h index 97b41f2feddb..ea0e6ae58c29 100644 --- a/include/asm-i386/ioctls.h +++ b/include/asm-i386/ioctls.h @@ -67,6 +67,7 @@ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 /* Used for packet mode */ #define TIOCPKT_DATA 0 diff --git a/include/asm-ia64/ioctls.h b/include/asm-ia64/ioctls.h index 5e95e8b7f104..e727e4a67189 100644 --- a/include/asm-ia64/ioctls.h +++ b/include/asm-ia64/ioctls.h @@ -72,6 +72,7 @@ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 /* Used for packet mode */ #define TIOCPKT_DATA 0 diff --git a/include/asm-m68k/ioctls.h b/include/asm-m68k/ioctls.h index 213bd58df199..89c0df1262ed 100644 --- a/include/asm-m68k/ioctls.h +++ b/include/asm-m68k/ioctls.h @@ -65,6 +65,7 @@ #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ +#define FIOQSIZE 0x545E /* Used for packet mode */ #define TIOCPKT_DATA 0 diff --git a/include/asm-parisc/ioctls.h b/include/asm-parisc/ioctls.h index 332027f38424..9210a0f87c3f 100644 --- a/include/asm-parisc/ioctls.h +++ b/include/asm-parisc/ioctls.h @@ -67,6 +67,7 @@ #define TIOCGICOUNT 0x545D /* read 
serial port inline interrupt counts */ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 /* Get exact space used by quota */ /* Used for packet mode */ #define TIOCPKT_DATA 0 diff --git a/include/asm-sh/ioctls.h b/include/asm-sh/ioctls.h index 5d3e2cd245f5..f535202e8380 100644 --- a/include/asm-sh/ioctls.h +++ b/include/asm-sh/ioctls.h @@ -9,6 +9,7 @@ #define FIONBIO _IOW('f', 126, int) #define FIONREAD _IOR('f', 127, int) #define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) #define TCGETS 0x5401 #define TCSETS 0x5402 diff --git a/include/asm-sparc/ioctls.h b/include/asm-sparc/ioctls.h index aa2b9ea89a6a..531460ef91ff 100644 --- a/include/asm-sparc/ioctls.h +++ b/include/asm-sparc/ioctls.h @@ -86,6 +86,7 @@ #define FIONBIO _IOW('f', 126, int) #define FIONREAD _IOR('f', 127, int) #define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) /* SCARY Rutgers local SunOS kernel hackery, perhaps I will support it * someday. This is completely bogus, I know... diff --git a/include/asm-sparc64/ioctls.h b/include/asm-sparc64/ioctls.h index ebec66679415..0c8068dc32a9 100644 --- a/include/asm-sparc64/ioctls.h +++ b/include/asm-sparc64/ioctls.h @@ -87,6 +87,7 @@ #define FIONBIO _IOW('f', 126, int) #define FIONREAD _IOR('f', 127, int) #define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) /* SCARY Rutgers local SunOS kernel hackery, perhaps I will support it * someday. This is completely bogus, I know... 
diff --git a/include/linux/amigaffs.h b/include/linux/amigaffs.h index 535c3bf41b9a..f02e8cbd0131 100644 --- a/include/linux/amigaffs.h +++ b/include/linux/amigaffs.h @@ -2,7 +2,6 @@ #define AMIGAFFS_H #include <linux/types.h> -#include <linux/locks.h> #include <asm/byteorder.h> diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h new file mode 100644 index 000000000000..075cacc389e1 --- /dev/null +++ b/include/linux/backing-dev.h @@ -0,0 +1,30 @@ +/* + * include/linux/backing-dev.h + * + * low-level device information and state which is propagated up through + * to high-level code. + */ + +#ifndef _LINUX_BACKING_DEV_H +#define _LINUX_BACKING_DEV_H + +/* + * Bits in backing_dev_info.state + */ +enum bdi_state { + BDI_pdflush, /* A pdflush thread is working this device */ + BDI_unused, /* Available bits start here */ +}; + +struct backing_dev_info { + unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned long state; /* Always use atomic bitops on this */ +}; + +extern struct backing_dev_info default_backing_dev_info; + +int writeback_acquire(struct backing_dev_info *bdi); +int writeback_in_progress(struct backing_dev_info *bdi); +void writeback_release(struct backing_dev_info *bdi); + +#endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/blk.h b/include/linux/blk.h index 9be0913f6069..62d37b2b4c17 100644 --- a/include/linux/blk.h +++ b/include/linux/blk.h @@ -3,7 +3,6 @@ #include <linux/blkdev.h> #include <linux/elevator.h> -#include <linux/locks.h> #include <linux/config.h> #include <linux/spinlock.h> #include <linux/compiler.h> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d8175ccc104c..ac373e6a2454 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -7,6 +7,7 @@ #include <linux/tqueue.h> #include <linux/list.h> #include <linux/pagemap.h> +#include <linux/backing-dev.h> #include <asm/scatterlist.h> @@ -162,11 +163,7 @@ struct request_queue make_request_fn *make_request_fn; 
prep_rq_fn *prep_rq_fn; - /* - * The VM-level readahead tunable for this device. In - * units of PAGE_CACHE_SIZE pages. - */ - unsigned long ra_pages; + struct backing_dev_info backing_dev_info; /* * The queue owner gets to use this for whatever they like. @@ -328,7 +325,7 @@ extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long); extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *); extern void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn); -extern unsigned long *blk_get_ra_pages(struct block_device *bdev); +extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 500a082b7bcc..5560b6ee5878 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -29,6 +29,7 @@ enum bh_state_bits { struct page; struct kiobuf; struct buffer_head; +struct address_space; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* @@ -44,13 +45,13 @@ struct buffer_head { struct page *b_page; /* the page this bh is mapped to */ sector_t b_blocknr; /* block number */ - unsigned short b_size; /* block size */ + u32 b_size; /* block size */ char *b_data; /* pointer to data block */ struct block_device *b_bdev; bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ - struct list_head b_inode_buffers; /* list of inode dirty buffers */ + struct list_head b_assoc_buffers; /* associated with another mapping */ }; @@ -145,12 +146,19 @@ int try_to_free_buffers(struct page *); void create_empty_buffers(struct page *, unsigned long, unsigned long b_state); void end_buffer_io_sync(struct buffer_head *bh, int uptodate); + +/* Things to do with buffers at mapping->private_list */ 
void buffer_insert_list(spinlock_t *lock, struct buffer_head *, struct list_head *); +void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); +int write_mapping_buffers(struct address_space *mapping); +int inode_has_buffers(struct inode *); +void invalidate_inode_buffers(struct inode *); +int fsync_buffers_list(spinlock_t *lock, struct list_head *); +int sync_mapping_buffers(struct address_space *mapping); void mark_buffer_async_read(struct buffer_head *bh); void mark_buffer_async_write(struct buffer_head *bh); -void invalidate_inode_buffers(struct inode *); void invalidate_bdev(struct block_device *, int); void __invalidate_buffers(kdev_t dev, int); int sync_blockdev(struct block_device *bdev); @@ -161,8 +169,6 @@ int fsync_dev(kdev_t); int fsync_bdev(struct block_device *); int fsync_super(struct super_block *); int fsync_no_super(struct block_device *); -int fsync_buffers_list(spinlock_t *lock, struct list_head *); -int inode_has_buffers(struct inode *); struct buffer_head *__get_hash_table(struct block_device *, sector_t, int); struct buffer_head * __getblk(struct block_device *, sector_t, int); void __brelse(struct buffer_head *); @@ -217,14 +223,6 @@ static inline void put_bh(struct buffer_head *bh) atomic_dec(&bh->b_count); } -static inline void -mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) -{ - mark_buffer_dirty(bh); - buffer_insert_list(&inode->i_bufferlist_lock, - bh, &inode->i_dirty_buffers); -} - /* * If an error happens during the make_request, this function * has to be recalled. 
It marks the buffer as clean and not @@ -243,11 +241,6 @@ static inline void buffer_IO_error(struct buffer_head * bh) bh->b_end_io(bh, buffer_uptodate(bh)); } -static inline int fsync_inode_buffers(struct inode *inode) -{ - return fsync_buffers_list(&inode->i_bufferlist_lock, - &inode->i_dirty_buffers); -} static inline void brelse(struct buffer_head *buf) { diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h new file mode 100644 index 000000000000..42fbf4797156 --- /dev/null +++ b/include/linux/dqblk_v1.h @@ -0,0 +1,18 @@ +/* + * File with in-memory structures of old quota format + */ + +#ifndef _LINUX_DQBLK_V1_H +#define _LINUX_DQBLK_V1_H + +/* Id of quota format */ +#define QFMT_VFS_OLD 1 + +/* Root squash turned on */ +#define V1_DQF_RSQUASH 1 + +/* Special information about quotafile */ +struct v1_mem_dqinfo { +}; + +#endif /* _LINUX_DQBLK_V1_H */ diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h new file mode 100644 index 000000000000..4a6c5f6867bb --- /dev/null +++ b/include/linux/dqblk_v2.h @@ -0,0 +1,20 @@ +/* + * Definitions of structures for vfsv0 quota format + */ + +#ifndef _LINUX_DQBLK_V2_H +#define _LINUX_DQBLK_V2_H + +#include <linux/types.h> + +/* id numbers of quota format */ +#define QFMT_VFS_V0 2 + +/* Inmemory copy of version specific information */ +struct v2_mem_dqinfo { + unsigned int dqi_blocks; + unsigned int dqi_free_blk; + unsigned int dqi_free_entry; +}; + +#endif /* _LINUX_DQBLK_V2_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b858f90c6fe..ab0a05dc8e26 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -112,7 +112,6 @@ extern int leases_enable, dir_notify_enable, lease_break_time; #define MS_MOVE 8192 #define MS_REC 16384 #define MS_VERBOSE 32768 -#define MS_FLUSHING (1<<16) /* inodes are currently under writeout */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -156,7 +155,6 @@ extern int leases_enable, dir_notify_enable, lease_break_time; #define IS_RDONLY(inode) 
((inode)->i_sb->s_flags & MS_RDONLY) #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC)) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) -#define IS_FLUSHING(inode) __IS_FLG(inode, MS_FLUSHING) #define IS_QUOTAINIT(inode) ((inode)->i_flags & S_QUOTA) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) @@ -305,7 +303,9 @@ struct address_space_operations { int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); }; +struct backing_dev_info; struct address_space { + struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ rwlock_t page_lock; /* and rwlock protecting it */ struct list_head clean_pages; /* list of clean pages */ @@ -314,13 +314,15 @@ struct address_space { struct list_head io_pages; /* being prepared for I/O */ unsigned long nrpages; /* number of total pages */ struct address_space_operations *a_ops; /* methods */ - struct inode *host; /* owner: inode, block_device */ list_t i_mmap; /* list of private mappings */ list_t i_mmap_shared; /* list of private mappings */ spinlock_t i_shared_lock; /* and spinlock protecting it */ unsigned long dirtied_when; /* jiffies of first page dirtying */ int gfp_mask; /* how to allocate the pages */ - unsigned long *ra_pages; /* device readahead */ + struct backing_dev_info *backing_dev_info; /* device readahead, etc */ + spinlock_t private_lock; /* for use by the address_space */ + struct list_head private_list; /* ditto */ + struct address_space *assoc_mapping; /* ditto */ }; struct char_device { @@ -350,10 +352,6 @@ struct inode { struct list_head i_hash; struct list_head i_list; struct list_head i_dentry; - - struct list_head i_dirty_buffers; /* uses i_bufferlist_lock */ - spinlock_t i_bufferlist_lock; - unsigned long i_ino; atomic_t i_count; kdev_t i_dev; @@ -370,6 +368,7 @@ struct inode { unsigned long i_blksize; unsigned long i_blocks; unsigned long i_version; + unsigned short i_bytes; 
struct semaphore i_sem; struct inode_operations *i_op; struct file_operations *i_fop; /* former ->i_op->default_file_ops */ @@ -427,6 +426,39 @@ struct fown_struct { int signum; /* posix.1b rt signal to be delivered on IO */ }; +static inline void inode_add_bytes(struct inode *inode, loff_t bytes) +{ + inode->i_blocks += bytes >> 9; + bytes &= 511; + inode->i_bytes += bytes; + if (inode->i_bytes >= 512) { + inode->i_blocks++; + inode->i_bytes -= 512; + } +} + +static inline void inode_sub_bytes(struct inode *inode, loff_t bytes) +{ + inode->i_blocks -= bytes >> 9; + bytes &= 511; + if (inode->i_bytes < bytes) { + inode->i_blocks--; + inode->i_bytes += 512; + } + inode->i_bytes -= bytes; +} + +static inline loff_t inode_get_bytes(struct inode *inode) +{ + return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; +} + +static inline void inode_set_bytes(struct inode *inode, loff_t bytes) +{ + inode->i_blocks = bytes >> 9; + inode->i_bytes = bytes & 511; +} + /* * Track a single file's readahead state */ @@ -583,20 +615,6 @@ struct nameidata { struct vfsmount *old_mnt; }; -#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */ - -struct quota_mount_options -{ - unsigned int flags; /* Flags for diskquotas on this device */ - struct semaphore dqio_sem; /* lock device while I/O in progress */ - struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */ - struct file *files[MAXQUOTAS]; /* fp's to quotafiles */ - time_t inode_expire[MAXQUOTAS]; /* expiretime for inode-quota */ - time_t block_expire[MAXQUOTAS]; /* expiretime for block-quota */ - char rsquash[MAXQUOTAS]; /* for quotas threat root as any other user */ -}; - /* * Umount options */ @@ -620,13 +638,13 @@ struct super_block { kdev_t s_dev; unsigned long s_blocksize; unsigned long s_old_blocksize; - unsigned short s_writeback_gen;/* To avoid writeback livelock */ unsigned char s_blocksize_bits; unsigned char s_dirt; unsigned 
long long s_maxbytes; /* Max file size */ struct file_system_type *s_type; struct super_operations *s_op; struct dquot_operations *dq_op; + struct quotactl_ops *s_qcop; struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_magic; @@ -634,16 +652,18 @@ struct super_block { struct rw_semaphore s_umount; struct semaphore s_lock; int s_count; + int s_syncing; atomic_t s_active; struct list_head s_dirty; /* dirty inodes */ + struct list_head s_io; /* parked for writeback */ struct list_head s_locked_inodes;/* inodes being synced */ struct list_head s_anon; /* anonymous dentries for (nfs) exporting */ struct list_head s_files; struct block_device *s_bdev; struct list_head s_instances; - struct quota_mount_options s_dquot; /* Diskquota specific options */ + struct quota_info s_dquot; /* Diskquota specific options */ char s_id[32]; /* Informational name */ @@ -663,6 +683,19 @@ struct super_block { }; /* + * Superblock locking. + */ +static inline void lock_super(struct super_block * sb) +{ + down(&sb->s_lock); +} + +static inline void unlock_super(struct super_block * sb) +{ + up(&sb->s_lock); +} + +/* * VFS helper functions.. */ extern int vfs_create(struct inode *, struct dentry *, int); @@ -772,13 +805,6 @@ struct super_operations { void (*read_inode) (struct inode *); - /* reiserfs kludge. reiserfs needs 64 bits of information to - ** find an inode. We are using the read_inode2 call to get - ** that information. We don't like this, and are waiting on some - ** VFS changes for the real solution. 
- ** iget4 calls read_inode2, iff it is defined - */ - void (*read_inode2) (struct inode *, void *) ; void (*dirty_inode) (struct inode *); void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); @@ -802,6 +828,7 @@ struct super_operations { #define I_LOCK 8 #define I_FREEING 16 #define I_CLEAR 32 +#define I_NEW 64 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) @@ -816,16 +843,6 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } -struct dquot_operations { - void (*initialize) (struct inode *, short); - void (*drop) (struct inode *); - int (*alloc_block) (struct inode *, unsigned long, char); - int (*alloc_inode) (const struct inode *, unsigned long); - void (*free_block) (struct inode *, unsigned long); - void (*free_inode) (const struct inode *, unsigned long); - int (*transfer) (struct inode *, struct iattr *); -}; - /** * &export_operations - for nfsd to communicate with file systems @@ -1118,7 +1135,6 @@ extern int invalidate_device(kdev_t, int); extern void invalidate_inode_pages(struct inode *); extern void invalidate_inode_pages2(struct address_space *); extern void write_inode_now(struct inode *, int); -extern void sync_inodes_sb(struct super_block *); extern int filemap_fdatawrite(struct address_space *); extern int filemap_fdatawait(struct address_space *); extern void sync_supers(void); @@ -1210,19 +1226,33 @@ extern void force_delete(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); -typedef int (*find_inode_t)(struct inode *, unsigned long, void *); -extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *); +extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); +extern struct inode * iget_locked(struct super_block *, unsigned long); +extern void 
unlock_new_inode(struct inode *); + static inline struct inode *iget(struct super_block *sb, unsigned long ino) { - return iget4(sb, ino, NULL, NULL); + struct inode *inode = iget_locked(sb, ino); + + if (inode && (inode->i_state & I_NEW)) { + sb->s_op->read_inode(inode); + unlock_new_inode(inode); + } + + return inode; } extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern void remove_suid(struct dentry *); -extern void insert_inode_hash(struct inode *); + +extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); +static inline void insert_inode_hash(struct inode *inode) { + __insert_inode_hash(inode, inode->i_ino); +} + extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void ll_rw_block(int, int, struct buffer_head * bh[]); diff --git a/include/linux/hfs_sysdep.h b/include/linux/hfs_sysdep.h index 62fcf2ea311f..a08d5aa9e39d 100644 --- a/include/linux/hfs_sysdep.h +++ b/include/linux/hfs_sysdep.h @@ -19,8 +19,8 @@ #include <linux/slab.h> #include <linux/types.h> -#include <linux/locks.h> #include <linux/fs.h> +#include <linux/sched.h> #include <asm/byteorder.h> #include <asm/unaligned.h> diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 2752f3a7375d..4a96c4ac35b9 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -235,6 +235,7 @@ enum jbd_state_bits { BUFFER_FNS(JBD, jbd) BUFFER_FNS(JBDDirty, jbddirty) +TAS_BUFFER_FNS(JBDDirty, jbddirty) static inline struct buffer_head *jh2bh(struct journal_head *jh) { diff --git a/include/linux/locks.h b/include/linux/locks.h deleted file mode 100644 index a380c5e4f0bb..000000000000 --- a/include/linux/locks.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _LINUX_LOCKS_H -#define _LINUX_LOCKS_H - -#ifndef _LINUX_MM_H -#include <linux/mm.h> -#endif -#ifndef _LINUX_PAGEMAP_H -#include <linux/pagemap.h> 
-#endif - -/* - * super-block locking. Again, interrupts may only unlock - * a super-block (although even this isn't done right now. - * nfs may need it). - */ - -static inline void lock_super(struct super_block * sb) -{ - down(&sb->s_lock); -} - -static inline void unlock_super(struct super_block * sb) -{ - up(&sb->s_lock); -} - -#endif /* _LINUX_LOCKS_H */ - diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f0b56f0183b..451cdff1ec16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -454,7 +454,6 @@ void do_page_cache_readahead(struct file *file, void page_cache_readahead(struct file *file, unsigned long offset); void page_cache_readaround(struct file *file, unsigned long offset); void handle_ra_thrashing(struct file *file); -extern unsigned long default_ra_pages; /* vma is the first one with address < vma->vm_end, * and even address < vma->vm_start. Have to extend vma. */ diff --git a/include/linux/module.h b/include/linux/module.h index f3a8370db10a..1021d58d1742 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -370,6 +370,8 @@ extern struct module *module_list; #define EXPORT_SYMBOL_NOVERS(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile #define EXPORT_SYMBOL_GPL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile +__asm__(".section __ksymtab,\"a\"\n.previous"); + #else #define __EXPORT_SYMBOL(sym, str) \ diff --git a/include/linux/nbd.h b/include/linux/nbd.h index b6120317731d..556b847804ca 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -22,7 +22,6 @@ #ifdef MAJOR_NR -#include <linux/locks.h> #include <asm/semaphore.h> #define LOCAL_END_REQUEST diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 499b246788f4..a8a2259a8343 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -170,7 +170,6 @@ struct nfs_inode { #define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */ #define NFS_IS_SNAPSHOT 0x0010 /* a snapshot file */ #define 
NFS_INO_FLUSH 0x0020 /* inode is due for flushing */ -#define NFS_INO_NEW 0x0040 /* hadn't been filled yet */ static inline struct nfs_inode *NFS_I(struct inode *inode) { @@ -208,7 +207,6 @@ do { \ #define NFS_FLAGS(inode) (NFS_I(inode)->flags) #define NFS_REVALIDATING(inode) (NFS_FLAGS(inode) & NFS_INO_REVALIDATING) #define NFS_STALE(inode) (NFS_FLAGS(inode) & NFS_INO_STALE) -#define NFS_NEW(inode) (NFS_FLAGS(inode) & NFS_INO_NEW) #define NFS_FILEID(inode) (NFS_I(inode)->fileid) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f56db65ebef3..52b7117c4f64 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -62,9 +62,8 @@ #define PG_arch_1 10 #define PG_reserved 11 -#define PG_launder 12 /* written out by VM pressure.. */ -#define PG_private 13 /* Has something at ->private */ -#define PG_writeback 14 /* Page is under writeback */ +#define PG_private 12 /* Has something at ->private */ +#define PG_writeback 13 /* Page is under writeback */ /* * Global page accounting. One instance per CPU. @@ -172,10 +171,6 @@ extern void get_page_state(struct page_state *ret); #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -#define PageLaunder(page) test_bit(PG_launder, &(page)->flags) -#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) -#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) - #define SetPagePrivate(page) set_bit(PG_private, &(page)->flags) #define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags) #define PagePrivate(page) test_bit(PG_private, &(page)->flags) diff --git a/include/linux/quota.h b/include/linux/quota.h index b2d5de7368f6..0a36a5e59caf 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -40,30 +40,22 @@ #define _LINUX_QUOTA_ #include <linux/errno.h> +#include <linux/types.h> -/* - * Convert diskblocks to blocks and the other way around. 
- */ -#define dbtob(num) (num << BLOCK_SIZE_BITS) -#define btodb(num) (num >> BLOCK_SIZE_BITS) +#define __DQUOT_VERSION__ "dquot_6.5.1" +#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 -/* - * Convert count of filesystem blocks to diskquota blocks, meant - * for filesystems where i_blksize != BLOCK_SIZE - */ -#define fs_to_dq_blocks(num, blksize) (((num) * (blksize)) / BLOCK_SIZE) +typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ +typedef __u64 qsize_t; /* Type in which we store sizes */ -/* - * Definitions for disk quotas imposed on the average user - * (big brother finally hits Linux). - * - * The following constants define the amount of time given a user - * before the soft limits are treated as hard limits (usually resulting - * in an allocation failure). The timer is started when the user crosses - * their soft limit, it is reset when they go below their soft limit. - */ -#define MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */ -#define MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */ +/* Size of blocks in which are counted size limits */ +#define QUOTABLOCK_BITS 10 +#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) + +/* Conversion routines from and to quota blocks */ +#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10)) +#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10)) +#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS) #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ @@ -78,9 +70,6 @@ "undefined", \ }; -#define QUOTAFILENAME "quota" -#define QUOTAGROUP "staff" - /* * Command definitions for the 'quotactl' system call. 
* The commands are broken into a main command defined below @@ -91,45 +80,111 @@ #define SUBCMDSHIFT 8 #define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK)) -#define Q_QUOTAON 0x0100 /* enable quotas */ -#define Q_QUOTAOFF 0x0200 /* disable quotas */ -#define Q_GETQUOTA 0x0300 /* get limits and usage */ -#define Q_SETQUOTA 0x0400 /* set limits and usage */ -#define Q_SETUSE 0x0500 /* set usage */ -#define Q_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ -#define Q_SETQLIM 0x0700 /* set limits */ -#define Q_GETSTATS 0x0800 /* get collected stats */ -#define Q_RSQUASH 0x1000 /* set root_squash option */ +#define Q_SYNC 0x800001 /* sync disk copy of a filesystems quotas */ +#define Q_QUOTAON 0x800002 /* turn quotas on */ +#define Q_QUOTAOFF 0x800003 /* turn quotas off */ +#define Q_GETFMT 0x800004 /* get quota format used on given filesystem */ +#define Q_GETINFO 0x800005 /* get information about quota files */ +#define Q_SETINFO 0x800006 /* set information about quota files */ +#define Q_GETQUOTA 0x800007 /* get user quota structure */ +#define Q_SETQUOTA 0x800008 /* set user quota structure */ + +/* + * Quota structure used for communication with userspace via quotactl + * Following flags are used to specify which fields are valid + */ +#define QIF_BLIMITS 1 +#define QIF_SPACE 2 +#define QIF_ILIMITS 4 +#define QIF_INODES 8 +#define QIF_BTIME 16 +#define QIF_ITIME 32 +#define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS) +#define QIF_USAGE (QIF_SPACE | QIF_INODES) +#define QIF_TIMES (QIF_BTIME | QIF_ITIME) +#define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES) + +struct if_dqblk { + __u64 dqb_bhardlimit; + __u64 dqb_bsoftlimit; + __u64 dqb_curspace; + __u64 dqb_ihardlimit; + __u64 dqb_isoftlimit; + __u64 dqb_curinodes; + __u64 dqb_btime; + __u64 dqb_itime; + __u32 dqb_valid; +}; /* - * The following structure defines the format of the disk quota file - * (as it appears on disk) - the file is an array of these structures - * indexed by user or 
group number. + * Structure used for setting quota information about file via quotactl + * Following flags are used to specify which fields are valid */ -struct dqblk { +#define IIF_BGRACE 1 +#define IIF_IGRACE 2 +#define IIF_FLAGS 4 +#define IIF_ALL (IIF_BGRACE | IIF_IGRACE | IIF_FLAGS) + +struct if_dqinfo { + __u64 dqi_bgrace; + __u64 dqi_igrace; + __u32 dqi_flags; + __u32 dqi_valid; +}; + +#ifdef __KERNEL__ + +#include <linux/xqm.h> +#include <linux/dqblk_v1.h> +#include <linux/dqblk_v2.h> + +/* + * Data for one user/group kept in memory + */ +struct mem_dqblk { __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ - __u32 dqb_curblocks; /* current block count */ + qsize_t dqb_curspace; /* current used space */ __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ __u32 dqb_isoftlimit; /* preferred inode limit */ __u32 dqb_curinodes; /* current # allocated inodes */ - time_t dqb_btime; /* time limit for excessive disk use */ - time_t dqb_itime; /* time limit for excessive inode use */ + time_t dqb_btime; /* time limit for excessive disk use */ + time_t dqb_itime; /* time limit for excessive inode use */ }; /* - * Shorthand notation. + * Data for one quotafile kept in memory */ -#define dq_bhardlimit dq_dqb.dqb_bhardlimit -#define dq_bsoftlimit dq_dqb.dqb_bsoftlimit -#define dq_curblocks dq_dqb.dqb_curblocks -#define dq_ihardlimit dq_dqb.dqb_ihardlimit -#define dq_isoftlimit dq_dqb.dqb_isoftlimit -#define dq_curinodes dq_dqb.dqb_curinodes -#define dq_btime dq_dqb.dqb_btime -#define dq_itime dq_dqb.dqb_itime +struct quota_format_type; + +struct mem_dqinfo { + struct quota_format_type *dqi_format; + int dqi_flags; + unsigned int dqi_bgrace; + unsigned int dqi_igrace; + union { + struct v1_mem_dqinfo v1_i; + struct v2_mem_dqinfo v2_i; + } u; +}; + +#define DQF_MASK 0xffff /* Mask for format specific flags */ +#define DQF_INFO_DIRTY 0x10000 /* Is info dirty? 
*/ +#define DQF_ANY_DQUOT_DIRTY 0x20000 /* Is any dquot dirty? */ + +extern inline void mark_info_dirty(struct mem_dqinfo *info) +{ + info->dqi_flags |= DQF_INFO_DIRTY; +} -#define dqoff(UID) ((loff_t)((UID) * sizeof (struct dqblk))) +#define info_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY) + +#define info_any_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY ||\ + (info)->dqi_flags & DQF_ANY_DQUOT_DIRTY) + +#define sb_dqopt(sb) (&(sb)->s_dquot) + +extern int nr_dquots, nr_free_dquots; struct dqstats { __u32 lookups; @@ -142,9 +197,7 @@ struct dqstats { __u32 syncs; }; -#ifdef __KERNEL__ - -extern int dquot_root_squash; +extern struct dqstats dqstats; #define NR_DQHASH 43 /* Just an arbitrary number */ @@ -161,36 +214,112 @@ struct dquot { struct list_head dq_free; /* Free list element */ wait_queue_head_t dq_wait_lock; /* Pointer to waitqueue on dquot lock */ wait_queue_head_t dq_wait_free; /* Pointer to waitqueue for quota to be unused */ - int dq_count; /* Reference count */ + int dq_count; /* Use count */ + int dq_dup_ref; /* Number of duplicated references */ /* fields after this point are cleared when invalidating */ struct super_block *dq_sb; /* superblock this applies to */ unsigned int dq_id; /* ID this applies to (uid, gid) */ + loff_t dq_off; /* Offset of dquot on disk */ short dq_type; /* Type of quota */ short dq_flags; /* See DQ_* */ unsigned long dq_referenced; /* Number of times this dquot was referenced during its lifetime */ - struct dqblk dq_dqb; /* Diskquota usage */ + struct mem_dqblk dq_dqb; /* Diskquota usage */ }; #define NODQUOT (struct dquot *)NULL -/* - * Flags used for set_dqblk. 
- */ -#define SET_QUOTA 0x02 -#define SET_USE 0x04 -#define SET_QLIMIT 0x08 - #define QUOTA_OK 0 #define NO_QUOTA 1 +/* Operations which must be implemented by each quota format */ +struct quota_format_ops { + int (*check_quota_file)(struct super_block *sb, int type); /* Detect whether file is in our format */ + int (*read_file_info)(struct super_block *sb, int type); /* Read main info about file - called on quotaon() */ + int (*write_file_info)(struct super_block *sb, int type); /* Write main info about file */ + int (*free_file_info)(struct super_block *sb, int type); /* Called on quotaoff() */ + int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ + int (*commit_dqblk)(struct dquot *dquot); /* Write (or delete) structure for one user */ +}; + +/* Operations working with dquots */ +struct dquot_operations { + void (*initialize) (struct inode *, int); + void (*drop) (struct inode *); + int (*alloc_space) (struct inode *, qsize_t, int); + int (*alloc_inode) (const struct inode *, unsigned long); + void (*free_space) (struct inode *, qsize_t); + void (*free_inode) (const struct inode *, unsigned long); + int (*transfer) (struct inode *, struct iattr *); +}; + +/* Operations handling requests from userspace */ +struct quotactl_ops { + int (*quota_on)(struct super_block *, int, int, char *); + int (*quota_off)(struct super_block *, int); + int (*quota_sync)(struct super_block *, int); + int (*get_info)(struct super_block *, int, struct if_dqinfo *); + int (*set_info)(struct super_block *, int, struct if_dqinfo *); + int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); + int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); + int (*get_xstate)(struct super_block *, struct fs_quota_stat *); + int (*set_xstate)(struct super_block *, unsigned int, int); + int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); + int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); +}; + 
+struct quota_format_type { + int qf_fmt_id; /* Quota format id */ + struct quota_format_ops *qf_ops; /* Operations of format */ + struct module *qf_owner; /* Module implementing quota format */ + struct quota_format_type *qf_next; +}; + +#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ +#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */ + +struct quota_info { + unsigned int flags; /* Flags for diskquotas on this device */ + struct semaphore dqio_sem; /* lock device while I/O in progress */ + struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */ + struct file *files[MAXQUOTAS]; /* fp's to quotafiles */ + struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ + struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ +}; + +/* Inline would be better but we need to dereference super_block which is not defined yet */ +#define mark_dquot_dirty(dquot) do {\ + dquot->dq_flags |= DQ_MOD;\ + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_flags |= DQF_ANY_DQUOT_DIRTY;\ +} while (0) + +#define dquot_dirty(dquot) ((dquot)->dq_flags & DQ_MOD) + +static inline int is_enabled(struct quota_info *dqopt, int type) +{ + switch (type) { + case USRQUOTA: + return dqopt->flags & DQUOT_USR_ENABLED; + case GRPQUOTA: + return dqopt->flags & DQUOT_GRP_ENABLED; + } + return 0; +} + +#define sb_any_quota_enabled(sb) (is_enabled(sb_dqopt(sb), USRQUOTA) | is_enabled(sb_dqopt(sb), GRPQUOTA)) + +#define sb_has_quota_enabled(sb, type) (is_enabled(sb_dqopt(sb), type)) + +int register_quota_format(struct quota_format_type *fmt); +void unregister_quota_format(struct quota_format_type *fmt); + #else # /* nodep */ include <sys/cdefs.h> __BEGIN_DECLS -long quotactl __P ((int, const char *, int, caddr_t)); +long quotactl __P ((unsigned int, const char *, int, caddr_t)); __END_DECLS #endif /* __KERNEL__ */ diff --git a/include/linux/quotacompat.h b/include/linux/quotacompat.h new file mode 100644 index 
000000000000..484aac17efc9 --- /dev/null +++ b/include/linux/quotacompat.h @@ -0,0 +1,86 @@ +/* + * Definition of symbols used for backward compatible interface + */ + +#ifndef _LINUX_QUOTACOMPAT_ +#define _LINUX_QUOTACOMPAT_ + +#include <linux/types.h> +#include <linux/quota.h> + +struct v1c_mem_dqblk { + __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ + __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ + __u32 dqb_curblocks; /* current block count */ + __u32 dqb_ihardlimit; /* maximum # allocated inodes */ + __u32 dqb_isoftlimit; /* preferred inode limit */ + __u32 dqb_curinodes; /* current # allocated inodes */ + time_t dqb_btime; /* time limit for excessive disk use */ + time_t dqb_itime; /* time limit for excessive files */ +}; + +struct v1c_dqstats { + __u32 lookups; + __u32 drops; + __u32 reads; + __u32 writes; + __u32 cache_hits; + __u32 allocated_dquots; + __u32 free_dquots; + __u32 syncs; +}; + +struct v2c_mem_dqblk { + unsigned int dqb_ihardlimit; + unsigned int dqb_isoftlimit; + unsigned int dqb_curinodes; + unsigned int dqb_bhardlimit; + unsigned int dqb_bsoftlimit; + qsize_t dqb_curspace; + __kernel_time_t dqb_btime; + __kernel_time_t dqb_itime; +}; + +struct v2c_mem_dqinfo { + unsigned int dqi_bgrace; + unsigned int dqi_igrace; + unsigned int dqi_flags; + unsigned int dqi_blocks; + unsigned int dqi_free_blk; + unsigned int dqi_free_entry; +}; + +struct v2c_dqstats { + __u32 lookups; + __u32 drops; + __u32 reads; + __u32 writes; + __u32 cache_hits; + __u32 allocated_dquots; + __u32 free_dquots; + __u32 syncs; + __u32 version; +}; + +#define Q_COMP_QUOTAON 0x0100 /* enable quotas */ +#define Q_COMP_QUOTAOFF 0x0200 /* disable quotas */ +#define Q_COMP_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ + +#define Q_V1_GETQUOTA 0x0300 /* get limits and usage */ +#define Q_V1_SETQUOTA 0x0400 /* set limits and usage */ +#define Q_V1_SETUSE 0x0500 /* set usage */ +#define Q_V1_SETQLIM 0x0700 /* set limits */ +#define Q_V1_GETSTATS 
0x0800 /* get collected stats */ +#define Q_V1_RSQUASH 0x1000 /* set root_squash option */ + +#define Q_V2_SETQLIM 0x0700 /* set limits */ +#define Q_V2_GETINFO 0x0900 /* get info about quotas - graces, flags... */ +#define Q_V2_SETINFO 0x0A00 /* set info about quotas */ +#define Q_V2_SETGRACE 0x0B00 /* set inode and block grace */ +#define Q_V2_SETFLAGS 0x0C00 /* set flags for quota */ +#define Q_V2_GETQUOTA 0x0D00 /* get limits and usage */ +#define Q_V2_SETQUOTA 0x0E00 /* set limits and usage */ +#define Q_V2_SETUSE 0x0F00 /* set usage */ +#define Q_V2_GETSTATS 0x1100 /* get collected stats */ + +#endif diff --git a/include/linux/quotaio_v1.h b/include/linux/quotaio_v1.h new file mode 100644 index 000000000000..746654b5de70 --- /dev/null +++ b/include/linux/quotaio_v1.h @@ -0,0 +1,33 @@ +#ifndef _LINUX_QUOTAIO_V1_H +#define _LINUX_QUOTAIO_V1_H + +#include <linux/types.h> + +/* + * The following constants define the amount of time given a user + * before the soft limits are treated as hard limits (usually resulting + * in an allocation failure). The timer is started when the user crosses + * their soft limit, it is reset when they go below their soft limit. + */ +#define MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */ +#define MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */ + +/* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is an array of these structures + * indexed by user or group number. 
+ */ +struct v1_disk_dqblk { + __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ + __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ + __u32 dqb_curblocks; /* current block count */ + __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ + __u32 dqb_isoftlimit; /* preferred inode limit */ + __u32 dqb_curinodes; /* current # allocated inodes */ + time_t dqb_btime; /* time limit for excessive disk use */ + time_t dqb_itime; /* time limit for excessive inode use */ +}; + +#define v1_dqoff(UID) ((loff_t)((UID) * sizeof (struct v1_disk_dqblk))) + +#endif /* _LINUX_QUOTAIO_V1_H */ diff --git a/include/linux/quotaio_v2.h b/include/linux/quotaio_v2.h new file mode 100644 index 000000000000..da4e02730bc8 --- /dev/null +++ b/include/linux/quotaio_v2.h @@ -0,0 +1,79 @@ +/* + * Definitions of structures for vfsv0 quota format + */ + +#ifndef _LINUX_QUOTAIO_V2_H +#define _LINUX_QUOTAIO_V2_H + +#include <linux/types.h> +#include <linux/quota.h> + +/* + * Definitions of magics and versions of current quota files + */ +#define V2_INITQMAGICS {\ + 0xd9c01f11, /* USRQUOTA */\ + 0xd9c01927 /* GRPQUOTA */\ +} + +#define V2_INITQVERSIONS {\ + 0, /* USRQUOTA */\ + 0 /* GRPQUOTA */\ +} + +/* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is a radix tree whose leaves point + * to blocks of these structures. 
+ */ +struct v2_disk_dqblk { + __u32 dqb_id; /* id this quota applies to */ + __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ + __u32 dqb_isoftlimit; /* preferred inode limit */ + __u32 dqb_curinodes; /* current # allocated inodes */ + __u32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */ + __u32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */ + __u64 dqb_curspace; /* current space occupied (in bytes) */ + __u64 dqb_btime; /* time limit for excessive disk use */ + __u64 dqb_itime; /* time limit for excessive inode use */ +}; + +/* + * Here are header structures as written on disk and their in-memory copies + */ +/* First generic header */ +struct v2_disk_dqheader { + __u32 dqh_magic; /* Magic number identifying file */ + __u32 dqh_version; /* File version */ +}; + +/* Header with type and version specific information */ +struct v2_disk_dqinfo { + __u32 dqi_bgrace; /* Time before block soft limit becomes hard limit */ + __u32 dqi_igrace; /* Time before inode soft limit becomes hard limit */ + __u32 dqi_flags; /* Flags for quotafile (DQF_*) */ + __u32 dqi_blocks; /* Number of blocks in file */ + __u32 dqi_free_blk; /* Number of first free block in the list */ + __u32 dqi_free_entry; /* Number of block with at least one free entry */ +}; + +/* + * Structure of header of block with quota structures. 
It is padded to 16 bytes so + * there will be space for exactly 18 quota-entries in a block + */ +struct v2_disk_dqdbheader { + __u32 dqdh_next_free; /* Number of next block with free entry */ + __u32 dqdh_prev_free; /* Number of previous block with free entry */ + __u16 dqdh_entries; /* Number of valid entries in block */ + __u16 dqdh_pad1; + __u32 dqdh_pad2; +}; + +#define V2_DQINFOOFF sizeof(struct v2_disk_dqheader) /* Offset of info header in file */ +#define V2_DQBLKSIZE_BITS 10 +#define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */ +#define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */ +#define V2_DQTREEDEPTH 4 /* Depth of quota tree */ +#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one block */ + +#endif /* _LINUX_QUOTAIO_V2_H */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 0a1df9e1fe56..31b24e37c159 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,15 +20,15 @@ /* * declaration of quota_function calls in kernel. 
*/ -extern void dquot_initialize(struct inode *inode, short type); +extern void sync_dquots(struct super_block *sb, int type); + +extern void dquot_initialize(struct inode *inode, int type); extern void dquot_drop(struct inode *inode); -extern int quota_off(struct super_block *sb, short type); -extern int sync_dquots(struct super_block *sb, short type); -extern int dquot_alloc_block(struct inode *inode, unsigned long number, char prealloc); +extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); -extern void dquot_free_block(struct inode *inode, unsigned long number); +extern void dquot_free_space(struct inode *inode, qsize_t number); extern void dquot_free_inode(const struct inode *inode, unsigned long number); extern int dquot_transfer(struct inode *inode, struct iattr *iattr); @@ -36,7 +36,11 @@ extern int dquot_transfer(struct inode *inode, struct iattr *iattr); /* * Operations supported for diskquotas. 
*/ -#define sb_any_quota_enabled(sb) ((sb)->s_dquot.flags & (DQUOT_USR_ENABLED | DQUOT_GRP_ENABLED)) +extern struct dquot_operations dquot_operations; +extern struct quotactl_ops vfs_quotactl_ops; + +#define sb_dquot_ops (&dquot_operations) +#define sb_quotactl_ops (&vfs_quotactl_ops) static __inline__ void DQUOT_INIT(struct inode *inode) { @@ -59,50 +63,50 @@ static __inline__ void DQUOT_DROP(struct inode *inode) unlock_kernel(); } -static __inline__ int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, int nr) +static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) { - /* Number of used blocks is updated in alloc_block() */ - if (inode->i_sb->dq_op->alloc_block(inode, fs_to_dq_blocks(nr, inode->i_sb->s_blocksize), 1) == NO_QUOTA) { + /* Used space is updated in alloc_space() */ + if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) { unlock_kernel(); return 1; } } else - inode->i_blocks += nr << (inode->i_sb->s_blocksize_bits - 9); + inode_add_bytes(inode, nr); unlock_kernel(); return 0; } -static __inline__ int DQUOT_PREALLOC_BLOCK(struct inode *inode, int nr) +static __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) { int ret; - if (!(ret = DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr))) + if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr))) mark_inode_dirty(inode); return ret; } -static __inline__ int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, int nr) +static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) { - /* Number of used blocks is updated in alloc_block() */ - if (inode->i_sb->dq_op->alloc_block(inode, fs_to_dq_blocks(nr, inode->i_sb->s_blocksize), 0) == NO_QUOTA) { + /* Used space is updated in alloc_space() */ + if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) { unlock_kernel(); return 1; } } else - inode->i_blocks += nr << 
(inode->i_sb->s_blocksize_bits - 9); + inode_add_bytes(inode, nr); unlock_kernel(); return 0; } -static __inline__ int DQUOT_ALLOC_BLOCK(struct inode *inode, int nr) +static __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) { int ret; - if (!(ret = DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr))) + if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr))) mark_inode_dirty(inode); return ret; } @@ -121,19 +125,19 @@ static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode) return 0; } -static __inline__ void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, int nr) +static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) - inode->i_sb->dq_op->free_block(inode, fs_to_dq_blocks(nr, inode->i_sb->s_blocksize)); + inode->i_sb->dq_op->free_space(inode, nr); else - inode->i_blocks -= nr << (inode->i_sb->s_blocksize_bits - 9); + inode_sub_bytes(inode, nr); unlock_kernel(); } -static __inline__ void DQUOT_FREE_BLOCK(struct inode *inode, int nr) +static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) { - DQUOT_FREE_BLOCK_NODIRTY(inode, nr); + DQUOT_FREE_SPACE_NODIRTY(inode, nr); mark_inode_dirty(inode); } @@ -160,13 +164,25 @@ static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) } #define DQUOT_SYNC(sb) sync_dquots(sb, -1) -#define DQUOT_OFF(sb) quota_off(sb, -1) + +static __inline__ int DQUOT_OFF(struct super_block *sb) +{ + int ret = -ENOSYS; + + lock_kernel(); + if (sb->s_qcop && sb->s_qcop->quota_off) + ret = sb->s_qcop->quota_off(sb, -1); + unlock_kernel(); + return ret; +} #else /* * NO-OP when quota not configured. 
*/ +#define sb_dquot_ops (NULL) +#define sb_quotactl_ops (NULL) #define DQUOT_INIT(inode) do { } while(0) #define DQUOT_DROP(inode) do { } while(0) #define DQUOT_ALLOC_INODE(inode) (0) @@ -174,48 +190,56 @@ static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) #define DQUOT_SYNC(sb) do { } while(0) #define DQUOT_OFF(sb) do { } while(0) #define DQUOT_TRANSFER(inode, iattr) (0) -extern __inline__ int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, int nr) +extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { lock_kernel(); - inode->i_blocks += nr << (inode->i_sb->s_blocksize_bits - 9); + inode_add_bytes(inode, nr); unlock_kernel(); return 0; } -extern __inline__ int DQUOT_PREALLOC_BLOCK(struct inode *inode, int nr) +extern __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) { - DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr); + DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr); mark_inode_dirty(inode); return 0; } -extern __inline__ int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, int nr) +extern __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { lock_kernel(); - inode->i_blocks += nr << (inode->i_sb->s_blocksize_bits - 9); + inode_add_bytes(inode, nr); unlock_kernel(); return 0; } -extern __inline__ int DQUOT_ALLOC_BLOCK(struct inode *inode, int nr) +extern __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) { - DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr); + DQUOT_ALLOC_SPACE_NODIRTY(inode, nr); mark_inode_dirty(inode); return 0; } -extern __inline__ void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, int nr) +extern __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { lock_kernel(); - inode->i_blocks -= nr << (inode->i_sb->s_blocksize_bits - 9); + inode_sub_bytes(inode, nr); unlock_kernel(); } -extern __inline__ void DQUOT_FREE_BLOCK(struct inode *inode, int nr) +extern __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) { - 
DQUOT_FREE_BLOCK_NODIRTY(inode, nr); + DQUOT_FREE_SPACE_NODIRTY(inode, nr); mark_inode_dirty(inode); } #endif /* CONFIG_QUOTA */ + +#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) DQUOT_PREALLOC_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) +#define DQUOT_PREALLOC_BLOCK(inode, nr) DQUOT_PREALLOC_SPACE(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) +#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) DQUOT_ALLOC_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) +#define DQUOT_ALLOC_BLOCK(inode, nr) DQUOT_ALLOC_SPACE(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) +#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) DQUOT_FREE_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) +#define DQUOT_FREE_BLOCK(inode, nr) DQUOT_FREE_SPACE(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) + #endif /* _LINUX_QUOTAOPS_ */ diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index bf586df47298..cb6332482af2 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -31,7 +31,6 @@ #include <linux/delay.h> #include <net/checksum.h> #include <linux/random.h> -#include <linux/locks.h> #include <linux/kernel_stat.h> #include <asm/io.h> #include <linux/completion.h> diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index e98ccaf9d217..173279f6ff0a 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1564,8 +1564,9 @@ extern struct item_operations * item_ops [TYPE_ANY + 1]; #define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) #define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) -struct reiserfs_iget4_args { +struct reiserfs_iget_args { __u32 objectid ; + __u32 dirid ; } ; /***************************************************************************/ @@ -1818,8 +1819,9 @@ void padd_item (char * item, int total_length, int 
length); /* inode.c */ -void reiserfs_read_inode (struct inode * inode) ; -void reiserfs_read_inode2(struct inode * inode, void *p) ; +void reiserfs_read_locked_inode(struct inode * inode, struct reiserfs_iget_args *args) ; +int reiserfs_find_actor(struct inode * inode, void *p) ; +int reiserfs_init_locked_inode(struct inode * inode, void *p) ; void reiserfs_delete_inode (struct inode * inode); void reiserfs_write_inode (struct inode * inode, int) ; struct dentry *reiserfs_get_dentry(struct super_block *, void *) ; diff --git a/include/linux/swap.h b/include/linux/swap.h index 1674b5acd6f7..3a376842c21c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -91,6 +91,7 @@ struct swap_info_struct { int next; /* next entry on swap list */ }; +struct inode; extern int nr_swap_pages; /* Swap 50% full? Release swapcache more aggressively.. */ diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index 609d0dab2c6f..faccf5ad22d5 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/time.h> #include <linux/stat.h> +#include <linux/fs.h> #define UFS_BBLOCK 0 #define UFS_BBSIZE 8192 diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1978e06d1131..9dc03210ee62 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -13,20 +13,27 @@ extern struct list_head inode_in_use; extern struct list_head inode_unused; /* + * Yes, writeback.h requires sched.h + * No, sched.h is not included from here. 
+ */ +static inline int current_is_pdflush(void) +{ + return current->flags & PF_FLUSHER; +} + +/* * fs/fs-writeback.c */ #define WB_SYNC_NONE 0 /* Don't wait on anything */ #define WB_SYNC_LAST 1 /* Wait on the last-written mapping */ #define WB_SYNC_ALL 2 /* Wait on every mapping */ +#define WB_SYNC_HOLD 3 /* Hold the inode on sb_dirty for sys_sync() */ -void try_to_writeback_unused_inodes(unsigned long pexclusive); -void writeback_single_inode(struct inode *inode, - int sync, int *nr_to_write); void writeback_unlocked_inodes(int *nr_to_write, int sync_mode, unsigned long *older_than_this); -void writeback_inodes_sb(struct super_block *); void __wait_on_inode(struct inode * inode); -void sync_inodes(void); +void sync_inodes_sb(struct super_block *, int wait); +void sync_inodes(int wait); static inline void wait_on_inode(struct inode *inode) { @@ -37,17 +44,9 @@ static inline void wait_on_inode(struct inode *inode) /* * mm/page-writeback.c */ -/* - * How much data to write out at a time in various places. This isn't - * really very important - it's just here to prevent any thread from - * locking an inode for too long and blocking other threads which wish - * to write the same file for allocation throttling purposes. - */ -#define WRITEOUT_PAGES ((4096 * 1024) / PAGE_CACHE_SIZE) - void balance_dirty_pages(struct address_space *mapping); void balance_dirty_pages_ratelimited(struct address_space *mapping); -int pdflush_flush(unsigned long nr_pages); int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); +int writeback_mapping(struct address_space *mapping, int *nr_to_write); #endif /* WRITEBACK_H */ diff --git a/include/linux/xqm.h b/include/linux/xqm.h new file mode 100644 index 000000000000..d077bc18a424 --- /dev/null +++ b/include/linux/xqm.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 1995-2001 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, + * USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef _LINUX_XQM_H +#define _LINUX_XQM_H + +#include <linux/types.h> + +/* + * Disk quota - quotactl(2) commands for the XFS Quota Manager (XQM). 
+ */ + +#define XQM_CMD(x) (('X'<<8)+(x)) /* note: forms first QCMD argument */ +#define Q_XQUOTAON XQM_CMD(0x1) /* enable accounting/enforcement */ +#define Q_XQUOTAOFF XQM_CMD(0x2) /* disable accounting/enforcement */ +#define Q_XGETQUOTA XQM_CMD(0x3) /* get disk limits and usage */ +#define Q_XSETQLIM XQM_CMD(0x4) /* set disk limits */ +#define Q_XGETQSTAT XQM_CMD(0x5) /* get quota subsystem status */ +#define Q_XQUOTARM XQM_CMD(0x6) /* free disk space used by dquots */ + +/* + * fs_disk_quota structure: + * + * This contains the current quota information regarding a user/proj/group. + * It is 64-bit aligned, and all the blk units are in BBs (Basic Blocks) of + * 512 bytes. + */ +#define FS_DQUOT_VERSION 1 /* fs_disk_quota.d_version */ +typedef struct fs_disk_quota { + __s8 d_version; /* version of this structure */ + __s8 d_flags; /* XFS_{USER,PROJ,GROUP}_QUOTA */ + __u16 d_fieldmask; /* field specifier */ + __u32 d_id; /* user, project, or group ID */ + __u64 d_blk_hardlimit;/* absolute limit on disk blks */ + __u64 d_blk_softlimit;/* preferred limit on disk blks */ + __u64 d_ino_hardlimit;/* maximum # allocated inodes */ + __u64 d_ino_softlimit;/* preferred inode limit */ + __u64 d_bcount; /* # disk blocks owned by the user */ + __u64 d_icount; /* # inodes owned by the user */ + __s32 d_itimer; /* zero if within inode limits */ + /* if not, we refuse service */ + __s32 d_btimer; /* similar to above; for disk blocks */ + __u16 d_iwarns; /* # warnings issued wrt num inodes */ + __u16 d_bwarns; /* # warnings issued wrt disk blocks */ + __s32 d_padding2; /* padding2 - for future use */ + __u64 d_rtb_hardlimit;/* absolute limit on realtime blks */ + __u64 d_rtb_softlimit;/* preferred limit on RT disk blks */ + __u64 d_rtbcount; /* # realtime blocks owned */ + __s32 d_rtbtimer; /* similar to above; for RT disk blks */ + __u16 d_rtbwarns; /* # warnings issued wrt RT disk blks */ + __s16 d_padding3; /* padding3 - for future use */ + char d_padding4[8]; /* yet more 
padding */ +} fs_disk_quota_t; + +/* + * These fields are sent to Q_XSETQLIM to specify fields that need to change. + */ +#define FS_DQ_ISOFT (1<<0) +#define FS_DQ_IHARD (1<<1) +#define FS_DQ_BSOFT (1<<2) +#define FS_DQ_BHARD (1<<3) +#define FS_DQ_RTBSOFT (1<<4) +#define FS_DQ_RTBHARD (1<<5) +#define FS_DQ_LIMIT_MASK (FS_DQ_ISOFT | FS_DQ_IHARD | FS_DQ_BSOFT | \ + FS_DQ_BHARD | FS_DQ_RTBSOFT | FS_DQ_RTBHARD) +/* + * These timers can only be set in super user's dquot. For others, timers are + * automatically started and stopped. Superusers timer values set the limits + * for the rest. In case these values are zero, the DQ_{F,B}TIMELIMIT values + * defined below are used. + * These values also apply only to the d_fieldmask field for Q_XSETQLIM. + */ +#define FS_DQ_BTIMER (1<<6) +#define FS_DQ_ITIMER (1<<7) +#define FS_DQ_RTBTIMER (1<<8) +#define FS_DQ_TIMER_MASK (FS_DQ_BTIMER | FS_DQ_ITIMER | FS_DQ_RTBTIMER) + +/* + * The following constants define the default amount of time given a user + * before the soft limits are treated as hard limits (usually resulting + * in an allocation failure). These may be modified by the quotactl(2) + * system call with the Q_XSETQLIM command. + */ +#define DQ_FTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define DQ_BTIMELIMIT (7 * 24*60*60) /* 1 week */ + +/* + * Various flags related to quotactl(2). Only relevant to XFS filesystems. + */ +#define XFS_QUOTA_UDQ_ACCT (1<<0) /* user quota accounting */ +#define XFS_QUOTA_UDQ_ENFD (1<<1) /* user quota limits enforcement */ +#define XFS_QUOTA_GDQ_ACCT (1<<2) /* group quota accounting */ +#define XFS_QUOTA_GDQ_ENFD (1<<3) /* group quota limits enforcement */ + +#define XFS_USER_QUOTA (1<<0) /* user quota type */ +#define XFS_PROJ_QUOTA (1<<1) /* (IRIX) project quota type */ +#define XFS_GROUP_QUOTA (1<<2) /* group quota type */ + +/* + * fs_quota_stat is the struct returned in Q_XGETQSTAT for a given file system. + * Provides a centralized way to get meta infomation about the quota subsystem. 
+ * eg. space taken up for user and group quotas, number of dquots currently + * incore. + */ +#define FS_QSTAT_VERSION 1 /* fs_quota_stat.qs_version */ + +/* + * Some basic infomation about 'quota files'. + */ +typedef struct fs_qfilestat { + __u64 qfs_ino; /* inode number */ + __u64 qfs_nblks; /* number of BBs 512-byte-blks */ + __u32 qfs_nextents; /* number of extents */ +} fs_qfilestat_t; + +typedef struct fs_quota_stat { + __s8 qs_version; /* version number for future changes */ + __u16 qs_flags; /* XFS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */ + __s8 qs_pad; /* unused */ + fs_qfilestat_t qs_uquota; /* user quota storage information */ + fs_qfilestat_t qs_gquota; /* group quota storage information */ + __u32 qs_incoredqs; /* number of dquots incore */ + __s32 qs_btimelimit; /* limit for blks timer */ + __s32 qs_itimelimit; /* limit for inodes timer */ + __s32 qs_rtbtimelimit;/* limit for rt blks timer */ + __u16 qs_bwarnlimit; /* limit for num warnings */ + __u16 qs_iwarnlimit; /* limit for num warnings */ +} fs_quota_stat_t; + +#endif /* _LINUX_XQM_H */ diff --git a/init/Makefile b/init/Makefile new file mode 100644 index 000000000000..5703814c836e --- /dev/null +++ b/init/Makefile @@ -0,0 +1,22 @@ +# +# Makefile for the linux kernel. +# + +O_TARGET := init.o + +obj-y := main.o version.o do_mounts.o + +include $(TOPDIR)/Rules.make + +# dependencies on generated files need to be listed explicitly + +version.o: ../include/linux/compile.h + +# compile.h changes depending on hostname, generation number, etc, +# so we regenerate it always. +# mkcompile_h will make sure to only update the +# actualy file if its content has changed. + +../include/linux/compile.h: ../include/linux/version.h dummy + @echo Generating $@ + @. 
../scripts/mkcompile_h $@ "$(ARCH)" "$(CONFIG_SMP)" "$(CC) $(CFLAGS)" diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 3963850911bc..add2ac9dd8f3 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -21,7 +21,6 @@ #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/serial.h> -#include <linux/locks.h> #include <linux/delay.h> #include <linux/random.h> #include <linux/reboot.h> @@ -137,7 +136,6 @@ EXPORT_SYMBOL(fput); EXPORT_SYMBOL(fget); EXPORT_SYMBOL(igrab); EXPORT_SYMBOL(iunique); -EXPORT_SYMBOL(iget4); EXPORT_SYMBOL(iput); EXPORT_SYMBOL(inode_init_once); EXPORT_SYMBOL(force_delete); @@ -540,7 +538,7 @@ EXPORT_SYMBOL(clear_inode); EXPORT_SYMBOL(init_special_inode); EXPORT_SYMBOL(__get_hash_table); EXPORT_SYMBOL(new_inode); -EXPORT_SYMBOL(insert_inode_hash); +EXPORT_SYMBOL(__insert_inode_hash); EXPORT_SYMBOL(remove_inode_hash); EXPORT_SYMBOL(buffer_insert_list); EXPORT_SYMBOL(make_bad_inode); diff --git a/mm/filemap.c b/mm/filemap.c index 681d02d62685..67a7cf78292d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -42,7 +42,7 @@ * * pagemap_lru_lock * ->i_shared_lock (vmtruncate) - * ->i_bufferlist_lock (__free_pte->__set_page_dirty_buffers) + * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->mapping->page_lock * ->inode_lock (__mark_inode_dirty) * ->sb_lock (fs/fs-writeback.c) @@ -425,11 +425,14 @@ void invalidate_inode_pages2(struct address_space * mapping) * - activate the page so that the page stealer * doesn't try to write it out over and over * again. + * + * NOTE! The livelock in fdatasync went away, due to io_pages. + * So this function can now call set_page_dirty(). */ int fail_writepage(struct page *page) { /* Only activate on memory-pressure, not fsync.. 
*/ - if (PageLaunder(page)) { + if (current->flags & PF_MEMALLOC) { activate_page(page); SetPageReferenced(page); } @@ -450,9 +453,7 @@ EXPORT_SYMBOL(fail_writepage); */ int filemap_fdatawrite(struct address_space *mapping) { - if (mapping->a_ops->writeback_mapping) - return mapping->a_ops->writeback_mapping(mapping, NULL); - return generic_writeback_mapping(mapping, NULL); + return writeback_mapping(mapping, NULL); } /** @@ -651,7 +652,6 @@ void unlock_page(struct page *page) void end_page_writeback(struct page *page) { wait_queue_head_t *waitqueue = page_waitqueue(page); - ClearPageLaunder(page); smp_mb__before_clear_bit(); if (!TestClearPageWriteback(page)) BUG(); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 40e80dff9369..725a4bdb60e1 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -20,6 +20,15 @@ #include <linux/writeback.h> #include <linux/init.h> #include <linux/sysrq.h> +#include <linux/backing-dev.h> + +/* + * The maximum number of pages to writeout in a single bdflush/kupdate + * operation. We do this so we don't hold I_LOCK against an inode for + * enormous amounts of time, which would block a userspace task which has + * been forced to throttle against that inode. + */ +#define MAX_WRITEBACK_PAGES 1024 /* * Memory thresholds, in percentages @@ -41,6 +50,8 @@ static int dirty_async_ratio = 50; */ static int dirty_sync_ratio = 60; +static void background_writeout(unsigned long unused); + /* * balance_dirty_pages() must be called by processes which are * generating dirty data. It looks at the number of dirty pages @@ -53,15 +64,16 @@ static int dirty_sync_ratio = 60; * - Does nothing at all. * * balance_dirty_pages() can sleep. + * + * FIXME: WB_SYNC_LAST doesn't actually work. It waits on the last dirty + * inode on the superblock list. It should wait when nr_to_write is + * exhausted. Doesn't seem to matter. 
*/ void balance_dirty_pages(struct address_space *mapping) { const int tot = nr_free_pagecache_pages(); struct page_state ps; - int background_thresh; - int async_thresh; - int sync_thresh; - int wake_pdflush = 0; + int background_thresh, async_thresh, sync_thresh; unsigned long dirty_and_writeback; get_page_state(&ps); @@ -76,30 +88,27 @@ void balance_dirty_pages(struct address_space *mapping) writeback_unlocked_inodes(&nr_to_write, WB_SYNC_LAST, NULL); get_page_state(&ps); - dirty_and_writeback = ps.nr_dirty + ps.nr_writeback; - wake_pdflush = 1; } else if (dirty_and_writeback > async_thresh) { int nr_to_write = 1500; writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, NULL); - } else if (dirty_and_writeback > background_thresh) { - wake_pdflush = 1; + get_page_state(&ps); } - if (wake_pdflush && !IS_FLUSHING(mapping->host)) { - /* - * There is no flush thread against this device. Start one now. - */ - if (dirty_and_writeback > async_thresh) { - pdflush_flush(dirty_and_writeback - async_thresh); - yield(); - } - } + if (!writeback_in_progress(mapping->backing_dev_info) && + ps.nr_dirty > background_thresh) + pdflush_operation(background_writeout, 0); } -/* - * Front-end to balance_dirty_pages - just to make sure it's not called - * too often. +/** + * balance_dirty_pages_ratelimited - balance dirty memory state + * @mapping - address_space which was dirtied + * + * Processes which are dirtying memory should call in here once for each page + * which was newly dirtied. The function will periodically check the system's + * dirty state and will initiate writeback if needed. + * + * balance_dirty_pages_ratelimited() may sleep. */ void balance_dirty_pages_ratelimited(struct address_space *mapping) { @@ -120,39 +129,38 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) } /* - * Here are some applications of the pdflush thread pool - */ - -/* - * Start heavy writeback of everything. This is the analogue of the old - * wakeup_bdflush(). 
Returns zero if a thread was successfully launched. - * - * Is passed in the number of pages to write. - * - * We yield, to allow page allocators to perform their I/O against large files. + * writeback at least _min_pages, and keep writing until the amount of dirty + * memory is less than the background threshold, or until we're all clean. */ - -static void pdflush_bdflush(unsigned long arg) +static void background_writeout(unsigned long _min_pages) { - int nr_pages = arg; - - CHECK_EMERGENCY_SYNC + const int tot = nr_free_pagecache_pages(); + const int background_thresh = (dirty_background_ratio * tot) / 100; + long min_pages = _min_pages; + int nr_to_write; - while (nr_pages) { - int nr_to_write = WRITEOUT_PAGES; + do { + struct page_state ps; - if (nr_to_write > nr_pages) - nr_to_write = nr_pages; - nr_pages -= nr_to_write; + get_page_state(&ps); + if (ps.nr_dirty < background_thresh && min_pages <= 0) + break; + nr_to_write = MAX_WRITEBACK_PAGES; writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, NULL); - yield(); - } + min_pages -= MAX_WRITEBACK_PAGES - nr_to_write; + } while (nr_to_write <= 0); run_task_queue(&tq_disk); } -int pdflush_flush(unsigned long nr_pages) +/* + * Start heavy writeback of everything. + */ +void wakeup_bdflush(void) { - return pdflush_operation(pdflush_bdflush, nr_pages); + struct page_state ps; + + get_page_state(&ps); + pdflush_operation(background_writeout, ps.nr_dirty); } /* @@ -166,6 +174,7 @@ int pdflush_flush(unsigned long nr_pages) * to perform their I/O against a large file. */ static int wb_writeback_jifs = 5 * HZ; +static struct timer_list wb_timer; /* * Periodic writeback of "old" data. @@ -175,52 +184,41 @@ static int wb_writeback_jifs = 5 * HZ; * just walks the superblock inode list, writing back any inodes which are * older than a specific point in time. * - * Spot the bug: at jiffies wraparound, the attempt to set the inode's dirtying - * time won't work, because zero means not-dirty. That's OK. 
The data will get - * written out later by the VM (at least). + * Try to run once per wb_writeback_jifs jiffies. But if a writeback event + * takes longer than a wb_writeback_jifs interval, then leave a one-second + * gap. * - * We also limit the number of pages which are written out, to avoid writing - * huge amounts of data against a single file, which would cause memory - * allocators to block for too long. + * older_than_this takes precedence over nr_to_write. So we'll only write back + * all dirty pages if they are all attached to "old" mappings. */ static void wb_kupdate(unsigned long arg) { - unsigned long oldest_jif = jiffies - 30*HZ; + unsigned long oldest_jif; + unsigned long start_jif; + unsigned long next_jif; struct page_state ps; - int total_to_write; int nr_to_write; sync_supers(); - get_page_state(&ps); - total_to_write = ps.nr_dirty / 6; - if (total_to_write < 16384) { - total_to_write = 16384; - if (total_to_write > ps.nr_dirty) - total_to_write = ps.nr_dirty; - } - while (total_to_write > 0) { - nr_to_write = total_to_write; - if (nr_to_write > WRITEOUT_PAGES) - nr_to_write = WRITEOUT_PAGES; - total_to_write -= nr_to_write; - writeback_unlocked_inodes(&nr_to_write, - WB_SYNC_NONE, &oldest_jif); - yield(); - } + oldest_jif = jiffies - 30*HZ; + start_jif = jiffies; + next_jif = start_jif + wb_writeback_jifs; + nr_to_write = ps.nr_dirty; + writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, &oldest_jif); run_task_queue(&tq_disk); -} + yield(); -/* - * The writeback timer, for kupdate-style functionality - */ -static struct timer_list wb_timer; + if (time_before(next_jif, jiffies + HZ)) + next_jif = jiffies + HZ; + mod_timer(&wb_timer, next_jif); +} static void wb_timer_fn(unsigned long unused) { - mod_timer(&wb_timer, jiffies + wb_writeback_jifs); - pdflush_operation(wb_kupdate, 0); + if (pdflush_operation(wb_kupdate, 0) < 0) + mod_timer(&wb_timer, jiffies + HZ); } static int __init wb_timer_init(void) @@ -235,23 +233,42 @@ static int __init 
wb_timer_init(void) module_init(wb_timer_init); /* - * FIXME: PG_launder gets cleared by accident. + * A library function, which implements the vm_writeback a_op. It's fairly + * lame at this time. The idea is: the VM wants to liberate this page, + * so we pass the page to the address_space and give the fs the opportunity + * to write out lots of pages around this one. It allows extent-based + * filesytems to do intelligent things. It lets delayed-allocate filesystems + * perform better file layout. It lets the address_space opportunistically + * write back disk-contiguous pages which are in other zones. + * + * FIXME: the VM wants to start I/O against *this* page. Because its zone + * is under pressure. But this function may start writeout against a + * totally different set of pages. Unlikely to be a huge problem, but if it + * is, we could just writepage the page if it is still (PageDirty && + * !PageWriteback) (See below). + * + * Another option is to just reposition page->mapping->dirty_pages so we + * *know* that the page will be written. That will work fine, but seems + * unpleasant. (If the page is not for-sure on ->dirty_pages we're dead). + * Plus it assumes that the address_space is performing writeback in + * ->dirty_pages order. + * + * So. The proper fix is to leave the page locked-and-dirty and to pass + * it all the way down. */ -static int writeback_mapping(struct page *page, int *nr_to_write) +int generic_vm_writeback(struct page *page, int *nr_to_write) { struct inode *inode = page->mapping->host; - SetPageDirty(page); - /* - * We don't own this inode, so we don't want the address_space - * vanishing while writeback is walking the list + * We don't own this inode, and we don't want the address_space + * vanishing while writeback is walking its pages. 
*/ inode = igrab(inode); unlock_page(page); if (inode) { - writeback_single_inode(inode, 0, nr_to_write); + writeback_mapping(inode->i_mapping, nr_to_write); /* * This iput() will internally call ext2_discard_prealloc(), @@ -261,23 +278,18 @@ static int writeback_mapping(struct page *page, int *nr_to_write) * Just a waste of cycles. */ iput(inode); +#if 0 + if (!PageWriteback(page) && PageDirty(page)) { + lock_page(page); + if (!PageWriteback(page) && TestClearPageDirty(page)) + page->mapping->a_ops->writepage(page); + else + unlock_page(page); + } +#endif } return 0; } - -/* - * A library function, which implements the vm_writeback a_op. It's fairly - * lame at this time. The idea is: the VM wants to liberate this page, - * so we pass the page to the address_space and give the fs the opportunity - * to write out lots of pages around this one. It allows extent-based - * filesytems to do intelligent things. It lets delayed-allocate filesystems - * perform better file layout. It lets the address_space opportunistically - * write back disk-contiguous pages which are in other zones. - */ -int generic_vm_writeback(struct page *page, int *nr_to_write) -{ - return writeback_mapping(page, nr_to_write); -} EXPORT_SYMBOL(generic_vm_writeback); /** @@ -288,8 +300,7 @@ EXPORT_SYMBOL(generic_vm_writeback); * @nr_to_write: subtract the number of written pages from *@nr_to_write * * This is a library function, which implements the writeback_mapping() - * address_space_operation for filesystems which are using multipage BIO - * writeback. + * address_space_operation. 
* * (The next two paragraphs refer to code which isn't here yet, but they * explain the presence of address_space.io_pages) @@ -319,16 +330,15 @@ EXPORT_SYMBOL(generic_vm_writeback); */ int generic_writeback_mapping(struct address_space *mapping, int *nr_to_write) { + int (*writepage)(struct page *) = mapping->a_ops->writepage; int ret = 0; int done = 0; int err; - int (*writepage)(struct page *) = mapping->a_ops->writepage; write_lock(&mapping->page_lock); list_splice(&mapping->dirty_pages, &mapping->io_pages); INIT_LIST_HEAD(&mapping->dirty_pages); - mapping->dirtied_when = 0; while (!list_empty(&mapping->io_pages) && !done) { struct page *page = list_entry(mapping->io_pages.prev, @@ -347,23 +357,27 @@ int generic_writeback_mapping(struct address_space *mapping, int *nr_to_write) continue; } list_add(&page->list, &mapping->locked_pages); - page_cache_get(page); write_unlock(&mapping->page_lock); - lock_page(page); - if (TestClearPageDirty(page)) { - if (current->flags & PF_MEMALLOC) - SetPageLaunder(page); + /* It may have been removed from swapcache: check ->mapping */ + if (page->mapping && TestClearPageDirty(page) && + !PageWriteback(page)) { + /* FIXME: batch this up */ + if (!PageActive(page) && PageLRU(page)) { + spin_lock(&pagemap_lru_lock); + if (!PageActive(page) && PageLRU(page)) { + list_del(&page->lru); + list_add(&page->lru, &inactive_list); + } + spin_unlock(&pagemap_lru_lock); + } err = writepage(page); if (!ret) ret = err; - if (nr_to_write) { - --(*nr_to_write); - if (*nr_to_write <= 0) - done = 1; - } + if (nr_to_write && --(*nr_to_write) <= 0) + done = 1; } else { unlock_page(page); } @@ -383,14 +397,20 @@ int generic_writeback_mapping(struct address_space *mapping, int *nr_to_write) } EXPORT_SYMBOL(generic_writeback_mapping); +int writeback_mapping(struct address_space *mapping, int *nr_to_write) +{ + if (mapping->a_ops->writeback_mapping) + return mapping->a_ops->writeback_mapping(mapping, nr_to_write); + return 
generic_writeback_mapping(mapping, nr_to_write); +} + /** * write_one_page - write out a single page and optionally wait on I/O * * @page - the page to write * @wait - if true, wait on writeout * - * The page must be locked by the caller and will come unlocked when I/O - * completes. + * The page must be locked by the caller and will be unlocked upon return. * * write_one_page() returns a negative error code if I/O failed. */ @@ -450,7 +470,7 @@ EXPORT_SYMBOL(write_one_page); * It's better to have clean pages accidentally attached to dirty_pages than to * leave dirty pages attached to clean_pages. * - * We use i_bufferlist_lock to lock against try_to_free_buffers while using the + * We use private_lock to lock against try_to_free_buffers while using the * page's buffer list. Also use this to protect against clean buffers being * added to the page after it was set dirty. * @@ -462,18 +482,15 @@ EXPORT_SYMBOL(write_one_page); */ int __set_page_dirty_buffers(struct page *page) { + struct address_space * const mapping = page->mapping; int ret = 0; - struct address_space *mapping = page->mapping; - struct inode *inode; if (mapping == NULL) { SetPageDirty(page); goto out; } - inode = mapping->host; - - spin_lock(&inode->i_bufferlist_lock); + spin_lock(&mapping->private_lock); if (page_has_buffers(page) && !PageSwapCache(page)) { struct buffer_head *head = page_buffers(page); @@ -496,7 +513,7 @@ int __set_page_dirty_buffers(struct page *page) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } - spin_unlock(&inode->i_bufferlist_lock); + spin_unlock(&mapping->private_lock); out: return ret; } diff --git a/mm/page_io.c b/mm/page_io.c index 05594b07aba9..85bb9049ee0f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -12,8 +12,8 @@ #include <linux/mm.h> #include <linux/kernel_stat.h> +#include <linux/pagemap.h> #include <linux/swap.h> -#include <linux/locks.h> #include <linux/swapctl.h> #include <asm/pgtable.h> diff --git a/mm/pdflush.c b/mm/pdflush.c index 
07ceb439e9ae..5e7d0125c39d 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -103,6 +103,7 @@ static int __pdflush(struct pdflush_work *my_work) preempt_disable(); spin_lock_irq(&pdflush_lock); nr_pdflush_threads++; +// printk("pdflush %d [%d] starts\n", nr_pdflush_threads, current->pid); for ( ; ; ) { struct pdflush_work *pdf; @@ -124,7 +125,7 @@ static int __pdflush(struct pdflush_work *my_work) if (jiffies - last_empty_jifs > 1 * HZ) { /* unlocked list_empty() test is OK here */ if (list_empty(&pdflush_list)) { - /* unlocked nr_pdflush_threads test is OK here */ + /* unlocked test is OK here */ if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) start_one_pdflush_thread(); } @@ -147,6 +148,7 @@ static int __pdflush(struct pdflush_work *my_work) } } nr_pdflush_threads--; +// printk("pdflush %d [%d] ends\n", nr_pdflush_threads, current->pid); spin_unlock_irq(&pdflush_lock); preempt_enable(); return 0; diff --git a/mm/readahead.c b/mm/readahead.c index 86d54f5b38e5..03fd19c23bbb 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -11,8 +11,12 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/blkdev.h> +#include <linux/backing-dev.h> -unsigned long default_ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; +struct backing_dev_info default_backing_dev_info = { + ra_pages: (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE, + state: 0, +}; /* * Return max readahead size for this inode in number-of-pages. @@ -117,25 +121,27 @@ void do_page_cache_readahead(struct file *file, /* * Preallocate as many pages as we will need. 
*/ + read_lock(&mapping->page_lock); for (page_idx = 0; page_idx < nr_to_read; page_idx++) { unsigned long page_offset = offset + page_idx; if (page_offset > end_index) break; - read_lock(&mapping->page_lock); page = radix_tree_lookup(&mapping->page_tree, page_offset); - read_unlock(&mapping->page_lock); if (page) continue; + read_unlock(&mapping->page_lock); page = page_cache_alloc(mapping); + read_lock(&mapping->page_lock); if (!page) break; page->index = page_offset; list_add(&page->list, &page_pool); nr_to_really_read++; } + read_unlock(&mapping->page_lock); /* * Now start the IO. We ignore I/O errors - if the page is not diff --git a/mm/shmem.c b/mm/shmem.c index 64330ed216f4..fa365c456448 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -25,7 +25,6 @@ #include <linux/swap.h> #include <linux/pagemap.h> #include <linux/string.h> -#include <linux/locks.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/shmem_fs.h> @@ -438,7 +437,8 @@ static int shmem_writepage(struct page * page) if (!PageLocked(page)) BUG(); - if (!PageLaunder(page)) + + if (!(current->flags & PF_MEMALLOC)) return fail_writepage(page); mapping = page->mapping; diff --git a/mm/swap_state.c b/mm/swap_state.c index 85002f16a6fa..e802fb3aa707 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -31,17 +31,34 @@ static int swap_writepage(struct page *page) return 0; } +/* + * swapper_space doesn't have a real inode, so it gets a special vm_writeback() + * so we don't need swap special cases in generic_vm_writeback(). + * + * FIXME: swap pages are locked, but not PageWriteback while under writeout. + * This will confuse throttling in shrink_cache(). It may be advantageous to + * set PG_writeback against swap pages while they're also locked. Either that, + * or special-case swap pages in shrink_cache(). 
+ */ +static int swap_vm_writeback(struct page *page, int *nr_to_write) +{ + struct address_space *mapping = page->mapping; + + unlock_page(page); + return generic_writeback_mapping(mapping, nr_to_write); +} + static struct address_space_operations swap_aops = { + vm_writeback: swap_vm_writeback, writepage: swap_writepage, sync_page: block_sync_page, }; /* - * swapper_inode is needed only for for i_bufferlist_lock. This - * avoid special-casing in other parts of the kernel. + * swapper_inode doesn't do anything much. It is really only here to + * avoid some special-casing in other parts of the kernel. */ static struct inode swapper_inode = { - i_bufferlist_lock: SPIN_LOCK_UNLOCKED, i_mapping: &swapper_space, }; @@ -55,6 +72,8 @@ struct address_space swapper_space = { host: &swapper_inode, a_ops: &swap_aops, i_shared_lock: SPIN_LOCK_UNLOCKED, + private_lock: SPIN_LOCK_UNLOCKED, + private_list: LIST_HEAD_INIT(swapper_space.private_list), }; #ifdef SWAP_CACHE_INFO @@ -219,8 +238,16 @@ int move_from_swap_cache(struct page *page, unsigned long index, page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked); + /* + * ___add_to_page_cache puts the page on ->clean_pages, + * but it's dirty. If it's on ->clean_pages, it will basically + * never get written out. + */ SetPageDirty(page); ___add_to_page_cache(page, mapping, index); + /* fix that up */ + list_del(&page->list); + list_add(&page->list, &mapping->dirty_pages); } write_unlock(&mapping->page_lock); diff --git a/mm/swapfile.c b/mm/swapfile.c index 0c49a1481ddb..504c34fbbaf0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -311,6 +311,11 @@ int remove_exclusive_swap_page(struct page *page) write_lock(&swapper_space.page_lock); if (page_count(page) - !!PagePrivate(page) == 2) { __delete_from_swap_cache(page); + /* + * NOTE: if/when swap gets buffer/page coherency + * like other mappings, we'll need to mark the buffers + * dirty here too. set_page_dirty(). 
+ */ SetPageDirty(page); retval = 1; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 42b8ba093e9d..c4ef073b682e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -424,11 +424,10 @@ static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, goto page_mapped; /* - * The page is locked. IO in progress? - * Move it to the back of the list. + * IO in progress? Leave it at the back of the list. */ if (unlikely(PageWriteback(page))) { - if (PageLaunder(page) && (gfp_mask & __GFP_FS)) { + if (gfp_mask & __GFP_FS) { page_cache_get(page); spin_unlock(&pagemap_lru_lock); wait_on_page_writeback(page); @@ -458,35 +457,20 @@ static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, * pinned it and after the I/O to the page is finished, * so the direct writes to the page cannot get lost. */ - struct address_space_operations *a_ops; int (*writeback)(struct page *, int *); - int (*writepage)(struct page *); + const int nr_pages = SWAP_CLUSTER_MAX; + int nr_to_write = nr_pages; - /* - * There's no guarantee that writeback() will actually - * start I/O against *this* page. Which is broken if we're - * trying to free memory in a particular zone. FIXME. 
- */ - a_ops = mapping->a_ops; - writeback = a_ops->vm_writeback; - writepage = a_ops->writepage; - if (writeback || writepage) { - SetPageLaunder(page); - page_cache_get(page); - spin_unlock(&pagemap_lru_lock); - ClearPageDirty(page); - - if (writeback) { - int nr_to_write = WRITEOUT_PAGES; - writeback(page, &nr_to_write); - } else { - writepage(page); - } - page_cache_release(page); - - spin_lock(&pagemap_lru_lock); - continue; - } + writeback = mapping->a_ops->vm_writeback; + if (writeback == NULL) + writeback = generic_vm_writeback; + page_cache_get(page); + spin_unlock(&pagemap_lru_lock); + (*writeback)(page, &nr_to_write); + max_scan -= (nr_pages - nr_to_write); + page_cache_release(page); + spin_lock(&pagemap_lru_lock); + continue; } /* @@ -648,6 +632,8 @@ static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask if (nr_pages <= 0) return 0; + wakeup_bdflush(); + shrink_dcache_memory(priority, gfp_mask); /* After shrinking the dcache, get rid of unused inodes too .. */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 50873f1890e7..f04a3c5a9aa8 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -203,6 +203,7 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, return NULL; } +#ifdef CONFIG_IP_NF_NAT_LOCAL /* If it's really a local destination manip, it may need to do a source manip too. */ static int @@ -221,6 +222,7 @@ do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp) ip_rt_put(rt); return 1; } +#endif /* Simple way to iterate through all. 
*/ static inline int fake_cmp(const struct ip_nat_hash *i, diff --git a/net/khttpd/datasending.c b/net/khttpd/datasending.c index d1663cc21ef0..c78ef11b39b3 100644 --- a/net/khttpd/datasending.c +++ b/net/khttpd/datasending.c @@ -36,7 +36,6 @@ Return value: #include <linux/config.h> #include <linux/kernel.h> -#include <linux/locks.h> #include <linux/skbuff.h> #include <net/tcp.h> diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 4c4afc19eb9a..05f9ecb2f3ae 100644 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -5,11 +5,11 @@ CC=$4 # Generate a temporary compile.h -( echo /\* This file is auto generated, version `cat .version` \*/ +( echo /\* This file is auto generated, version `cat ../.version` \*/ echo \#define UTS_MACHINE \"$ARCH\" - echo -n \#define UTS_VERSION \"\#`cat .version` + echo -n \#define UTS_VERSION \"\#`cat ../.version` if [ -n "$SMP" ] ; then echo -n " SMP"; fi echo ' '`date`'"' diff --git a/sound/core/Config.in b/sound/core/Config.in index 83d122cd42b7..77e2e8dfcac8 100644 --- a/sound/core/Config.in +++ b/sound/core/Config.in @@ -13,7 +13,7 @@ dep_tristate ' Sequencer support' CONFIG_SND_SEQUENCER $CONFIG_SND if [ "$CONFIG_SND_SEQUENCER" != "n" ]; then dep_tristate ' Sequencer dummy client' CONFIG_SND_SEQ_DUMMY $CONFIG_SND_SEQUENCER fi -dep_bool ' OSS API emulation' CONFIG_SND_OSSEMUL $CONFIG_SND +dep_mbool ' OSS API emulation' CONFIG_SND_OSSEMUL $CONFIG_SND if [ "$CONFIG_SND_OSSEMUL" = "y" ]; then dep_tristate ' OSS Mixer API' CONFIG_SND_MIXER_OSS $CONFIG_SND dep_tristate ' OSS PCM API' CONFIG_SND_PCM_OSS $CONFIG_SND diff --git a/sound/core/memory.c b/sound/core/memory.c index 6cc7093972e8..168dc13fd418 100644 --- a/sound/core/memory.c +++ b/sound/core/memory.c @@ -470,18 +470,16 @@ char *snd_kmalloc_strdup(const char *string, int flags) int copy_to_user_fromio(void *dst, unsigned long src, size_t count) { #if defined(__i386_) || defined(CONFIG_SPARC32) - return copy_to_user(dst, (const void*) src, count); + return 
copy_to_user(dst, (const void*)src, count) ? -EFAULT : 0; #else char buf[1024]; while (count) { size_t c = count; - int err; if (c > sizeof(buf)) c = sizeof(buf); memcpy_fromio(buf, src, c); - err = copy_to_user(dst, buf, c); - if (err) - return err; + if (copy_to_user(dst, buf, c)) + return -EFAULT; count -= c; dst += c; src += c; @@ -493,17 +491,15 @@ int copy_to_user_fromio(void *dst, unsigned long src, size_t count) int copy_from_user_toio(unsigned long dst, const void *src, size_t count) { #if defined(__i386_) || defined(CONFIG_SPARC32) - return copy_from_user((void*)dst, src, count); + return copy_from_user((void*)dst, src, count) ? -EFAULT : 0; #else char buf[1024]; while (count) { size_t c = count; - int err; if (c > sizeof(buf)) c = sizeof(buf); - err = copy_from_user(buf, src, c); - if (err) - return err; + if (copy_from_user(buf, src, c)) + return -EFAULT; memcpy_toio(dst, buf, c); count -= c; dst += c; diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index ac0e14d61409..75737a145d3c 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -846,9 +846,13 @@ static int snd_cmipci_ac3_copy(snd_pcm_substream_t *subs, int channel, snd_pcm_uframes_t offset; snd_pcm_runtime_t *runtime = subs->runtime; - if (! cm->channel[CM_CH_PLAY].ac3_shift) - return copy_from_user(runtime->dma_area + frames_to_bytes(runtime, pos), - src, frames_to_bytes(runtime, count)); + if (!cm->channel[CM_CH_PLAY].ac3_shift) { + if (copy_from_user(runtime->dma_area + + frames_to_bytes(runtime, pos), src, + frames_to_bytes(runtime, count))) + return -EFAULT; + return 0; + } if (! 
access_ok(VERIFY_READ, src, count)) return -EFAULT; diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 44c31f555eee..c2a0e2722185 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -1455,9 +1455,7 @@ static int snd_korg1212_playback_copy(snd_pcm_substream_t *substream, snd_assert(pos + count <= K1212_MAX_SAMPLES, return -EINVAL); - copy_from_user(dst, src, count * K1212_FRAME_SIZE); - - return 0; + return copy_from_user(dst, src, count * K1212_FRAME_SIZE) ? -EFAULT : 0; } static int snd_korg1212_capture_copy(snd_pcm_substream_t *substream, @@ -1475,9 +1473,7 @@ static int snd_korg1212_capture_copy(snd_pcm_substream_t *substream, snd_assert(pos + count <= K1212_MAX_SAMPLES, return -EINVAL); - copy_to_user(dst, src, count * K1212_FRAME_SIZE); - - return 0; + return copy_to_user(dst, src, count * K1212_FRAME_SIZE) ? -EFAULT : 0; } static int snd_korg1212_playback_silence(snd_pcm_substream_t *substream, diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 1a54d01056aa..2248d86bce3b 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -2015,7 +2015,8 @@ static int snd_rme9652_playback_copy(snd_pcm_substream_t *substream, int channel substream->pstr->stream, channel); snd_assert(channel_buf != NULL, return -EIO); - copy_from_user(channel_buf + pos * 4, src, count * 4); + if (copy_from_user(channel_buf + pos * 4, src, count * 4)) + return -EFAULT; return count; } @@ -2031,7 +2032,8 @@ static int snd_rme9652_capture_copy(snd_pcm_substream_t *substream, int channel, substream->pstr->stream, channel); snd_assert(channel_buf != NULL, return -EIO); - copy_to_user(dst, channel_buf + pos * 4, count * 4); + if (copy_to_user(dst, channel_buf + pos * 4, count * 4)) + return -EFAULT; return count; } |
