diff options
| author | Oleg Drokin <green@angband.namesys.com> | 2002-06-07 17:41:48 +0400 |
|---|---|---|
| committer | Oleg Drokin <green@angband.namesys.com> | 2002-06-07 17:41:48 +0400 |
| commit | eed2a0261b039da58d35e15582358cb8cfb2db05 (patch) | |
| tree | edd99164d7a9f8baef544ac6b10c1a73c3ad4632 | |
| parent | 556a2071e454ca342f4caeb9964f7bb28552be21 (diff) | |
| parent | 3da9cf2895a490d8c7a1a8d78130f66a88e93efa (diff) | |
Merge angband.namesys.com:/home/green/bk/linux-2.5
into angband.namesys.com:/home/green/bk_work/reiser3-linux-2.5-test
100 files changed, 6897 insertions, 5851 deletions
@@ -2779,6 +2779,10 @@ N: Christopher Smith E: x@xman.org D: Tulip net driver hacker +N: Mark Smith +E: mark.smith@comdev.cc +D: Multicast support in bonding driver + N: Miquel van Smoorenburg E: miquels@cistron.nl D: Kernel and net hacker. Sysvinit, minicom. doing Debian stuff. diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 442924cf6d22..0502eeee672e 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -161,18 +161,20 @@ LOG := $(patsubst %.sgml, %.log, $(BOOKS)) OUT := $(patsubst %.sgml, %.out, $(BOOKS)) clean: - rm -f core *~ - rm -f $(BOOKS) - rm -f $(DVI) $(AUX) $(TEX) $(LOG) $(OUT) - rm -f $(PNG-parportbook) $(EPS-parportbook) - rm -f $(C-procfs-example) - -mrproper: clean - rm -f $(PS) $(PDF) - rm -f -r $(HTML) - rm -f .depend - rm -f $(TOPDIR)/scripts/mkdep-docbook - rm -rf DBTOHTML_OUTPUT* + @echo 'Cleaning up (DocBook)' + @rm -f core *~ + @rm -f $(BOOKS) + @rm -f $(DVI) $(AUX) $(TEX) $(LOG) $(OUT) + @rm -f $(PNG-parportbook) $(EPS-parportbook) + @rm -f $(C-procfs-example) + +mrproper: + @echo 'Making mrproper (DocBook)' + @rm -f $(PS) $(PDF) + @rm -f -r $(HTML) + @rm -f .depend + @rm -f $(TOPDIR)/scripts/mkdep-docbook + @rm -rf DBTOHTML_OUTPUT* %.ps : %.sgml @(which db2ps > /dev/null 2>&1) || \ diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 5b1a63680d44..ce51c07246b5 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -73,52 +73,27 @@ sloppy=<BOOL> If sloppy is specified, ignore unknown mount options. Otherwise the default behaviour is to abort mount if any unknown options are found. -posix=<bool> Deprecated option. Still supported but please use - show_inodes=posix in the future. See description for - show_inodes=opt. - -show_sys_files=<bool> Deprecated option. Still supported but please use - show_inodes=system in the future. See description for - show_inodes=opt. 
- -show_inodes=opt Allows choice of which types of inode names readdir() - returns, i.e. this affects what "ls" shows. Following - values can be used for "opt": - system: show system files - win32: long file names (includes POSIX) [DEFAULT] - long: same as win32 - dos: short file names only (excludes POSIX) - short: same as dos - posix: same as both win32 and dos - all: all file names - Note that the options are additive, i.e. specifying: - show_inodes=system,show_inodes=win32,show_inodes=dos - is the same as specifying: - show_inodes=all - Note that the "posix" and "all" options will show all - directory names, BUT the link count on each directory - inode entry is set to 1, due to Linux not supporting - directory hard links. This may well confuse some - user space applications, since the directory names will - have the same inode numbers. Thus it is NOT advisable - to use the "posix" and "all" options. We provide them - only for completeness sake. - Further, note that the "system" option will not show - "$MFT" due to bugs/mis-features in glibc. Even though - it does not show, you can specifically "ls" it: - ls -l \$MFT - And of course you can stat it, too. - Further, note that irrespective of what show_inodes - option(s) you use, all files are accessible when you - specify the correct name, even though they may not be - shown in a normal "ls", i.e. you can always access the - system files and both the short and long file names of - files and directories. - Finally, note that win32 and dos file names are not - case sensitive and can be accessed using any - combination of lower and upper case, while POSIX file - names are case sensitive and they can only be accessed - given the correct case. +show_sys_files=<BOOL> If show_sys_files is specified, show the system files + in directory listings. Otherwise the default behaviour + is to hide the system files. + Note that even when show_sys_files is specified, "$MFT" + will not be visible due to bugs/mis-features in glibc. 
+ Further, note that irrespective of show_sys_files, all + files are accessible by name, i.e. you can always do + "ls -l \$UpCase" for example to specifically show the + system file containing the Unicode upcase table. + +case_sensitive=<BOOL> If case_sensitive is specified, treat all file names as + case sensitive and create file names in the POSIX + namespace. Otherwise the default behaviour is to treat + file names as case insensitive and to create file names + in the WIN32/LONG name space. Note, the Linux NTFS + driver will never create short file names and will + remove them on rename/delete of the corresponding long + file name. + Note that files remain accessible via their short file + name, if it exists. If case_sensitive, you will need to + provide the correct case of the short file name. errors=opt What to do when critical file system errors are found. Following values can be used for "opt": @@ -174,12 +149,22 @@ Features (from 9:43 minutes on average down to 7:53). The time spent in user space was unchanged but the time spent in the kernel was decreased by a factor of 2.5 (from 85 CPU seconds down to 33). +- The driver does not support short file names in general. For backwards + compatibility, we implement access to files using their short file names if + they exist. The driver will not create short file names however, and a rename + will discard any existing short file name. Known bugs and (mis-)features ============================= -- None +- The link count on each directory inode entry is set to 1, due to Linux not + supporting directory hard links. This may well confuse some user space + applications, since the directory names will have the same inode numbers. + This also speeds up ntfs_read_inode() immensely. And we haven't found any + problems with this approach so far. If you find a problem with this, please + let us know. 
+ Please send bug reports/comments/feedback/abuse to the Linux-NTFS development list at sourceforge: linux-ntfs-dev@lists.sourceforge.net @@ -260,8 +245,20 @@ number of sectors BEFORE attempting to use it. You have been warned! ChangeLog ========= -Note that a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. - +Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. + +2.0.8: + - Remove now obsolete show_inodes and posix mount option(s). + - Restore show_sys_files mount option. + - Add new mount option case_sensitive, to determine if the driver + treats file names as case sensitive or not. + - Mostly drop support for short file names (for backwards compatibility + we only support accessing files via their short file name if one + exists). + - Fix dcache aliasing issues wrt short/long file names. + - Cleanups and minor fixes. +2.0.7: + - Just cleanups. 2.0.6: - Major bugfix to make compatible with other kernel changes. This fixes the hangs/oopses on umount. @@ -37,6 +37,56 @@ HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer CROSS_COMPILE = +# That's our default target when none is given on the command line + +all: vmlinux + +# Print entire command lines instead of short version +# For now, leave the default + +ifndef KBUILD_VERBOSE + KBUILD_VERBOSE = 1 +endif + +# Decide whether to build built-in, modular, or both + +KBUILD_MODULES := 1 +KBUILD_BUILTIN := 1 + +export KBUILD_MODULES KBUILD_BUILTIN + +# Beautify output +# --------------------------------------------------------------------------- +# +# Normally, we echo the whole command before executing it. By making +# that echo $($(quiet)$(cmd)), we now have the possibility to set +# $(quiet) to choose other forms of output instead, e.g. +# +# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@ +# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +# +# If $(quiet) is empty, the whole command will be printed. +# If it is set to "quiet_", only the short version will be printed. 
+# If it is set to "silent_", nothing wil be printed at all, since +# the variable $(silent_cmd_cc_o_c) doesn't exist. + +# If the user wants quiet mode, echo short versions of the commands +# only and suppress the 'Entering/Leaving directory' messages + +ifneq ($(KBUILD_VERBOSE),1) + quiet=quiet_ + MAKEFLAGS += --no-print-directory +endif + +# If the user is running make -s (silent mode), suppress echoing of +# commands + +ifneq ($(findstring s,$(MAKEFLAGS)),) + quiet=silent_ +endif + +export quiet + # # Include the make variables (CC, etc...) # @@ -69,20 +119,31 @@ export CPPFLAGS EXPORT_FLAGS export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE -all: do-it-all +noconfig_targets := oldconfig xconfig menuconfig config clean mrproper \ + distclean +ifeq ($(filter $(noconfig_targets),$(MAKECMDGOALS)),) + +# Here goes the main Makefile +# =========================================================================== # -# Make "config" the default target if there is no configuration file or -# "depend" the target if there is no top-level dependency information. -# +# If the user gave a *config target, it'll be handled in another +# section below, since in this case we cannot include .config +# Same goes for other targets like clean/mrproper etc, which +# don't need .config, either -ifeq (.config,$(wildcard .config)) -include .config -do-it-all: vmlinux -else -CONFIGURATION = config -do-it-all: config -endif +# In this section, we need .config + +-include .config + +# If .config doesn't exist - tough luck + +.config: + @echo '***' + @echo '*** You have not yet configured your kernel!' 
+ @echo '*** Please run "make xconfig/menuconfig/config/oldconfig"' + @echo '***' + @exit 1 # # INSTALL_PATH specifies where to place the updated kernel and system map @@ -149,6 +210,7 @@ boot: vmlinux vmlinux-objs := $(HEAD) $(INIT) $(CORE_FILES) $(LIBS) $(DRIVERS) $(NETWORKS) +quiet_cmd_link_vmlinux = LD $@ cmd_link_vmlinux = $(LD) $(LINKFLAGS) $(HEAD) $(INIT) \ --start-group \ $(CORE_FILES) \ @@ -166,13 +228,13 @@ define rule_link_vmlinux . scripts/mkversion > .tmpversion mv -f .tmpversion .version $(MAKE) -C init - echo $(cmd_link_vmlinux) + $(call cmd,cmd_link_vmlinux) $(cmd_link_vmlinux) echo 'cmd_$@ := $(cmd_link_vmlinux)' > $(@D)/.$(@F).cmd $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map endef -vmlinux: $(CONFIGURATION) $(vmlinux-objs) FORCE +vmlinux: $(vmlinux-objs) FORCE $(call if_changed_rule,link_vmlinux) # The actual objects are generated when descending, @@ -183,15 +245,14 @@ $(sort $(vmlinux-objs)): $(SUBDIRS) ; # Handle descending into subdirectories listed in $(SUBDIRS) .PHONY: $(SUBDIRS) -$(SUBDIRS): prepare +$(SUBDIRS): .hdepend prepare include/config/MARKER @$(MAKE) -C $@ -# Things we need done before we even start the actual build. -# The dependency on .hdepend will in turn take care of -# include/asm, include/linux/version etc. 
+# Things we need done before we descend to build or make +# module versions are listed in "prepare" .PHONY: prepare -prepare: .hdepend include/config/MARKER +prepare: include/linux/version.h include/asm # Single targets # --------------------------------------------------------------------------- @@ -202,6 +263,8 @@ prepare: .hdepend include/config/MARKER @$(MAKE) -C $(@D) $(@F) %.o: %.c FORCE @$(MAKE) -C $(@D) $(@F) +%.lst: %.c FORCE + @$(MAKE) -C $(@D) $(@F) %.s: %.S FORCE @$(MAKE) -C $(@D) $(@F) %.o: %.S FORCE @@ -214,8 +277,6 @@ prepare: .hdepend include/config/MARKER include/asm: @echo 'Making asm->asm-$(ARCH) symlink' @ln -s asm-$(ARCH) $@ - @echo 'Making directory include/linux/modules' - @mkdir include/linux/modules # Split autoconf.h into include/linux/config/* @@ -223,6 +284,16 @@ include/config/MARKER: scripts/split-include include/linux/autoconf.h scripts/split-include include/linux/autoconf.h include/config @ touch include/config/MARKER +# if .config is newer than include/linux/autoconf.h, someone tinkered +# with it and forgot to run make oldconfig + +include/linux/autoconf.h: .config + @echo '***' + @echo '*** You changed .config w/o running make *config?' + @echo '*** Please run "make oldconfig"' + @echo '***' + @exit 1 + # Generate some files # --------------------------------------------------------------------------- @@ -230,56 +301,53 @@ include/config/MARKER: scripts/split-include include/linux/autoconf.h # this Makefile include/linux/version.h: Makefile - @echo Generating $@ - @. 
scripts/mkversion_h $@ $(KERNELRELEASE) $(VERSION) $(PATCHLEVEL) $(SUBLEVEL) + @scripts/mkversion_h $@ $(KERNELRELEASE) $(VERSION) $(PATCHLEVEL) $(SUBLEVEL) # Helpers built in scripts/ # --------------------------------------------------------------------------- -scripts/mkdep scripts/split-include : FORCE +scripts/fixdep scripts/split-include : scripts ; + +.PHONY: scripts +scripts: @$(MAKE) -C scripts -# Generate dependencies +# Generate module versions # --------------------------------------------------------------------------- -# In the same pass, generate module versions, that's why it's -# all mixed up here. +# The targets are still named depend / dep for traditional +# reasons, but the only thing we do here is generating +# the module version checksums. +# FIXME: For now, we are also calling "archdep" from here, +# which should be replaced by a more sensible solution. .PHONY: depend dep $(patsubst %,_sfdep_%,$(SUBDIRS)) depend dep: .hdepend -# .hdepend is missing prerequisites - in fact dependencies need -# to be redone basically each time anything changed - since -# that's too expensive, we traditionally rely on the user to -# run "make dep" manually whenever necessary. In this case, -# we make "FORCE" a prequisite, to force redoing the -# dependencies. Yeah, that's ugly, and it'll go away soon. +# .hdepend is our (misnomed) marker for whether we've run +# generated module versions and made archdep -.hdepend: scripts/mkdep include/linux/version.h include/asm \ - $(if $(filter dep depend,$(MAKECMDGOALS)),FORCE) - scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! 
-name modversions.h -print` > $@ - @$(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) -ifdef CONFIG_MODVERSIONS - @$(MAKE) include/linux/modversions.h -endif - @$(MAKE) archdep +.hdepend: $(if $(filter dep depend,$(MAKECMDGOALS)),FORCE) + @$(MAKE) archdep include/linux/modversions.h + @touch $@ -$(patsubst %,_sfdep_%,$(SUBDIRS)): FORCE - @$(MAKE) -C $(patsubst _sfdep_%, %, $@) fastdep +ifdef CONFIG_MODVERSIONS # Update modversions.h, but only if it would change. -include/linux/modversions.h: FORCE - @(echo "#ifndef _LINUX_MODVERSIONS_H";\ - echo "#define _LINUX_MODVERSIONS_H"; \ - echo "#include <linux/modsetver.h>"; \ - cd $(TOPDIR)/include/linux/modules; \ - for f in *.ver; do \ - if [ -f $$f ]; then echo "#include <linux/modules/$${f}>"; fi; \ - done; \ - echo "#endif"; \ +include/linux/modversions.h: scripts/fixdep prepare FORCE + @rm -rf .tmp_export-objs + @$(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) + @( echo "#ifndef _LINUX_MODVERSIONS_H";\ + echo "#define _LINUX_MODVERSIONS_H"; \ + echo "#include <linux/modsetver.h>"; \ + for f in `cd .tmp_export-objs; find modules -name \*.ver -print`; do \ + echo "#include <linux/$${f}>"; \ + done; \ + echo "#endif"; \ ) > $@.tmp + @rm -rf .tmp_export-objs @if [ -r $@ ] && cmp -s $@ $@.tmp; then \ echo $@ was not updated; \ rm -f $@.tmp; \ @@ -288,6 +356,17 @@ include/linux/modversions.h: FORCE mv -f $@.tmp $@; \ fi +$(patsubst %,_sfdep_%,$(SUBDIRS)): FORCE + @$(MAKE) -C $(patsubst _sfdep_%, %, $@) fastdep + +else # !CONFIG_MODVERSIONS + +.PHONY: include/linux/modversions.h + +include/linux/modversions.h: + +endif # CONFIG_MODVERSIONS + # --------------------------------------------------------------------------- # Modules @@ -300,11 +379,8 @@ MODFLAGS += -include $(HPATH)/linux/modversions.h endif .PHONY: modules -modules: $(patsubst %, _mod_%, $(SUBDIRS)) - -.PHONY: $(patsubst %, _mod_%, $(SUBDIRS)) -$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER - @$(MAKE) -C $(patsubst _mod_%, %, $@) modules 
+modules: + @$(MAKE) KBUILD_BUILTIN= $(SUBDIRS) # Install modules @@ -412,13 +488,36 @@ rpm: clean spec rpm -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \ rm $(TOPDIR)/../$(KERNELPATH).tar.gz +else # ifeq ($(filter $(noconfig_targets),$(MAKECMDGOALS)),) + # Targets which don't need .config # =========================================================================== +# +# These targets basically have their own Makefile - not quite, but at +# least its own exclusive section in the same Makefile. The reason for +# this is the following: +# To know the configuration, the main Makefile has to include +# .config. That's a obviously a problem when .config doesn't exist +# yet, but that could be kludged around with only including it if it +# exists. +# However, the larger problem is: If you run make *config, make will +# include the old .config, then execute your *config. It will then +# notice that a piece it included (.config) did change and restart from +# scratch. Which will cause execution of *config again. You get the +# picture. +# If we don't explicitly let the Makefile know that .config is changed +# by *config (the old way), it won't reread .config after *config, +# thus working with possibly stale values - we don't that either. +# +# So we divide things: This part here is for making *config targets, +# and other targets which should work when no .config exists yet. +# The main part above takes care of the rest after a .config exists. 
# Kernel configuration # --------------------------------------------------------------------------- -.PHONY: oldconfig xconfig menuconfig config +.PHONY: oldconfig xconfig menuconfig config \ + make_with_config oldconfig: $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in @@ -434,6 +533,22 @@ menuconfig: config: $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in +# How we generate .config depends on which *config the +# user chose when calling make + +.config: $(filter oldconfig xconfig menuconfig config,$(MAKECMDGOALS)) ; + +# If the user gave commands from both the need / need not +# .config sections, we need to call make again after +# .config is generated, now to take care of the remaining +# targets we know nothing about in this section + +remaining_targets := $(filter-out $(noconfig_targets),$(MAKECMDGOALS)) + +$(remaining_targets) : make_with_config + +make_with_config: .config + @$(MAKE) $(remaining_targets) # Cleaning up # --------------------------------------------------------------------------- @@ -483,34 +598,44 @@ MRPROPER_FILES += \ scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ .menuconfig.log \ include/asm \ - .hdepend scripts/mkdep scripts/split-include scripts/docproc \ - $(TOPDIR)/include/linux/modversions.h \ - kernel.spec + .hdepend scripts/split-include scripts/docproc \ + scripts/fixdep $(TOPDIR)/include/linux/modversions.h \ + tags TAGS kernel.spec \ # directories removed with 'make mrproper' MRPROPER_DIRS += \ include/config \ $(TOPDIR)/include/linux/modules +# That's our way to know about arch specific cleanup. + +include arch/$(ARCH)/Makefile clean: archclean - find . \( -name '*.[oas]' -o -name core -o -name '.*.cmd' \) -type f -print \ + @echo 'Cleaning up' + @find . 
\( -name \*.[oas] -o -name core -o -name .\*.cmd -o \ + -name .\*.tmp -o -name .\*.d \) -type f -print \ | grep -v lxdialog/ | xargs rm -f - rm -f $(CLEAN_FILES) - rm -rf $(CLEAN_DIRS) + @rm -f $(CLEAN_FILES) + @rm -rf $(CLEAN_DIRS) @$(MAKE) -C Documentation/DocBook clean mrproper: clean archmrproper - find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f - rm -f $(MRPROPER_FILES) - rm -rf $(MRPROPER_DIRS) + @echo 'Making mrproper' + @find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f + @rm -f $(MRPROPER_FILES) + @rm -rf $(MRPROPER_DIRS) @$(MAKE) -C Documentation/DocBook mrproper distclean: mrproper - rm -f core `find . \( -not -type d \) -and \ - \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ + @echo 'Making distclean' + @find . \( -not -type d \) -and \ + \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ - -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags + -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f \ + -print | xargs rm -f + +endif # ifeq ($(filter $(noconfig_targets),$(MAKECMDGOALS)),) # FIXME Should go into a make.lib or something # =========================================================================== @@ -531,4 +656,9 @@ if_changed_rule = $(if $(strip $? 
\ $(filter-out $(cmd_$(@F)),$(cmd_$(1)))),\ @$(rule_$(1))) +# If quiet is set, only print short version of rule + +cmd = @$(if $($(quiet)$(1)),echo ' $($(quiet)$(1))' &&) $($(1)) + + FORCE: diff --git a/Rules.make b/Rules.make index 03b28bb1ddff..82b549e87880 100644 --- a/Rules.make +++ b/Rules.make @@ -52,9 +52,8 @@ obj-m := $(filter-out %/, $(obj-m)) # add it to $(subdir-m) both-m := $(filter $(mod-subdirs), $(subdir-y)) -SUB_DIRS := $(subdir-y) $(if $(BUILD_MODULES),$(subdir-m)) -MOD_SUB_DIRS := $(sort $(subdir-m) $(both-m)) -ALL_SUB_DIRS := $(sort $(subdir-y) $(subdir-m) $(subdir-n) $(subdir-)) +subdir-ym := $(sort $(subdir-y) $(subdir-m)) +subdir-ymn := $(sort $(subdir-ym) $(subdir-n) $(subdir-)) # export.o is never a composite object, since $(export-objs) has a # fixed meaning (== objects which EXPORT_SYMBOL()) @@ -86,51 +85,67 @@ subdir-obj-y := $(foreach o,$(obj-y),$(if $(filter-out $(o),$(notdir $(o))),$(o) real-objs-y := $(foreach m, $(filter-out $(subdir-obj-y), $(obj-y)), $(if $($(m:.o=-objs)),$($(m:.o=-objs)),$(m))) $(EXTRA_TARGETS) real-objs-m := $(foreach m, $(obj-m), $(if $($(m:.o=-objs)),$($(m:.o=-objs)),$(m))) -# ========================================================================== -# # Get things started. -# -first_rule: vmlinux $(if $(BUILD_MODULES),$(obj-m)) +# ========================================================================== -# -# Common rules -# +ifndef O_TARGET +ifndef L_TARGET +O_TARGET := built-in.o +endif +endif + +# The echo suppresses the "Nothing to be done for first_rule" +first_rule: $(if $(KBUILD_BUILTIN),$(O_TARGET) $(L_TARGET) $(EXTRA_TARGETS)) \ + $(if $(KBUILD_MODULES),$(obj-m)) \ + sub_dirs + @echo -n # Compile C sources (.c) # --------------------------------------------------------------------------- -# FIXME: if we don't know if built-in or modular, assume built-in. +# If we don't know if built-in or modular, assume built-in. 
# Only happens in Makefiles which override the default first_rule: modkern_cflags := $(CFLAGS_KERNEL) -$(real-objs-y) : modkern_cflags := $(CFLAGS_KERNEL) -$(real-objs-y:.o=.i): modkern_cflags := $(CFLAGS_KERNEL) -$(real-objs-y:.o=.s): modkern_cflags := $(CFLAGS_KERNEL) +$(real-objs-y) : modkern_cflags := $(CFLAGS_KERNEL) +$(real-objs-y:.o=.i) : modkern_cflags := $(CFLAGS_KERNEL) +$(real-objs-y:.o=.s) : modkern_cflags := $(CFLAGS_KERNEL) +$(real-objs-y:.o=.lst): modkern_cflags := $(CFLAGS_KERNEL) -$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) -$(real-objs-m:.o=.i): modkern_cflags := $(CFLAGS_MODULE) -$(real-objs-m:.o=.s): modkern_cflags := $(CFLAGS_MODULE) +$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) +$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) +$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) -$(export-objs) : export_flags := $(EXPORT_FLAGS) -$(export-objs:.o=.i): export_flags := $(EXPORT_FLAGS) -$(export-objs:.o=.s): export_flags := $(EXPORT_FLAGS) +$(export-objs) : export_flags := $(EXPORT_FLAGS) +$(export-objs:.o=.i) : export_flags := $(EXPORT_FLAGS) +$(export-objs:.o=.s) : export_flags := $(EXPORT_FLAGS) +$(export-objs:.o=.lst): export_flags := $(EXPORT_FLAGS) c_flags = $(CFLAGS) $(modkern_cflags) $(EXTRA_CFLAGS) $(CFLAGS_$(*F).o) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) $(export_flags) -cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< +quiet_cmd_cc_s_c = CC $(RELDIR)/$@ +cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< %.s: %.c FORCE - $(call if_changed,cmd_cc_s_c) + $(call cmd,cmd_cc_s_c) -cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< +quiet_cmd_cc_i_c = CPP $(RELDIR)/$@ +cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< %.i: %.c FORCE - $(call if_changed,cmd_cc_i_c) + $(call cmd,cmd_cc_i_c) -cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +quiet_cmd_cc_o_c = CC $(RELDIR)/$@ +cmd_cc_o_c = $(CC) -Wp,-MD,.$(subst /,_,$@).d $(c_flags) -c -o $@ $< %.o: %.c FORCE - $(call if_changed,cmd_cc_o_c) + $(call if_changed_dep,cc_o_c) + 
+quiet_cmd_cc_lst_c = Generating $(RELDIR)/$@ +cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && $(TOPDIR)/scripts/makelst $*.o $(TOPDIR)/System.map $(OBJDUMP) > $@ + +%.lst: %.c FORCE + $(call cmd,cmd_cc_lst_c) # Compile assembler sources (.S) # --------------------------------------------------------------------------- @@ -146,37 +161,24 @@ $(real-objs-m:.o=.s): modkern_aflags := $(AFLAGS_MODULE) a_flags = $(AFLAGS) $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(*F).o) -cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< +quiet_cmd_as_s_S = CPP $(RELDIR)/$@ +cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< %.s: %.S FORCE - $(call if_changed,cmd_as_s_S) + $(call cmd,cmd_as_s_S) -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_as_o_S = AS $(RELDIR)/$@ +cmd_as_o_S = $(CC) -Wp,-MD,.$(subst /,_,$@).d $(a_flags) -c -o $@ $< %.o: %.S FORCE - $(call if_changed,cmd_as_o_S) - -# FIXME - -%.lst: %.c - $(CC) $(c_flags) -g -c -o $*.o $< - $(TOPDIR)/scripts/makelst $* $(TOPDIR) $(OBJDUMP) - + $(call if_changed_dep,as_o_S) # If a Makefile does define neither O_TARGET nor L_TARGET, # use a standard O_TARGET named "built-in.o" -ifndef O_TARGET -ifndef L_TARGET -O_TARGET := built-in.o -endif -endif - # Build the compiled-in targets # --------------------------------------------------------------------------- -vmlinux: $(O_TARGET) $(L_TARGET) $(EXTRA_TARGETS) sub_dirs - # To build objects in subdirs, we need to descend into the directories $(sort $(subdir-obj-y)): sub_dirs ; @@ -184,6 +186,7 @@ $(sort $(subdir-obj-y)): sub_dirs ; # Rule to compile a set of .o files into one .o file # ifdef O_TARGET +quiet_cmd_link_o_target = LD $(RELDIR)/$@ # If the list of objects to link is empty, just create an empty O_TARGET cmd_link_o_target = $(if $(strip $(obj-y)),\ $(LD) $(EXTRA_LDFLAGS) -r -o $@ $(filter $(obj-y), $^),\ @@ -197,6 +200,7 @@ endif # O_TARGET # Rule to compile a set of .o files into one .a file # ifdef L_TARGET +quiet_cmd_link_l_target = AR $(RELDIR)/$@ cmd_link_l_target = rm -f $@; $(AR) 
$(EXTRA_ARFLAGS) rcs $@ $(obj-y) $(L_TARGET): $(obj-y) FORCE @@ -207,7 +211,7 @@ endif # Rule to link composite objects # - +quiet_cmd_link_multi = LD $(RELDIR)/$@ cmd_link_multi = $(LD) $(EXTRA_LDFLAGS) -r -o $@ $(filter $($(basename $@)-objs),$^) # We would rather have a list of rules like @@ -220,62 +224,54 @@ $(multi-used-y) : %.o: $(multi-objs-y) FORCE $(multi-used-m) : %.o: $(multi-objs-m) FORCE $(call if_changed,cmd_link_multi) -# -# This make dependencies quickly -# -fastdep: FORCE - $(TOPDIR)/scripts/mkdep $(CFLAGS) $(EXTRA_CFLAGS) -- $(wildcard *.[chS]) > .depend -ifdef ALL_SUB_DIRS - $(MAKE) $(patsubst %,_sfdep_%,$(ALL_SUB_DIRS)) _FASTDEP_ALL_SUB_DIRS="$(ALL_SUB_DIRS)" -endif +# Descending when making module versions +# --------------------------------------------------------------------------- -ifdef _FASTDEP_ALL_SUB_DIRS -$(patsubst %,_sfdep_%,$(_FASTDEP_ALL_SUB_DIRS)): +fastdep-list := $(addprefix _sfdep_,$(subdir-ymn)) + +.PHONY: fastdep $(fastdep-list) + +fastdep: $(fastdep-list) + +$(fastdep-list): @$(MAKE) -C $(patsubst _sfdep_%,%,$@) fastdep -endif +# Descending when building +# --------------------------------------------------------------------------- -# -# A rule to make subdirectories -# -subdir-list = $(sort $(patsubst %,_subdir_%,$(SUB_DIRS))) -sub_dirs: FORCE $(subdir-list) +subdir-list := $(addprefix _subdir_,$(subdir-ym)) + +.PHONY: sub_dirs $(subdir-list) -ifdef SUB_DIRS -$(subdir-list) : FORCE +sub_dirs: $(subdir-list) + +$(subdir-list): @$(MAKE) -C $(patsubst _subdir_%,%,$@) -endif -# -# A rule to make modules -# -ifneq "$(strip $(MOD_SUB_DIRS))" "" -.PHONY: $(patsubst %,_modsubdir_%,$(MOD_SUB_DIRS)) -$(patsubst %,_modsubdir_%,$(MOD_SUB_DIRS)) : FORCE - @$(MAKE) -C $(patsubst _modsubdir_%,%,$@) modules +# Descending and installing modules +# --------------------------------------------------------------------------- -.PHONY: $(patsubst %,_modinst_%,$(MOD_SUB_DIRS)) -$(patsubst %,_modinst_%,$(MOD_SUB_DIRS)) : FORCE - @$(MAKE) -C 
$(patsubst _modinst_%,%,$@) modules_install -endif +modinst-list := $(addprefix _modinst_,$(subdir-ym)) -.PHONY: modules -modules: $(obj-m) FORCE $(patsubst %,_modsubdir_%,$(MOD_SUB_DIRS)) +.PHONY: modules_install _modinst_ $(modinst-list) -.PHONY: _modinst__ -_modinst__: FORCE -ifneq "$(strip $(obj-m))" "" - mkdir -p $(MODLIB)/kernel/$(RELDIR) - cp $(obj-m) $(MODLIB)/kernel/$(RELDIR) +modules_install: $(modinst-list) +ifneq ($(obj-m),) + @echo Installing modules in $(MODLIB)/kernel/$(RELDIR) + @mkdir -p $(MODLIB)/kernel/$(RELDIR) + @cp $(obj-m) $(MODLIB)/kernel/$(RELDIR) +else + @echo -n endif -.PHONY: modules_install -modules_install: _modinst__ $(patsubst %,_modinst_%,$(MOD_SUB_DIRS)) - +$(modinst-list): + @$(MAKE) -C $(patsubst _modinst_%,%,$@) modules_install # Add FORCE to the prequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- + .PHONY: FORCE + FORCE: # @@ -289,13 +285,11 @@ script: # Separate the object into "normal" objects and "exporting" objects # Exporting objects are: all objects that define symbol tables # -ifdef CONFIG_MODULES ifdef CONFIG_MODVERSIONS ifneq "$(strip $(export-objs))" "" -MODINCL := $(TOPDIR)/include/linux/modules -MODPREFIX := $(subst /,-,$(RELDIR))__ +MODVERDIR := $(TOPDIR)/include/linux/modules/$(RELDIR) # # Added the SMP separator to stop module accidents between uniprocessor @@ -311,47 +305,32 @@ endif # We don't track dependencies for .ver files, so we FORCE to check # them always (i.e. always at "make dep" time). 
+quiet_cmd_create_ver = Creating include/linux/modules/$(RELDIR)/$*.ver cmd_create_ver = $(CC) $(CFLAGS) $(EXTRA_CFLAGS) -E -D__GENKSYMS__ $< | \ $(GENKSYMS) $(genksyms_smp_prefix) -k $(VERSION).$(PATCHLEVEL).$(SUBLEVEL) > $@.tmp -$(MODINCL)/$(MODPREFIX)%.ver: %.c FORCE - @echo $(cmd_create_ver) - @$(cmd_create_ver) +$(MODVERDIR)/%.ver: %.c FORCE + @mkdir -p $(dir $@) + @$(call cmd,cmd_create_ver) @if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - echo $@ is unchanged; rm -f $@.tmp; \ + rm -f $@.tmp; \ else \ - echo mv $@.tmp $@; mv -f $@.tmp $@; \ + touch $(TOPDIR)/include/linux/modversions.h; \ + mv -f $@.tmp $@; \ fi # updates .ver files but not modversions.h -fastdep: $(addprefix $(MODINCL)/$(MODPREFIX),$(export-objs:.o=.ver)) - -endif # export-objs +fastdep: $(addprefix $(MODVERDIR)/,$(export-objs:.o=.ver)) +ifneq ($(export-objs),) + @mkdir -p $(TOPDIR)/.tmp_export-objs/modules/$(RELDIR) + @touch $(addprefix $(TOPDIR)/.tmp_export-objs/modules/$(RELDIR)/,$(export-objs:.o=.ver)) +endif -# make dep cannot correctly figure out the dependency on the generated -# modversions.h, so we list them here: -# o files which export symbols and are compiled into the kernel include -# it (to generate a correct symbol table) -# o all modules get compiled with -include modversions.h -$(filter $(export-objs),$(real-objs-y)): $(TOPDIR)/include/linux/modversions.h -$(real-objs-m): $(TOPDIR)/include/linux/modversions.h +endif # export-objs endif # CONFIG_MODVERSIONS -endif # CONFIG_MODULES - -# -# include dependency files if they exist -# -ifneq ($(wildcard .depend),) -include .depend -endif - -ifneq ($(wildcard $(TOPDIR)/.hdepend),) -include $(TOPDIR)/.hdepend -endif - # --------------------------------------------------------------------------- # Check if command line has changed @@ -383,7 +362,7 @@ endif # which is saved in .<target>.o, to the current command line using # the two filter-out commands) -# read all saved command lines +# read all saved command lines and dependencies 
cmd_files := $(wildcard .*.cmd) ifneq ($(cmd_files),) @@ -395,4 +374,22 @@ endif if_changed = $(if $(strip $? \ $(filter-out $($(1)),$(cmd_$(@F)))\ $(filter-out $(cmd_$(@F)),$($(1)))),\ - @echo '$($(1))' && $($(1)) && echo 'cmd_$@ := $($(1))' > $(@D)/.$(@F).cmd) + @$(if $($(quiet)$(1)),echo ' $($(quiet)$(1))' &&) $($(1)) && echo 'cmd_$@ := $($(1))' > $(@D)/.$(@F).cmd) + + +# execute the command and also postprocess generated .d dependencies +# file + +if_changed_dep = $(if $(strip $? \ + $(filter-out $(cmd_$(1)),$(cmd_$@))\ + $(filter-out $(cmd_$@),$(cmd_$(1)))),\ + @set -e; \ + $(if $($(quiet)cmd_$(1)),echo ' $($(quiet)cmd_$(1))';) \ + $(cmd_$(1)); \ + $(TOPDIR)/scripts/fixdep $(subst /,_,$@) $(TOPDIR) '$(cmd_$(1))' > .$(subst /,_,$@).tmp; \ + rm -f .$(subst /,_,$@).d; \ + mv -f .$(subst /,_,$@).tmp .$(subst /,_,$@).cmd ) + +# If quiet is set, only print short version of command + +cmd = @$(if $($(quiet)$(1)),echo ' $($(quiet)$(1))' &&) $($(1)) diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index ac37855167fe..f4fbe33b6875 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -114,12 +114,12 @@ endif export SYSTEM ZTEXTADDR ZBSSADDR ZRELADDR INITRD_PHYS PARAMS_PHYS -Image: $(CONFIGURE) $(SYSTEM) +Image: $(SYSTEM) $(OBJCOPY) -O binary -R .note -R .comment -S $(SYSTEM) $@ bzImage: zImage -zImage: $(CONFIGURE) compressed/vmlinux +zImage: compressed/vmlinux $(OBJCOPY) -O binary -R .note -R .comment -S compressed/vmlinux $@ bootpImage: bootp/bootp @@ -135,10 +135,10 @@ initrd: @test "$(INITRD_PHYS)" != "" || (echo This architecture does not support INITRD; exit -1) @test "$(INITRD)" != "" || (echo You must specify INITRD; exit -1) -install: $(CONFIGURE) Image +install: Image sh ./install.sh $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) Image $(TOPDIR)/System.map "$(INSTALL_PATH)" -zinstall: $(CONFIGURE) zImage +zinstall: zImage sh ./install.sh $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) zImage $(TOPDIR)/System.map 
"$(INSTALL_PATH)" clean: diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 5b0f06adf4e7..c307d9355da2 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -133,6 +133,7 @@ bzdisk: vmlinux @$(MAKEBOOT) BOOTIMAGE=bzImage zdisk install: vmlinux + @echo 'Cleaning up (arch)' @$(MAKEBOOT) BOOTIMAGE=bzImage install archclean: diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile index d2f534386731..d7bec772a897 100644 --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile @@ -31,11 +31,11 @@ BOOT_INCL = $(TOPDIR)/include/linux/config.h \ $(TOPDIR)/include/linux/autoconf.h \ $(TOPDIR)/include/asm/boot.h -zImage: $(CONFIGURE) bootsect setup compressed/vmlinux tools/build +zImage: bootsect setup compressed/vmlinux tools/build $(OBJCOPY) compressed/vmlinux compressed/vmlinux.out tools/build bootsect setup compressed/vmlinux.out $(ROOT_DEV) > zImage -bzImage: $(CONFIGURE) bbootsect bsetup compressed/bvmlinux tools/build +bzImage: bbootsect bsetup compressed/bvmlinux tools/build $(OBJCOPY) compressed/bvmlinux compressed/bvmlinux.out tools/build -b bbootsect bsetup compressed/bvmlinux.out $(ROOT_DEV) > bzImage @@ -48,14 +48,14 @@ compressed/bvmlinux: $(TOPDIR)/vmlinux zdisk: $(BOOTIMAGE) dd bs=8192 if=$(BOOTIMAGE) of=/dev/fd0 -zlilo: $(CONFIGURE) $(BOOTIMAGE) +zlilo: $(BOOTIMAGE) if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz cp $(TOPDIR)/System.map $(INSTALL_PATH)/ if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi -install: $(CONFIGURE) $(BOOTIMAGE) +install: $(BOOTIMAGE) sh -x ./install.sh $(KERNELRELEASE) $(BOOTIMAGE) $(TOPDIR)/System.map "$(INSTALL_PATH)" tools/build: tools/build.c @@ -100,7 +100,8 @@ bsetup.s: setup.S video.S Makefile $(BOOT_INCL) $(TOPDIR)/include/linux/version. 
dep: clean: - rm -f tools/build - rm -f setup bootsect zImage compressed/vmlinux.out - rm -f bsetup bbootsect bzImage compressed/bvmlinux.out + @echo 'Cleaning up (boot)' + @rm -f tools/build + @rm -f setup bootsect zImage compressed/vmlinux.out + @rm -f bsetup bbootsect bzImage compressed/bvmlinux.out @$(MAKE) -C compressed clean diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile index e323873c4b26..70446cc3e5c7 100644 --- a/arch/i386/boot/compressed/Makefile +++ b/arch/i386/boot/compressed/Makefile @@ -47,4 +47,4 @@ piggy.o: $(SYSTEM) rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk clean: - rm -f vmlinux bvmlinux _tmp_* + @rm -f vmlinux bvmlinux _tmp_* diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 8ca2b738265c..b6623556c819 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -EXTRA_TARGETS := head.o init_task.o +EXTRA_TARGETS := kernel.o head.o init_task.o O_TARGET := kernel.o @@ -13,7 +13,9 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ pci-dma.o i386_ksyms.o i387.o bluesmoke.o dmi_scan.o \ bootflag.o +obj-y += cpu/ obj-$(CONFIG_MCA) += mca.o +obj-$(CONFIG_EISA) += eisa.o obj-$(CONFIG_MTRR) += mtrr.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o @@ -31,6 +33,6 @@ obj-y += setup-visws.o obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o endif -EXTRA_AFLAGS := -traditional +EXTRA_AFLAGS := -traditional include $(TOPDIR)/Rules.make diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile new file mode 100644 index 000000000000..18b9d13323d4 --- /dev/null +++ b/arch/i386/kernel/cpu/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for x86-compatible CPU details and quirks +# + +obj-y := common.o proc.o + +obj-y += amd.o +obj-y += cyrix.o +obj-y += centaur.o +obj-y += transmeta.o +obj-y += intel.o +obj-y += rise.o +obj-y += nexgen.o +obj-y += umc.o + +include $(TOPDIR)/Rules.make diff --git 
a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c new file mode 100644 index 000000000000..a7d5a572c050 --- /dev/null +++ b/arch/i386/kernel/cpu/amd.c @@ -0,0 +1,216 @@ +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/io.h> +#include <asm/processor.h> + +#include "cpu.h" + +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +static void __init init_amd(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = max_mapnr >> (20-PAGE_SHIFT); + int r; + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 5: + if( c->x86_model < 6 ) + { + /* Based on AMD doc 20734R - June 2000 */ + if ( c->x86_model == 0 ) { + clear_bit(X86_FEATURE_APIC, c->x86_capability); + set_bit(X86_FEATURE_PGE, c->x86_capability); + } + break; + } + + if ( c->x86_model == 6 && c->x86_mask == 1 ) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. 
+ */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + /* Knock these two lines out if it debugs out ok */ + printk(KERN_INFO "K6 BUG %ld %d (Report these if test report is incorrect)\n", d, 20*K6_BUG_LOOP); + printk(KERN_INFO "AMD K6 stepping B detected - "); + /* -- cut here -- */ + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://www.mygale.com/~poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model== 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if(mbytes>508) + mbytes=508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF)==0) { + unsigned long flags; + l=(1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + break; + } + + if ((c->x86_model == 8 && c->x86_mask >7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if(mbytes>4092) + mbytes=4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000)==0) { + unsigned long flags; + l=((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); + break; + } + break; + + case 6: /* An Athlon/Duron */ + + /* Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan CPU's. + * If the BIOS didn't enable it already, enable it + * here. 
+ */ + if (c->x86_model == 6 || c->x86_model == 7) { + if (!test_bit(X86_FEATURE_XMM, c->x86_capability)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_bit(X86_FEATURE_XMM, c->x86_capability); + } + } + break; + + } + + display_cacheinfo(c); +// return r; +} + +static void amd_identify(struct cpuinfo_x86 * c) +{ + u32 xlvl; + + if (have_cpuid_p()) { + generic_identify(c); + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + } +} + +static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; +} + +static struct cpu_dev amd_cpu_dev __initdata = { + c_vendor: "AMD", + c_ident: { "AuthenticAMD" }, + c_models: { + { X86_VENDOR_AMD, 4, + { + [3] "486 DX/2", + [7] "486 DX/2-WB", + [8] "486 DX/4", + [9] "486 DX/4-WB", + [14] "Am5x86-WT", + [15] "Am5x86-WB" + } + }, + }, + c_init: init_amd, + c_identify: amd_identify, + c_size_cache: amd_size_cache, +}; + +int __init amd_init_cpu(void) +{ + cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; + return 0; +} + +//early_arch_initcall(amd_init_cpu); diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c new file mode 100644 index 000000000000..e5bd1bebc4b0 --- /dev/null +++ b/arch/i386/kernel/cpu/centaur.c @@ -0,0 +1,426 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include "cpu.h" + +#ifdef CONFIG_X86_OOSTORE + +static u32 __init power2(u32 x) 
+{ + u32 s=1; + while(s<=x) + s<<=1; + return s>>=1; +} + + +/* + * Set up an actual MCR + */ + +static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) +{ + u32 lo, hi; + + hi = base & ~0xFFF; + lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ + lo &= ~0xFFF; /* Remove the ctrl value bits */ + lo |= key; /* Attribute we wish to set */ + wrmsr(reg+MSR_IDT_MCR0, lo, hi); + mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ +} + +/* + * Figure what we can cover with MCR's + * + * Shortcut: We know you can't put 4Gig of RAM on a winchip + */ + +static u32 __init ramtop(void) /* 16388 */ +{ + int i; + u32 top = 0; + u32 clip = 0xFFFFFFFFUL; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + + if (e820.map[i].addr > 0xFFFFFFFFUL) + continue; + /* + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophy already + */ + + if (e820.map[i].type == E820_RESERVED) + { + if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) + clip = e820.map[i].addr; + continue; + } + start = e820.map[i].addr; + end = e820.map[i].addr + e820.map[i].size; + if (start >= end) + continue; + if (end > top) + top = end; + } + /* Everything below 'top' should be RAM except for the ISA hole. + Because of the limited MCR's we want to map NV/ACPI into our + MCR range for gunk in RAM + + Clip might cause us to MCR insufficient RAM but that is an + acceptable failure mode and should only bite obscure boxes with + a VESA hole at 15Mb + + The second case Clip sometimes kicks in is when the EBDA is marked + as reserved. 
Again we fail safe with reasonable results + */ + + if(top>clip) + top=clip; + + return top; +} + +/* + * Compute a set of MCR's to give maximum coverage + */ + +static int __init centaur_mcr_compute(int nr, int key) +{ + u32 mem = ramtop(); + u32 root = power2(mem); + u32 base = root; + u32 top = root; + u32 floor = 0; + int ct = 0; + + while(ct<nr) + { + u32 fspace = 0; + + /* + * Find the largest block we will fill going upwards + */ + + u32 high = power2(mem-top); + + /* + * Find the largest block we will fill going downwards + */ + + u32 low = base/2; + + /* + * Don't fill below 1Mb going downwards as there + * is an ISA hole in the way. + */ + + if(base <= 1024*1024) + low = 0; + + /* + * See how much space we could cover by filling below + * the ISA hole + */ + + if(floor == 0) + fspace = 512*1024; + else if(floor ==512*1024) + fspace = 128*1024; + + /* And forget ROM space */ + + /* + * Now install the largest coverage we get + */ + + if(fspace > high && fspace > low) + { + centaur_mcr_insert(ct, floor, fspace, key); + floor += fspace; + } + else if(high > low) + { + centaur_mcr_insert(ct, top, high, key); + top += high; + } + else if(low > 0) + { + base -= low; + centaur_mcr_insert(ct, base, low, key); + } + else break; + ct++; + } + /* + * We loaded ct values. We now need to set the mask. The caller + * must do this bit. + */ + + return ct; +} + +static void __init centaur_create_optimal_mcr(void) +{ + int i; + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining and weak write ordered. + * + * To experiment with: Linux never uses stack operations for + * mmio spaces so we could globally enable stack operation wc + * + * Load the registers with type 31 - full write combining, all + * writes weakly ordered. 
+ */ + int used = centaur_mcr_compute(6, 31); + + /* + * Wipe unused MCRs + */ + + for(i=used;i<8;i++) + wrmsr(MSR_IDT_MCR0+i, 0, 0); +} + +static void __init winchip2_create_optimal_mcr(void) +{ + u32 lo, hi; + int i; + + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining, weak store ordered. + * + * Load the registers with type 25 + * 8 - weak write ordering + * 16 - weak read ordering + * 1 - write combining + */ + + int used = centaur_mcr_compute(6, 25); + + /* + * Mark the registers we are using. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + for(i=0;i<used;i++) + lo|=1<<(9+i); + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + + /* + * Wipe unused MCRs + */ + + for(i=used;i<8;i++) + wrmsr(MSR_IDT_MCR0+i, 0, 0); +} + +/* + * Handle the MCR key on the Winchip 2. + */ + +static void __init winchip2_unprotect_mcr(void) +{ + u32 lo, hi; + u32 key; + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + lo&=~0x1C0; /* blank bits 8-6 */ + key = (lo>>17) & 7; + lo |= key<<6; /* replace with unlock key */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +static void __init winchip2_protect_mcr(void) +{ + u32 lo, hi; + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + lo&=~0x1C0; /* blank bits 8-6 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} +#endif + +static void __init init_centaur(struct cpuinfo_x86 *c) +{ + enum { + ECX8=1<<1, + EIERRINT=1<<2, + DPM=1<<3, + DMCE=1<<4, + DSTPCLK=1<<5, + ELINEAR=1<<6, + DSMC=1<<7, + DTLOCK=1<<8, + EDCTLB=1<<8, + EMMX=1<<9, + DPDC=1<<11, + EBRPRED=1<<12, + DIC=1<<13, + DDC=1<<14, + DNA=1<<15, + ERETSTK=1<<16, + E2MMX=1<<19, + EAMD3D=1<<20, + }; + + char *name; + u32 fcr_set=0; + u32 fcr_clr=0; + u32 lo,hi,newlo; + u32 aa,bb,cc,dd; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + switch (c->x86) { + + case 5: + switch(c->x86_model) { + case 4: + name="C6"; + fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; + fcr_clr=DPDC; + printk(KERN_NOTICE 
"Disabling bugged TSC.\n"); + clear_bit(X86_FEATURE_TSC, c->x86_capability); +#ifdef CONFIG_X86_OOSTORE + centaur_create_optimal_mcr(); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + + The C6 original lacks weak read order + + Note 0x120 is write only on Winchip 1 */ + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); +#endif + break; + case 8: + switch(c->x86_mask) { + default: + name="2"; + break; + case 7 ... 9: + name="2A"; + break; + case 10 ... 15: + name="2B"; + break; + } + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + case 9: + name="3"; + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + case 10: + name="4"; + /* no info on the WC4 yet */ + break; + default: + name="??"; + } + + rdmsr(MSR_IDT_FCR1, lo, hi); + newlo=(lo|fcr_set) & (~fcr_clr); + + if (newlo!=lo) { + printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); + wrmsr(MSR_IDT_FCR1, newlo, hi ); + } else { + printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); + } + /* Emulate MTRRs using Centaur's MCR. */ + set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); + /* Report CX8 */ + set_bit(X86_FEATURE_CX8, c->x86_capability); + /* Set 3DNow! on Winchip 2 and above. 
*/ + if (c->x86_model >=8) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + /* See if we can find out some more. */ + if ( cpuid_eax(0x80000000) >= 0x80000005 ) { + /* Yes, we can. */ + cpuid(0x80000005,&aa,&bb,&cc,&dd); + /* Add L1 data and code cache sizes. */ + c->x86_cache_size = (cc>>24)+(dd>>24); + } + sprintf( c->x86_model_id, "WinChip %s", name ); + break; + + case 6: + switch (c->x86_model) { + case 6 ... 8: /* Cyrix III family */ + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= (1<<1 | 1<<7); /* Report CX8 & enable PGE */ + wrmsr (MSR_VIA_FCR, lo, hi); + + set_bit(X86_FEATURE_CX8, c->x86_capability); + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + + get_model_name(c); + display_cacheinfo(c); + break; + } + break; + } +} + +static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* VIA C3 CPUs (670-68F) need further shifting. */ + if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) + size >>= 8; + return size; +} + +static struct cpu_dev centaur_cpu_dev __initdata = { + c_vendor: "Centaur", + c_ident: { "CentaurHauls" }, + c_init: init_centaur, + c_size_cache: centaur_size_cache, +}; + +int __init centaur_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; + return 0; +} + +//early_arch_initcall(centaur_init_cpu); diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog new file mode 100644 index 000000000000..cef76b80a710 --- /dev/null +++ b/arch/i386/kernel/cpu/changelog @@ -0,0 +1,63 @@ +/* + * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean + * and Martin Mares, November 1997. + * + * Force Cyrix 6x86(MX) and M II processors to report MTRR capability + * and Cyrix "coma bug" recognition by + * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999. + * + * Force Centaur C6 processors to report MTRR capability. + * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999. + * + * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999. 
+ * + * IDT Winchip tweaks, misc clean ups. + * Dave Jones <davej@suse.de>, August 1999 + * + * Better detection of Centaur/IDT WinChip models. + * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999. + * + * Cleaned up cache-detection code + * Dave Jones <davej@suse.de>, October 1999 + * + * Added proper L2 cache detection for Coppermine + * Dragan Stancevic <visitor@valinux.com>, October 1999 + * + * Added the original array for capability flags but forgot to credit + * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff + * Jauder Ho <jauderho@carumba.com>, January 2000 + * + * Detection for Celeron coppermine, identify_cpu() overhauled, + * and a few other clean ups. + * Dave Jones <davej@suse.de>, April 2000 + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * + * Added proper Cascades CPU and L2 cache detection for Cascades + * and 8-way type cache happy bunch from Intel:^) + * Dragan Stancevic <visitor@valinux.com>, May 2000 + * + * Forward port AMD Duron errata T13 from 2.2.17pre + * Dave Jones <davej@suse.de>, August 2000 + * + * Forward port lots of fixes/improvements from 2.2.18pre + * Cyrix III, Pentium IV support. + * Dave Jones <davej@suse.de>, October 2000 + * + * Massive cleanup of CPU detection and bug handling; + * Transmeta CPU detection, + * H. Peter Anvin <hpa@zytor.com>, November 2000 + * + * VIA C3 Support. + * Dave Jones <davej@suse.de>, March 2001 + * + * AMD Athlon/Duron/Thunderbird bluesmoke support. + * Dave Jones <davej@suse.de>, April 2001. + * + * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix. + * Dave Jones <davej@suse.de>, September, October 2001. 
+ * + */ + diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c new file mode 100644 index 000000000000..95530e701b4d --- /dev/null +++ b/arch/i386/kernel/cpu/common.c @@ -0,0 +1,486 @@ +#include <linux/init.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/smp.h> +#include <asm/semaphore.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/mmu_context.h> + +#include "cpu.h" + +static int cachesize_override __initdata = -1; +static int disable_x86_fxsr __initdata = 0; + +struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; + +static struct cpu_dev default_cpu; +static struct cpu_dev * this_cpu = &default_cpu; + +extern void mcheck_init(struct cpuinfo_x86 *c); + +static void default_init(struct cpuinfo_x86 * c) +{ + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +} + +static struct cpu_dev default_cpu = { + c_init: default_init, +}; + +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +#ifndef CONFIG_X86_TSC +static int tsc_disable __initdata = 0; + +static int __init tsc_setup(char *str) +{ + tsc_disable = 1; + return 1; +} + +__setup("notsc", tsc_setup); +#endif + +int __init get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q 
) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +void __init display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c,l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + +/* Naming convention should be: <Name> [(<Codename>)] */ +/* This table only is used unless init_<vendor>() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ + +/* Look up CPU names by table lookup. 
*/ +static char __init *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + + + +void __init get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + int i; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v,cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v,cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + this_cpu = cpu_devs[i]; + break; + } + } + } +} + + +static int __init x86_fxsr_setup(char * s) +{ + disable_x86_fxsr = 1; + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +int __init have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init generic_identify(struct cpuinfo_x86 * c) +{ + u32 tfms; + int junk; + + if (have_cpuid_p()) { + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + u32 capability; + cpuid(0x00000001, &tfms, &junk, &junk, &capability); + c->x86_capability[0] = capability; + c->x86 = (tfms >> 8) & 15; + 
c->x86_model = (tfms >> 4) & 15; + c->x86_mask = tfms & 15; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + } +} + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +void __init identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if (!have_cpuid_p()) { + /* First of all, decide if this is a 486 or higher */ + /* It's a 486 if we can modify the AC flag */ + if ( flag_is_changeable_p(X86_EFLAGS_AC) ) + c->x86 = 4; + else + c->x86 = 3; + } + + if (this_cpu->c_identify) + this_cpu->c_identify(c); + else + generic_identify(c); + + printk(KERN_DEBUG "CPU: Before vendor init, caps: %08lx %08lx %08lx, vendor = %d\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + printk(KERN_DEBUG "CPU: After vendor init, caps: %08lx %08lx %08lx %08lx\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* TSC disabled? */ +#ifndef CONFIG_X86_TSC + if ( tsc_disable ) + clear_bit(X86_FEATURE_TSC, c->x86_capability); +#endif + + /* FXSR disabled? 
*/ + if (disable_x86_fxsr) { + clear_bit(X86_FEATURE_FXSR, c->x86_capability); + clear_bit(X86_FEATURE_XMM, c->x86_capability); + } + + /* Init Machine Check Exception if available. */ + mcheck_init(c); + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86_vendor, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After generic, caps: %08lx %08lx %08lx %08lx\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + printk(KERN_DEBUG "CPU: Common caps: %08lx %08lx %08lx %08lx\n", + boot_cpu_data.x86_capability[0], + boot_cpu_data.x86_capability[1], + boot_cpu_data.x86_capability[2], + boot_cpu_data.x86_capability[3]); +} +/* + * Perform early boot up checks for a valid TSC. 
See arch/i386/kernel/time.c + */ + +void __init dodgy_tsc(void) +{ + get_cpu_vendor(&boot_cpu_data); + if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) || + ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC )) + cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data); +} + +void __init print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +unsigned long cpu_initialized __initdata = 0; + +/* This is hacky. :) + * We're emulating future behavior. + * In the future, the cpu-specific init functions will be called implicitly + * via the magic of initcalls. + * They will insert themselves into the cpu_devs structure. + * Then, when cpu_init() is called, we can just iterate over that array. + */ + +extern int intel_cpu_init(void); +extern int cyrix_init_cpu(void); +extern int nsc_init_cpu(void); +extern int amd_init_cpu(void); +extern int centaur_init_cpu(void); +extern int transmeta_init_cpu(void); +extern int rise_init_cpu(void); +extern int nexgen_init_cpu(void); +extern int umc_init_cpu(void); + +void __init early_cpu_init(void) +{ + intel_cpu_init(); + cyrix_init_cpu(); + nsc_init_cpu(); + amd_init_cpu(); + centaur_init_cpu(); + transmeta_init_cpu(); + rise_init_cpu(); + nexgen_init_cpu(); + umc_init_cpu(); +} +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. 
+ */ +void __init cpu_init (void) +{ + int nr = smp_processor_id(); + struct tss_struct * t = &init_tss[nr]; + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + for (;;) __sti(); + } + printk(KERN_INFO "Initializing CPU#%d\n", nr); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); +#ifndef CONFIG_X86_TSC + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } +#endif + + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); + __asm__ __volatile__("lidt %0": "=m" (idt_descr)); + + /* + * Delete NT + */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + + /* + * set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if(current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current, nr); + + t->esp0 = current->thread.esp0; + set_tss_desc(nr,t); + gdt_table[__TSS(nr)].b &= 0xfffffdff; + load_TR(nr); + load_LDT(&init_mm.context); + + /* Clear %fs and %gs. 
*/ + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + + /* Clear all 6 debug registers: */ + +#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); + + CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); + +#undef CD + + /* + * Force FPU initialization: + */ + clear_thread_flag(TIF_USEDFPU); + current->used_math = 0; + stts(); +} diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h new file mode 100644 index 000000000000..ad3a3371efcd --- /dev/null +++ b/arch/i386/kernel/cpu/cpu.h @@ -0,0 +1,28 @@ + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* attempt to consolidate cpu attributes */ +struct cpu_dev { + char * c_vendor; + + /* some have two possibilities for cpuid string */ + char * c_ident[2]; + + struct cpu_model_info c_models[4]; + + void (*c_init)(struct cpuinfo_x86 * c); + void (*c_identify)(struct cpuinfo_x86 * c); + unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); +}; + +extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; + +extern int get_model_name(struct cpuinfo_x86 *c); +extern void display_cacheinfo(struct cpuinfo_x86 *c); + +extern void generic_identify(struct cpuinfo_x86 * c); +extern int have_cpuid_p(void); diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c new file mode 100644 index 000000000000..d9d3771b428d --- /dev/null +++ b/arch/i386/kernel/cpu/cyrix.c @@ -0,0 +1,352 @@ +#include <linux/init.h> +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> + +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/processor.h> + +#include "cpu.h" + +/* + * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. 
about the CPU + */ +void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned char ccr2, ccr3; + unsigned long flags; + + /* we test for DEVID by checking whether CCR3 is writable */ + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, ccr3 ^ 0x80); + getCx86(0xc0); /* dummy to change bus */ + + if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ + ccr2 = getCx86(CX86_CCR2); + setCx86(CX86_CCR2, ccr2 ^ 0x04); + getCx86(0xc0); /* dummy */ + + if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ + *dir0 = 0xfd; + else { /* Cx486S A step */ + setCx86(CX86_CCR2, ccr2); + *dir0 = 0xfe; + } + } + else { + setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ + + /* read DIR0 and DIR1 CPU registers */ + *dir0 = getCx86(CX86_DIR0); + *dir1 = getCx86(CX86_DIR1); + } + local_irq_restore(flags); +} + +/* + * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in + * order to identify the Cyrix CPU model after we're out of setup.c + * + * Actually since bugs.h doesnt even reference this perhaps someone should + * fix the documentation ??? + */ +static unsigned char Cx86_dir0_msb __initdata = 0; + +static char Cx86_model[][9] __initdata = { + "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", + "M II ", "Unknown" +}; +static char Cx486_name[][5] __initdata = { + "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", + "SRx2", "DRx2" +}; +static char Cx486S_name[][4] __initdata = { + "S", "S2", "Se", "S2e" +}; +static char Cx486D_name[][4] __initdata = { + "DX", "DX2", "?", "?", "?", "DX4" +}; +static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; +static char cyrix_model_mult1[] __initdata = "12??43"; +static char cyrix_model_mult2[] __initdata = "12233445"; + +/* + * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old + * BIOSes for compatability with DOS games. This makes the udelay loop + * work correctly, and improves performance. + * + * FIXME: our newer udelay uses the tsc. 
We dont need to frob with SLOP + */ + +extern void calibrate_delay(void) __init; + +static void __init check_cx686_slop(struct cpuinfo_x86 *c) +{ + unsigned long flags; + + if (Cx86_dir0_msb == 3) { + unsigned char ccr3, ccr5; + + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ccr5 = getCx86(CX86_CCR5); + if (ccr5 & 2) + setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + + if (ccr5 & 2) { /* possible wrong calibration done */ + printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + calibrate_delay(); + c->loops_per_jiffy = loops_per_jiffy; + } + } +} + +static void __init init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; + char *buf = c->x86_model_id; + const char *p = NULL; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ + if ( test_bit(1*32+24, c->x86_capability) ) { + clear_bit(1*32+24, c->x86_capability); + set_bit(X86_FEATURE_CXMMX, c->x86_capability); + } + + do_cyrix_devid(&dir0, &dir1); + + check_cx686_slop(c); + + Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ + + /* common case step number/rev -- exceptions handled below */ + c->x86_model = (dir1 >> 4) + 1; + c->x86_mask = dir1 & 0xf; + + /* Now cook; the original recipe is by Channing Corn, from Cyrix. + * We do the same thing for each generation: we work out + * the model, multiplier and stepping. Black magic included, + * to make the silicon step/rev numbers match the printed ones. 
+ */ + + switch (dir0_msn) { + unsigned char tmp; + + case 0: /* Cx486SLC/DLC/SRx/DRx */ + p = Cx486_name[dir0_lsn & 7]; + break; + + case 1: /* Cx486S/DX/DX2/DX4 */ + p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] + : Cx486S_name[dir0_lsn & 3]; + break; + + case 2: /* 5x86 */ + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + p = Cx86_cb+2; + break; + + case 3: /* 6x86/6x86L */ + Cx86_cb[1] = ' '; + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + if (dir1 > 0x21) { /* 686L */ + Cx86_cb[0] = 'L'; + p = Cx86_cb; + (c->x86_model)++; + } else /* 686 */ + p = Cx86_cb+1; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + /* 6x86's contain this bug */ + c->coma_bug = 1; + break; + + case 4: /* MediaGX/GXm */ +#ifdef CONFIG_PCI + /* It isn't really a PCI quirk directly, but the cure is the + same. The MediaGX has deep magic SMM stuff that handles the + SB emulation. It thows away the fifo on disable_dma() which + is wrong and ruins the audio. + + Bug2: VSA1 has a wrap bug so that using maximum sized DMA + causes bad things. According to NatSemi VSA2 has another + bug to do with 'hlt'. I've not seen any boards using VSA2 + and X doesn't seem to support it either so who cares 8). + VSA1 we work around however. 
+ */ + + printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + isa_dma_bridge_buggy = 2; +#endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ + + /* GXm supports extended cpuid levels 'ala' AMD */ + if (c->cpuid_level == 2) { + /* Enable Natsemi MMX extensions */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + + get_model_name(c); /* get CPU marketing name */ + /* + * The 5510/5520 companion chips have a funky PIT + * that breaks the TSC synchronizing, so turn it off + */ + if (pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, NULL) || + pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, NULL)) + clear_bit(X86_FEATURE_TSC, c->x86_capability); + return; + } + else { /* MediaGX */ + Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; + p = Cx86_cb+2; + c->x86_model = (dir1 & 0x20) ? 1 : 2; +#ifndef CONFIG_CS5520 + clear_bit(X86_FEATURE_TSC, c->x86_capability); +#endif + } + break; + + case 5: /* 6x86MX/M II */ + if (dir1 > 7) + { + dir0_msn++; /* M II */ + /* Enable MMX extensions (App note 108) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + } + else + { + c->coma_bug = 1; /* 6x86MX, it has the bug. */ + } + tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; + Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; + p = Cx86_cb+tmp; + if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) + (c->x86_model)++; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + break; + + case 0xf: /* Cyrix 486 without DEVID registers */ + switch (dir0_lsn) { + case 0xd: /* either a 486SLC or DLC w/o DEVID */ + dir0_msn = 0; + p = Cx486_name[(c->hard_math) ? 
1 : 0]; + break; + + case 0xe: /* a 486S A step */ + dir0_msn = 0; + p = Cx486S_name[0]; + break; + } + break; + + default: /* unknown (shouldn't happen, we know everyone ;-) */ + dir0_msn = 7; + break; + } + strcpy(buf, Cx86_model[dir0_msn & 7]); + if (p) strcat(buf, p); + return; +} + +/* + * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected + * by the fact that they preserve the flags across the division of 5/2. + * PII and PPro exhibit this behavior too, but they have cpuid available. + */ + +/* + * Perform the Cyrix 5/2 test. A Cyrix won't change + * the flags, while other 486 chips will. + */ +static inline int test_cyrix_52div(void) +{ + unsigned int test; + + __asm__ __volatile__( + "sahf\n\t" /* clear flags (%eax = 0x0005) */ + "div %b2\n\t" /* divide 5 by 2 */ + "lahf" /* store flags into %ah */ + : "=a" (test) + : "0" (5), "q" (2) + : "cc"); + + /* AH is 0x02 on Cyrix after the divide.. */ + return (unsigned char) (test >> 8) == 0x02; +} + +static void cyrix_identify(struct cpuinfo_x86 * c) +{ + /* Detect Cyrix with disabled CPUID */ + if ( c->x86 == 4 && test_cyrix_52div() ) { + unsigned char dir0, dir1; + + strcpy(c->x86_vendor_id, "CyrixInstead"); + c->x86_vendor = X86_VENDOR_CYRIX; + + /* Actually enable cpuid on the older cyrix */ + + /* Retrieve CPU revisions */ + + do_cyrix_devid(&dir0, &dir1); + + dir0>>=4; + + /* Check it is an affected model */ + + if (dir0 == 5 || dir0 == 3) + { + unsigned char ccr3, ccr4; + unsigned long flags; + printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ccr4 = getCx86(CX86_CCR4); + setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + } + } + generic_identify(c); +} + +static struct cpu_dev cyrix_cpu_dev __initdata = { + c_vendor: "Cyrix", + c_ident: { "CyrixInstead" }, + c_init: init_cyrix, + 
c_identify: cyrix_identify, +}; + +int __init cyrix_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; + return 0; +} + +//early_arch_initcall(cyrix_init_cpu); + +static struct cpu_dev nsc_cpu_dev __initdata = { + c_vendor: "NSC", + c_ident: { "Geode by NSC" }, + c_init: init_cyrix, + c_identify: generic_identify, +}; + +int __init nsc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; + return 0; +} + +//early_arch_initcall(nsc_init_cpu); diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c new file mode 100644 index 000000000000..2af26956a150 --- /dev/null +++ b/arch/i386/kernel/cpu/intel.c @@ -0,0 +1,378 @@ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/smp.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/msr.h> + +#include "cpu.h" + +static int disable_x86_serial_nr __initdata = 1; +static int disable_P4_HT __initdata = 0; +extern int trap_init_f00f_bug(void); + +/* + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work + */ + +int __init ppro_with_ram_bug(void) +{ + char vendor_id[16]; + int ident; + + /* Must have CPUID */ + if(!have_cpuid_p()) + return 0; + if(cpuid_eax(0)<1) + return 0; + + /* Must be Intel */ + cpuid(0, &ident, + (int *)&vendor_id[0], + (int *)&vendor_id[8], + (int *)&vendor_id[4]); + + if(memcmp(vendor_id, "IntelInside", 12)) + return 0; + + ident = cpuid_eax(1); + + /* Model 6 */ + + if(((ident>>8)&15)!=6) + return 0; + + /* Pentium Pro */ + + if(((ident>>4)&15)!=1) + return 0; + + if((ident&15) < 8) + { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. 
Taking evasive action.\n"); + return 1; + } + printk(KERN_INFO "Your Pentium Pro seems ok.\n"); + return 0; +} + +static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if( test_bit(X86_FEATURE_PN, c->x86_capability) && + disable_x86_serial_nr ) { + /* Disable processor serial number */ + unsigned long lo,hi; + rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_bit(X86_FEATURE_PN, c->x86_capability); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + +static int __init P4_disable_ht(char *s) +{ + disable_P4_HT = 1; + return 1; +} +__setup("noht", P4_disable_ht); + + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + char *p = NULL; + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonpriviledged users lock up the system. + * Note that the workaround only should be initialized once... 
+ */ + c->f00f_bug = 0; + if ( c->x86 == 5 ) { + static int f00f_workaround_enabled = 0; + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + + if (c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char dl, dh; + unsigned int cs; + + dh = des >> 4; + dl = des & 0x0F; + + /* Black magic... */ + + switch ( dh ) + { + case 0: + switch ( dl ) { + case 6: + /* L1 I cache */ + l1i += 8; + break; + case 8: + /* L1 I cache */ + l1i += 16; + break; + case 10: + /* L1 D cache */ + l1d += 8; + break; + case 12: + /* L1 D cache */ + l1d += 16; + break; + default:; + /* TLB, or unknown */ + } + break; + case 2: + if ( dl ) { + /* L3 cache */ + cs = (dl-1) << 9; + l3 += cs; + } + break; + case 4: + if ( c->x86 > 6 && dl ) { + /* P4 family */ + /* L3 cache */ + cs = 128 << (dl-1); + l3 += cs; + break; + } + /* else same as 8 - fall through */ + case 8: + if ( dl ) { + /* L2 cache */ + cs = 128 << (dl-1); + l2 += cs; + } + break; + case 6: + if (dl > 5) { + /* L1 D cache */ + cs = 8<<(dl-6); + l1d += cs; + } + break; + case 7: + if ( dl >= 8 ) + { + /* L2 cache */ + cs = 64<<(dl-8); + l2 += cs; + } else { + /* L0 I cache, count as L1 */ + cs = dl ?
(16 << (dl-1)) : 12; + l1i += cs; + } + break; + default: + /* TLB, or something else we don't know about */ + break; + } + } + } + if ( l1i || l1d ) + printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", + l1i, l1d); + if ( l2 ) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + if ( l3 ) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + /* + * This assumes the L3 cache is shared; it typically lives in + * the northbridge. The L1 caches are included by the L2 + * cache, and so should not be included for the purpose of + * SMP switching weights. + */ + c->x86_cache_size = l2 ? l2 : (l1i+l1d); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (l2 == 0) + p = "Celeron (Covington)"; + if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); + +#ifdef CONFIG_SMP + if (test_bit(X86_FEATURE_HT, c->x86_capability) && !disable_P4_HT) { + extern int phys_proc_id[NR_CPUS]; + + u32 eax, ebx, ecx, edx; + int index_lsb, index_msb, tmp; + int initial_apic_id; + int cpu = smp_processor_id(); + + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1 ) { + index_lsb = 0; + index_msb = 31; + /* + * At this point we only support two siblings per + * processor package. 
+ */ +#define NR_SIBLINGS 2 + if (smp_num_siblings != NR_SIBLINGS) { + printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); + smp_num_siblings = 1; + goto too_many_siblings; + } + tmp = smp_num_siblings; + while ((tmp & 1) == 0) { + tmp >>=1 ; + index_lsb++; + } + tmp = smp_num_siblings; + while ((tmp & 0x80000000 ) == 0) { + tmp <<=1 ; + index_msb--; + } + if (index_lsb != index_msb ) + index_msb++; + initial_apic_id = ebx >> 24 & 0xff; + phys_proc_id[cpu] = initial_apic_id >> index_msb; + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + phys_proc_id[cpu]); + } + + } +too_many_siblings: + + if (disable_P4_HT) + clear_bit(X86_FEATURE_HT, c->x86_capability); +#endif + + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); +} + +static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. 
+ */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} + +static struct cpu_dev intel_cpu_dev __initdata = { + c_vendor: "Intel", + c_ident: { "GenuineIntel" }, + c_models: { + { X86_VENDOR_INTEL, 4, + { + [0] "486 DX-25/33", + [1] "486 DX-50", + [2] "486 SX", + [3] "486 DX/2", + [4] "486 SL", + [5] "486 SX/2", + [7] "486 DX/2-WB", + [8] "486 DX/4", + [9] "486 DX/4-WB" + } + }, + { X86_VENDOR_INTEL, 5, + { + [0] "Pentium 60/66 A-step", + [1] "Pentium 60/66", + [2] "Pentium 75 - 200", + [3] "OverDrive PODP5V83", + [4] "Pentium MMX", + [7] "Mobile Pentium 75 - 200", + [8] "Mobile Pentium MMX" + } + }, + }, + c_init: init_intel, + c_identify: generic_identify, + c_size_cache: intel_size_cache, +}; + +__init int intel_cpu_init(void) +{ + cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; + return 0; +} + +// arch_initcall(intel_cpu_init); + diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c new file mode 100644 index 000000000000..487e9e795d79 --- /dev/null +++ b/arch/i386/kernel/cpu/nexgen.c @@ -0,0 +1,60 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/string.h> +#include <asm/processor.h> + +#include "cpu.h" + +/* + * Detect a NexGen CPU running without BIOS hypercode new enough + * to have CPUID. (Thanks to Herbert Oppmann) + */ + +static int __init deep_magic_nexgen_probe(void) +{ + int ret; + + __asm__ __volatile__ ( + " movw $0x5555, %%ax\n" + " xorw %%dx,%%dx\n" + " movw $2, %%cx\n" + " divw %%cx\n" + " movl $0, %%eax\n" + " jnz 1f\n" + " movl $1, %%eax\n" + "1:\n" + : "=a" (ret) : : "cx", "dx" ); + return ret; +} + +static void __init init_nexgen(struct cpuinfo_x86 * c) +{ + c->x86_cache_size = 256; /* A few had 1 MB... 
*/ +} + +static void nexgen_identify(struct cpuinfo_x86 * c) +{ + /* Detect NexGen with old hypercode */ + if ( deep_magic_nexgen_probe() ) { + strcpy(c->x86_vendor_id, "NexGenDriven"); + } + generic_identify(c); +} + +static struct cpu_dev nexgen_cpu_dev __initdata = { + c_vendor: "Nexgen", + c_ident: { "NexGenDriven" }, + c_models: { + { X86_VENDOR_NEXGEN,5, { [1] "Nx586" } }, + }, + c_init: init_nexgen, + c_identify: nexgen_identify, +}; + +int __init nexgen_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; + return 0; +} + +//early_arch_initcall(nexgen_init_cpu); diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c new file mode 100644 index 000000000000..570affdb1f0c --- /dev/null +++ b/arch/i386/kernel/cpu/proc.c @@ -0,0 +1,126 @@ +#include <linux/smp.h> +#include <linux/timex.h> +#include <linux/string.h> +#include <asm/semaphore.h> +#include <linux/seq_file.h> + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in <asm/cpufeature.h>. + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu/<cpu_nr>/cpuid instead. 
+ */ + static char *x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL, + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!(cpu_online_map & (1<<n))) + return 0; +#endif + seq_printf(m, "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n", + n, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( test_bit(X86_FEATURE_TSC, c->x86_capability) ) { + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + cpu_khz / 1000, (cpu_khz % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? 
cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + start: c_start, + next: c_next, + stop: c_stop, + show: show_cpuinfo, +}; diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c new file mode 100644 index 000000000000..e56935319bba --- /dev/null +++ b/arch/i386/kernel/cpu/rise.c @@ -0,0 +1,53 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/processor.h> + +#include "cpu.h" + +static void __init init_rise(struct cpuinfo_x86 *c) +{ + printk("CPU: Rise iDragon"); + if (c->x86_model > 2) + printk(" II"); + printk("\n"); + + /* Unhide possibly hidden capability flags + The mp6 iDragon family don't have MSRs. + We switch on extra features with this cpuid weirdness: */ + __asm__ ( + "movl $0x6363452a, %%eax\n\t" + "movl $0x3231206c, %%ecx\n\t" + "movl $0x2a32313a, %%edx\n\t" + "cpuid\n\t" + "movl $0x63634523, %%eax\n\t" + "movl $0x32315f6c, %%ecx\n\t" + "movl $0x2333313a, %%edx\n\t" + "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx" + ); + set_bit(X86_FEATURE_CX8, c->x86_capability); +} + +static struct cpu_dev rise_cpu_dev __initdata = { + c_vendor: "Rise", + c_ident: { "RiseRiseRise" }, + c_models: { + { X86_VENDOR_RISE, 5, + { + [0] "iDragon", + [2] "iDragon", + [8] "iDragon II", + [9] "iDragon II" + } + }, + }, + c_init: init_rise, +}; + +int __init rise_init_cpu(void) +{ + cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev; + return 0; +} + +//early_arch_initcall(rise_init_cpu); diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c new file mode 100644 index 000000000000..3fe5124e438f --- /dev/null +++ b/arch/i386/kernel/cpu/transmeta.c @@ -0,0 +1,95 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include "cpu.h" + +static void __init init_transmeta(struct 
cpuinfo_x86 *c) +{ + unsigned int cap_mask, uk, max, dummy; + unsigned int cms_rev1, cms_rev2; + unsigned int cpu_rev, cpu_freq, cpu_flags; + char cpu_info[65]; + + get_model_name(c); /* Same as AMD/Cyrix */ + display_cacheinfo(c); + + /* Print CMS and CPU revision */ + max = cpuid_eax(0x80860000); + if ( max >= 0x80860001 ) { + cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); + printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + (cpu_rev >> 24) & 0xff, + (cpu_rev >> 16) & 0xff, + (cpu_rev >> 8) & 0xff, + cpu_rev & 0xff, + cpu_freq); + } + if ( max >= 0x80860002 ) { + cpuid(0x80860002, &dummy, &cms_rev1, &cms_rev2, &dummy); + printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + (cms_rev1 >> 24) & 0xff, + (cms_rev1 >> 16) & 0xff, + (cms_rev1 >> 8) & 0xff, + cms_rev1 & 0xff, + cms_rev2); + } + if ( max >= 0x80860006 ) { + cpuid(0x80860003, + (void *)&cpu_info[0], + (void *)&cpu_info[4], + (void *)&cpu_info[8], + (void *)&cpu_info[12]); + cpuid(0x80860004, + (void *)&cpu_info[16], + (void *)&cpu_info[20], + (void *)&cpu_info[24], + (void *)&cpu_info[28]); + cpuid(0x80860005, + (void *)&cpu_info[32], + (void *)&cpu_info[36], + (void *)&cpu_info[40], + (void *)&cpu_info[44]); + cpuid(0x80860006, + (void *)&cpu_info[48], + (void *)&cpu_info[52], + (void *)&cpu_info[56], + (void *)&cpu_info[60]); + cpu_info[64] = '\0'; + printk(KERN_INFO "CPU: %s\n", cpu_info); + } + + /* Unhide possibly hidden capability flags */ + rdmsr(0x80860004, cap_mask, uk); + wrmsr(0x80860004, ~0, uk); + c->x86_capability[0] = cpuid_edx(0x00000001); + wrmsr(0x80860004, cap_mask, uk); +} + +static void transmeta_identify(struct cpuinfo_x86 * c) +{ + u32 xlvl; + generic_identify(c); + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + +static struct cpu_dev transmeta_cpu_dev __initdata = { + 
c_vendor: "Transmeta", + c_ident: { "GenuineTMx86", "TransmetaCPU" }, + c_init: init_transmeta, + c_identify: transmeta_identify, +}; + +int __init transmeta_init_cpu(void) +{ + cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; + return 0; +} + +//early_arch_initcall(transmeta_init_cpu); diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c new file mode 100644 index 000000000000..a99545811457 --- /dev/null +++ b/arch/i386/kernel/cpu/umc.c @@ -0,0 +1,33 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <asm/processor.h> +#include "cpu.h" + +/* UMC chips appear to be only either 386 or 486, so no special init takes place. + */ +static void __init init_umc(struct cpuinfo_x86 * c) +{ + +} + +static struct cpu_dev umc_cpu_dev __initdata = { + c_vendor: "UMC", + c_ident: { "UMC UMC UMC" }, + c_models: { + { X86_VENDOR_UMC, 4, + { + [1] "U5D", + [2] "U5S", + } + }, + }, + c_init: init_umc, +}; + +int __init umc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; + return 0; +} + +//early_arch_initcall(umc_init_cpu); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 3f9883be4436..ba4a8d9f3b64 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -3,73 +3,16 @@ * * Copyright (C) 1995 Linus Torvalds * - * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean - * and Martin Mares, November 1997. - * - * Force Cyrix 6x86(MX) and M II processors to report MTRR capability - * and Cyrix "coma bug" recognition by - * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999. - * - * Force Centaur C6 processors to report MTRR capability. - * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999. - * - * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999. - * - * IDT Winchip tweaks, misc clean ups. - * Dave Jones <davej@suse.de>, August 1999 - * * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 * - * Better detection of Centaur/IDT WinChip models. 
- * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999. - * * Memory region support * David Parsons <orc@pell.chi.il.us>, July-August 1999 * - * Cleaned up cache-detection code - * Dave Jones <davej@suse.de>, October 1999 - * - * Added proper L2 cache detection for Coppermine - * Dragan Stancevic <visitor@valinux.com>, October 1999 - * - * Added the original array for capability flags but forgot to credit - * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff - * Jauder Ho <jauderho@carumba.com>, January 2000 - * - * Detection for Celeron coppermine, identify_cpu() overhauled, - * and a few other clean ups. - * Dave Jones <davej@suse.de>, April 2000 - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * - * Added proper Cascades CPU and L2 cache detection for Cascades - * and 8-way type cache happy bunch from Intel:^) - * Dragan Stancevic <visitor@valinux.com>, May 2000 - * - * Forward port AMD Duron errata T13 from 2.2.17pre - * Dave Jones <davej@suse.de>, August 2000 - * - * Forward port lots of fixes/improvements from 2.2.18pre - * Cyrix III, Pentium IV support. - * Dave Jones <davej@suse.de>, October 2000 - * - * Massive cleanup of CPU detection and bug handling; - * Transmeta CPU detection, - * H. Peter Anvin <hpa@zytor.com>, November 2000 - * * Added E820 sanitization routine (removes overlapping memory regions); * Brian Moyle <bmoyle@mvista.com>, February 2001 * - * VIA C3 Support. - * Dave Jones <davej@suse.de>, March 2001 - * - * AMD Athlon/Duron/Thunderbird bluesmoke support. - * Dave Jones <davej@suse.de>, April 2001. - * - * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix. - * Dave Jones <davej@suse.de>, September, October 2001. 
+ * Moved CPU detection code to cpu/${cpu}.c + * Patrick Mochel <mochel@osdl.org>, March 2002 * */ @@ -77,45 +20,21 @@ * This file handles the architecture-dependent parts of initialization */ -#include <linux/errno.h> #include <linux/sched.h> -#include <linux/kernel.h> #include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/a.out.h> #include <linux/tty.h> #include <linux/ioport.h> -#include <linux/delay.h> -#include <linux/config.h> -#include <linux/init.h> #include <linux/acpi.h> #include <linux/apm_bios.h> #ifdef CONFIG_BLK_DEV_RAM #include <linux/blk.h> #endif -#include <linux/highmem.h> #include <linux/bootmem.h> -#include <linux/pci.h> -#include <linux/pci_ids.h> #include <linux/seq_file.h> #include <linux/console.h> -#include <asm/processor.h> -#include <asm/mtrr.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/cobalt.h> -#include <asm/msr.h> -#include <asm/desc.h> +#include <linux/highmem.h> #include <asm/e820.h> -#include <asm/dma.h> #include <asm/mpspec.h> -#include <asm/mmu_context.h> /* * Machine setup.. @@ -126,11 +45,7 @@ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; unsigned long mmu_cr4_features; -/* - * Bus types .. 
- */ int MCA_bus; - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; unsigned int machine_submodel_id; @@ -158,20 +73,15 @@ struct e820map e820; unsigned char aux_device_present; -extern void mcheck_init(struct cpuinfo_x86 *c); +extern void early_cpu_init(void); extern void dmi_scan_machine(void); extern int root_mountflags; extern char _text, _etext, _edata, _end; extern int blk_nohighio; void __init visws_get_board_type_and_rev(void); -static int disable_x86_serial_nr __initdata = 1; -static int disable_x86_fxsr __initdata = 0; - unsigned long saved_videomode; -extern unsigned long saved_videomode; - /* * This is set up by the setup-routine at boot-time */ @@ -297,6 +207,22 @@ static void __init probe_roms(void) } } +static void __init limit_regions (unsigned long long size) +{ + int i; + unsigned long long current_size = 0; + + for (i = 0; i < e820.nr_map; i++) { + if (e820.map[i].type == E820_RAM) { + current_size += e820.map[i].size; + if (current_size >= size) { + e820.map[i].size -= current_size-size; + e820.nr_map = i + 1; + return; + } + } + } +} static void __init add_memory_region(unsigned long long start, unsigned long long size, int type) { @@ -598,7 +524,7 @@ static void __init parse_mem_cmdline (char ** cmdline_p) { char c = ' ', *to = command_line, *from = COMMAND_LINE; int len = 0; - int usermem = 0; + int userdef = 0; /* Save unparsed command line copy for /proc/cmdline */ memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); @@ -621,32 +547,24 @@ static void __init parse_mem_cmdline (char ** cmdline_p) } else if (!memcmp(from+4, "exactmap", 8)) { from += 8+4; e820.nr_map = 0; - usermem = 1; + userdef = 1; } else { /* If the user specifies memory size, we - * blow away any automatically generated - * size + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. 
*/ unsigned long long start_at, mem_size; - if (usermem == 0) { - /* first time in: zap the whitelist - * and reinitialize it with the - * standard low-memory region. - */ - e820.nr_map = 0; - usermem = 1; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - } mem_size = memparse(from+4, &from); - if (*from == '@') + if (*from == '@') { start_at = memparse(from+1, &from); - else { - start_at = HIGH_MEMORY; - mem_size -= HIGH_MEMORY; - usermem=0; + add_memory_region(start_at, mem_size, E820_RAM); + } else { + limit_regions(mem_size); + userdef=1; } - add_memory_region(start_at, mem_size, E820_RAM); } } /* @@ -666,7 +584,7 @@ static void __init parse_mem_cmdline (char ** cmdline_p) } *to = '\0'; *cmdline_p = command_line; - if (usermem) { + if (userdef) { printk(KERN_INFO "user-defined physical RAM map:\n"); print_memory_map("user"); } @@ -678,6 +596,8 @@ void __init setup_arch(char **cmdline_p) unsigned long start_pfn, max_low_pfn; int i; + early_cpu_init(); + #ifdef CONFIG_VISWS visws_get_board_type_and_rev(); #endif @@ -977,27 +897,6 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); } -static int cachesize_override __initdata = -1; -static int __init cachesize_setup(char *str) -{ - get_option (&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - - -#ifndef CONFIG_X86_TSC -static int tsc_disable __initdata = 0; - -static int __init tsc_setup(char *str) -{ - tsc_disable = 1; - return 1; -} - -__setup("notsc", tsc_setup); -#endif - static int __init highio_setup(char *str) { printk("i386: disabling HIGHMEM block I/O\n"); @@ -1006,1938 +905,6 @@ static int __init highio_setup(char *str) } __setup("nohighio", highio_setup); -static int __init get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - 
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while ( *p == ' ' ) - p++; - if ( p != q ) { - while ( *p ) - *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - - -static void __init display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ecx, edx, l2size; - - n = cpuid_eax(0x80000000); - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - ecx = cpuid_ecx(0x80000006); - l2size = ecx >> 16; - - /* AMD errata T13 (order #21922) */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - l2size = 64; - if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ - l2size = 256; - } - - /* Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && - (c->x86_model == 11) && (l2size == 0)) - l2size = 256; - - /* VIA C3 CPUs (670-68F) need further shifting. */ - if (c->x86_vendor == X86_VENDOR_CENTAUR && (c->x86 == 6) && - ((c->x86_model == 7) || (c->x86_model == 8))) { - l2size = l2size >> 8; - } - - /* Allow user to override all this if necessary. 
*/ - if (cachesize_override != -1) - l2size = cachesize_override; - - if ( l2size == 0 ) - return; /* Again, no L2 cache is possible */ - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -static int __init init_amd(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = max_mapnr >> (20-PAGE_SHIFT); - int r; - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - r = get_model_name(c); - - switch(c->x86) - { - case 5: - if( c->x86_model < 6 ) - { - /* Based on AMD doc 20734R - June 2000 */ - if ( c->x86_model == 0 ) { - clear_bit(X86_FEATURE_APIC, c->x86_capability); - set_bit(X86_FEATURE_PGE, c->x86_capability); - } - break; - } - - if ( c->x86_model == 6 && c->x86_mask == 1 ) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. 
- */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - /* Knock these two lines out if it debugs out ok */ - printk(KERN_INFO "K6 BUG %ld %d (Report these if test report is incorrect)\n", d, 20*K6_BUG_LOOP); - printk(KERN_INFO "AMD K6 stepping B detected - "); - /* -- cut here -- */ - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://www.mygale.com/~poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model== 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if(mbytes>508) - mbytes=508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF)==0) { - unsigned long flags; - l=(1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask >7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if(mbytes>4092) - mbytes=4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000)==0) { - unsigned long flags; - l=((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); - break; - } - break; - - case 6: /* An Athlon/Duron */ - - /* Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan CPU's. - * If the BIOS didn't enable it already, enable it - * here. 
- */ - if (c->x86_model == 6 || c->x86_model == 7) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_bit(X86_FEATURE_XMM, c->x86_capability); - } - } - break; - - } - - display_cacheinfo(c); - return r; -} - -/* - * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU - */ -static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) -{ - unsigned char ccr2, ccr3; - unsigned long flags; - - /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, ccr3 ^ 0x80); - getCx86(0xc0); /* dummy to change bus */ - - if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ - ccr2 = getCx86(CX86_CCR2); - setCx86(CX86_CCR2, ccr2 ^ 0x04); - getCx86(0xc0); /* dummy */ - - if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ - *dir0 = 0xfd; - else { /* Cx486S A step */ - setCx86(CX86_CCR2, ccr2); - *dir0 = 0xfe; - } - } - else { - setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ - - /* read DIR0 and DIR1 CPU registers */ - *dir0 = getCx86(CX86_DIR0); - *dir1 = getCx86(CX86_DIR1); - } - local_irq_restore(flags); -} - -/* - * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in - * order to identify the Cyrix CPU model after we're out of setup.c - * - * Actually since bugs.h doesnt even reference this perhaps someone should - * fix the documentation ??? 
- */ -static unsigned char Cx86_dir0_msb __initdata = 0; - -static char Cx86_model[][9] __initdata = { - "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", - "M II ", "Unknown" -}; -static char Cx486_name[][5] __initdata = { - "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", - "SRx2", "DRx2" -}; -static char Cx486S_name[][4] __initdata = { - "S", "S2", "Se", "S2e" -}; -static char Cx486D_name[][4] __initdata = { - "DX", "DX2", "?", "?", "?", "DX4" -}; -static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __initdata = "12??43"; -static char cyrix_model_mult2[] __initdata = "12233445"; - -/* - * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old - * BIOSes for compatability with DOS games. This makes the udelay loop - * work correctly, and improves performance. - * - * FIXME: our newer udelay uses the tsc. We dont need to frob with SLOP - */ - -extern void calibrate_delay(void) __init; - -static void __init check_cx686_slop(struct cpuinfo_x86 *c) -{ - unsigned long flags; - - if (Cx86_dir0_msb == 3) { - unsigned char ccr3, ccr5; - - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ccr5 = getCx86(CX86_CCR5); - if (ccr5 & 2) - setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - - if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); - calibrate_delay(); - c->loops_per_jiffy = loops_per_jiffy; - } - } -} - - -static void __init init_cyrix(struct cpuinfo_x86 *c) -{ - unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; - char *buf = c->x86_model_id; - const char *p = NULL; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX 
extensions */ - if ( test_bit(1*32+24, c->x86_capability) ) { - clear_bit(1*32+24, c->x86_capability); - set_bit(X86_FEATURE_CXMMX, c->x86_capability); - } - - do_cyrix_devid(&dir0, &dir1); - - check_cx686_slop(c); - - Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ - dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ - - /* common case step number/rev -- exceptions handled below */ - c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; - - /* Now cook; the original recipe is by Channing Corn, from Cyrix. - * We do the same thing for each generation: we work out - * the model, multiplier and stepping. Black magic included, - * to make the silicon step/rev numbers match the printed ones. - */ - - switch (dir0_msn) { - unsigned char tmp; - - case 0: /* Cx486SLC/DLC/SRx/DRx */ - p = Cx486_name[dir0_lsn & 7]; - break; - - case 1: /* Cx486S/DX/DX2/DX4 */ - p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] - : Cx486S_name[dir0_lsn & 3]; - break; - - case 2: /* 5x86 */ - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - p = Cx86_cb+2; - break; - - case 3: /* 6x86/6x86L */ - Cx86_cb[1] = ' '; - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - if (dir1 > 0x21) { /* 686L */ - Cx86_cb[0] = 'L'; - p = Cx86_cb; - (c->x86_model)++; - } else /* 686 */ - p = Cx86_cb+1; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); - /* 6x86's contain this bug */ - c->coma_bug = 1; - break; - - case 4: /* MediaGX/GXm */ -#ifdef CONFIG_PCI - /* It isn't really a PCI quirk directly, but the cure is the - same. The MediaGX has deep magic SMM stuff that handles the - SB emulation. It thows away the fifo on disable_dma() which - is wrong and ruins the audio. - - Bug2: VSA1 has a wrap bug so that using maximum sized DMA - causes bad things. According to NatSemi VSA2 has another - bug to do with 'hlt'. I've not seen any boards using VSA2 - and X doesn't seem to support it either so who cares 8). - VSA1 we work around however. 
- */ - - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); - isa_dma_bridge_buggy = 2; -#endif - c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ - - /* GXm supports extended cpuid levels 'ala' AMD */ - if (c->cpuid_level == 2) { - /* Enable Natsemi MMX extensions */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); - - get_model_name(c); /* get CPU marketing name */ - /* - * The 5510/5520 companion chips have a funky PIT - * that breaks the TSC synchronizing, so turn it off - */ - if(pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, NULL) || - pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, NULL)) - clear_bit(X86_FEATURE_TSC, c->x86_capability); - return; - } - else { /* MediaGX */ - Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; - p = Cx86_cb+2; - c->x86_model = (dir1 & 0x20) ? 1 : 2; -#ifndef CONFIG_CS5520 - clear_bit(X86_FEATURE_TSC, c->x86_capability); -#endif - } - break; - - case 5: /* 6x86MX/M II */ - if (dir1 > 7) - { - dir0_msn++; /* M II */ - /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); - } - else - { - c->coma_bug = 1; /* 6x86MX, it has the bug. */ - } - tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; - Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; - p = Cx86_cb+tmp; - if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) - (c->x86_model)++; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); - break; - - case 0xf: /* Cyrix 486 without DEVID registers */ - switch (dir0_lsn) { - case 0xd: /* either a 486SLC or DLC w/o DEVID */ - dir0_msn = 0; - p = Cx486_name[(c->hard_math) ? 
1 : 0]; - break; - - case 0xe: /* a 486S A step */ - dir0_msn = 0; - p = Cx486S_name[0]; - break; - } - break; - - default: /* unknown (shouldn't happen, we know everyone ;-) */ - dir0_msn = 7; - break; - } - strcpy(buf, Cx86_model[dir0_msn & 7]); - if (p) strcat(buf, p); - return; -} - -#ifdef CONFIG_X86_OOSTORE - -static u32 __init power2(u32 x) -{ - u32 s=1; - while(s<=x) - s<<=1; - return s>>=1; -} - -/* - * Set up an actual MCR - */ - -static void __init winchip_mcr_insert(int reg, u32 base, u32 size, int key) -{ - u32 lo, hi; - - hi = base & ~0xFFF; - lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ - lo &= ~0xFFF; /* Remove the ctrl value bits */ - lo |= key; /* Attribute we wish to set */ - wrmsr(reg+MSR_IDT_MCR0, lo, hi); - mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ -} - -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ - -static u32 __init ramtop(void) /* 16388 */ -{ - int i; - u32 top = 0; - u32 clip = 0xFFFFFFFFUL; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophy already - */ - - if (e820.map[i].type == E820_RESERVED) - { - if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* Everything below 'top' should be RAM except for the ISA hole. - Because of the limited MCR's we want to map NV/ACPI into our - MCR range for gunk in RAM - - Clip might cause us to MCR insufficient RAM but that is an - acceptable failure mode and should only bite obscure boxes with - a VESA hole at 15Mb - - The second case Clip sometimes kicks in is when the EBDA is marked - as reserved. 
Again we fail safe with reasonable results - */ - - if(top>clip) - top=clip; - - return top; -} - -/* - * Compute a set of MCR's to give maximum coverage - */ - -static int __init winchip_mcr_compute(int nr, int key) -{ - u32 mem = ramtop(); - u32 root = power2(mem); - u32 base = root; - u32 top = root; - u32 floor = 0; - int ct = 0; - - while(ct<nr) - { - u32 fspace = 0; - - /* - * Find the largest block we will fill going upwards - */ - - u32 high = power2(mem-top); - - /* - * Find the largest block we will fill going downwards - */ - - u32 low = base/2; - - /* - * Don't fill below 1Mb going downwards as there - * is an ISA hole in the way. - */ - - if(base <= 1024*1024) - low = 0; - - /* - * See how much space we could cover by filling below - * the ISA hole - */ - - if(floor == 0) - fspace = 512*1024; - else if(floor ==512*1024) - fspace = 128*1024; - - /* And forget ROM space */ - - /* - * Now install the largest coverage we get - */ - - if(fspace > high && fspace > low) - { - winchip_mcr_insert(ct, floor, fspace, key); - floor += fspace; - } - else if(high > low) - { - winchip_mcr_insert(ct, top, high, key); - top += high; - } - else if(low > 0) - { - base -= low; - winchip_mcr_insert(ct, base, low, key); - } - else break; - ct++; - } - /* - * We loaded ct values. We now need to set the mask. The caller - * must do this bit. - */ - - return ct; -} - -static void __init winchip_create_optimal_mcr(void) -{ - int i; - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. - * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc - * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. 
- */ - int used = winchip_mcr_compute(6, 31); - - /* - * Wipe unused MCRs - */ - - for(i=used;i<8;i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -static void __init winchip2_create_optimal_mcr(void) -{ - u32 lo, hi; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. - * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining - */ - - int used = winchip_mcr_compute(6, 25); - - /* - * Mark the registers we are using. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for(i=0;i<used;i++) - lo|=1<<(9+i); - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - - /* - * Wipe unused MCRs - */ - - for(i=used;i<8;i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -/* - * Handle the MCR key on the Winchip 2. - */ - -static void __init winchip2_unprotect_mcr(void) -{ - u32 lo, hi; - u32 key; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo&=~0x1C0; /* blank bits 8-6 */ - key = (lo>>17) & 7; - lo |= key<<6; /* replace with unlock key */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -static void __init winchip2_protect_mcr(void) -{ - u32 lo, hi; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo&=~0x1C0; /* blank bits 8-6 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -#endif - -static void __init init_centaur(struct cpuinfo_x86 *c) -{ - enum { - ECX8=1<<1, - EIERRINT=1<<2, - DPM=1<<3, - DMCE=1<<4, - DSTPCLK=1<<5, - ELINEAR=1<<6, - DSMC=1<<7, - DTLOCK=1<<8, - EDCTLB=1<<8, - EMMX=1<<9, - DPDC=1<<11, - EBRPRED=1<<12, - DIC=1<<13, - DDC=1<<14, - DNA=1<<15, - ERETSTK=1<<16, - E2MMX=1<<19, - EAMD3D=1<<20, - }; - - char *name; - u32 fcr_set=0; - u32 fcr_clr=0; - u32 lo,hi,newlo; - u32 aa,bb,cc,dd; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - switch (c->x86) { - - case 5: - switch(c->x86_model) { - case 4: - name="C6"; - fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; - fcr_clr=DPDC; - printk(KERN_NOTICE 
"Disabling bugged TSC.\n"); - clear_bit(X86_FEATURE_TSC, c->x86_capability); -#ifdef CONFIG_X86_OOSTORE - winchip_create_optimal_mcr(); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - - The C6 original lacks weak read order - - Note 0x120 is write only on Winchip 1 */ - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif - break; - case 8: - switch(c->x86_mask) { - default: - name="2"; - break; - case 7 ... 9: - name="2A"; - break; - case 10 ... 15: - name="2B"; - break; - } - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo|=31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - case 9: - name="3"; - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo|=31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - case 10: - name="4"; - /* no info on the WC4 yet */ - break; - default: - name="??"; - } - - rdmsr(MSR_IDT_FCR1, lo, hi); - newlo=(lo|fcr_set) & (~fcr_clr); - - if (newlo!=lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); - wrmsr(MSR_IDT_FCR1, newlo, hi ); - } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); - } - /* Emulate MTRRs using Centaur's MCR. */ - set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); - /* Report CX8 */ - set_bit(X86_FEATURE_CX8, c->x86_capability); - /* Set 3DNow! on Winchip 2 and above. 
*/ - if (c->x86_model >=8) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - /* See if we can find out some more. */ - if ( cpuid_eax(0x80000000) >= 0x80000005 ) { - /* Yes, we can. */ - cpuid(0x80000005,&aa,&bb,&cc,&dd); - /* Add L1 data and code cache sizes. */ - c->x86_cache_size = (cc>>24)+(dd>>24); - } - sprintf( c->x86_model_id, "WinChip %s", name ); - break; - - case 6: - switch (c->x86_model) { - case 6 ... 8: /* Cyrix III family */ - rdmsr (MSR_VIA_FCR, lo, hi); - lo |= (1<<1 | 1<<7); /* Report CX8 & enable PGE */ - wrmsr (MSR_VIA_FCR, lo, hi); - - set_bit(X86_FEATURE_CX8, c->x86_capability); - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - - get_model_name(c); - display_cacheinfo(c); - break; - } - break; - } -} - - -static void __init init_transmeta(struct cpuinfo_x86 *c) -{ - unsigned int cap_mask, uk, max, dummy; - unsigned int cms_rev1, cms_rev2; - unsigned int cpu_rev, cpu_freq, cpu_flags; - char cpu_info[65]; - - get_model_name(c); /* Same as AMD/Cyrix */ - display_cacheinfo(c); - - /* Print CMS and CPU revision */ - max = cpuid_eax(0x80860000); - if ( max >= 0x80860001 ) { - cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", - (cpu_rev >> 24) & 0xff, - (cpu_rev >> 16) & 0xff, - (cpu_rev >> 8) & 0xff, - cpu_rev & 0xff, - cpu_freq); - } - if ( max >= 0x80860002 ) { - cpuid(0x80860002, &dummy, &cms_rev1, &cms_rev2, &dummy); - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", - (cms_rev1 >> 24) & 0xff, - (cms_rev1 >> 16) & 0xff, - (cms_rev1 >> 8) & 0xff, - cms_rev1 & 0xff, - cms_rev2); - } - if ( max >= 0x80860006 ) { - cpuid(0x80860003, - (void *)&cpu_info[0], - (void *)&cpu_info[4], - (void *)&cpu_info[8], - (void *)&cpu_info[12]); - cpuid(0x80860004, - (void *)&cpu_info[16], - (void *)&cpu_info[20], - (void *)&cpu_info[24], - (void *)&cpu_info[28]); - cpuid(0x80860005, - (void *)&cpu_info[32], - (void *)&cpu_info[36], - (void *)&cpu_info[40], - 
(void *)&cpu_info[44]); - cpuid(0x80860006, - (void *)&cpu_info[48], - (void *)&cpu_info[52], - (void *)&cpu_info[56], - (void *)&cpu_info[60]); - cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); - } - - /* Unhide possibly hidden capability flags */ - rdmsr(0x80860004, cap_mask, uk); - wrmsr(0x80860004, ~0, uk); - c->x86_capability[0] = cpuid_edx(0x00000001); - wrmsr(0x80860004, cap_mask, uk); -} - - -static void __init init_rise(struct cpuinfo_x86 *c) -{ - printk("CPU: Rise iDragon"); - if (c->x86_model > 2) - printk(" II"); - printk("\n"); - - /* Unhide possibly hidden capability flags - The mp6 iDragon family don't have MSRs. - We switch on extra features with this cpuid weirdness: */ - __asm__ ( - "movl $0x6363452a, %%eax\n\t" - "movl $0x3231206c, %%ecx\n\t" - "movl $0x2a32313a, %%edx\n\t" - "cpuid\n\t" - "movl $0x63634523, %%eax\n\t" - "movl $0x32315f6c, %%ecx\n\t" - "movl $0x2333313a, %%edx\n\t" - "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx" - ); - set_bit(X86_FEATURE_CX8, c->x86_capability); -} - - -extern void trap_init_f00f_bug(void); - -static void __init init_intel(struct cpuinfo_x86 *c) -{ - char *p = NULL; - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonpriviledged users lock up the system. - * Note that the workaround only should be initialized once... 
- */ - c->f00f_bug = 0; - if ( c->x86 == 5 ) { - static int f00f_workaround_enabled = 0; - - c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - - if (c->cpuid_level > 1) { - /* supports eax=2 call */ - int i, j, n; - int regs[4]; - unsigned char *dp = (unsigned char *)regs; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for ( i = 0 ; i < n ; i++ ) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if ( regs[j] < 0 ) regs[j] = 0; - } - - /* Byte 0 is level count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { - unsigned char des = dp[j]; - unsigned char dl, dh; - unsigned int cs; - - dh = des >> 4; - dl = des & 0x0F; - - /* Black magic... */ - - switch ( dh ) - { - case 0: - switch ( dl ) { - case 6: - /* L1 I cache */ - l1i += 8; - break; - case 8: - /* L1 I cache */ - l1i += 16; - break; - case 10: - /* L1 D cache */ - l1d += 8; - break; - case 12: - /* L1 D cache */ - l1d += 16; - break; - default:; - /* TLB, or unknown */ - } - break; - case 2: - if ( dl ) { - /* L3 cache */ - cs = (dl-1) << 9; - l3 += cs; - } - break; - case 4: - if ( c->x86 > 6 && dl ) { - /* P4 family */ - /* L3 cache */ - cs = 128 << (dl-1); - l3 += cs; - break; - } - /* else same as 8 - fall through */ - case 8: - if ( dl ) { - /* L2 cache */ - cs = 128 << (dl-1); - l2 += cs; - } - break; - case 6: - if (dl > 5) { - /* L1 D cache */ - cs = 8<<(dl-6); - l1d += cs; - } - break; - case 7: - if ( dl >= 8 ) - { - /* L2 cache */ - cs = 64<<(dl-8); - l2 += cs; - } else { - /* L0 I cache, count as L1 */ - cs = dl ? 
(16 << (dl-1)) : 12; - l1i += cs; - } - break; - default: - /* TLB, or something else we don't know about */ - break; - } - } - } - if ( l1i || l1d ) - printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", - l1i, l1d); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); - } - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ - if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (l2 == 0) - p = "Celeron (Covington)"; - if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if ( p ) - strcpy(c->x86_model_id, p); - -#ifdef CONFIG_SMP - if (cpu_has(c, X86_FEATURE_HT)) { - extern int phys_proc_id[NR_CPUS]; - - u32 eax, ebx, ecx, edx; - int index_lsb, index_msb, tmp; - int initial_apic_id; - int cpu = smp_processor_id(); - - cpuid(1, &eax, &ebx, &ecx, &edx); - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1 ) { - index_lsb = 0; - index_msb = 31; - /* - * At this point we only support two siblings per - * processor package. 
- */ -#define NR_SIBLINGS 2 - if (smp_num_siblings != NR_SIBLINGS) { - printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); - smp_num_siblings = 1; - goto too_many_siblings; - } - tmp = smp_num_siblings; - while ((tmp & 1) == 0) { - tmp >>=1 ; - index_lsb++; - } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (index_lsb != index_msb ) - index_msb++; - initial_apic_id = ebx >> 24 & 0xff; - phys_proc_id[cpu] = initial_apic_id >> index_msb; - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); - } - - } -too_many_siblings: -#endif -} - -void __init get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - - if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "CyrixInstead")) - c->x86_vendor = X86_VENDOR_CYRIX; - else if (!strcmp(v, "Geode by NSC")) - c->x86_vendor = X86_VENDOR_NSC; - else if (!strcmp(v, "UMC UMC UMC ")) - c->x86_vendor = X86_VENDOR_UMC; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else if (!strcmp(v, "NexGenDriven")) - c->x86_vendor = X86_VENDOR_NEXGEN; - else if (!strcmp(v, "RiseRiseRise")) - c->x86_vendor = X86_VENDOR_RISE; - else if (!strcmp(v, "GenuineTMx86") || - !strcmp(v, "TransmetaCPU")) - c->x86_vendor = X86_VENDOR_TRANSMETA; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -struct cpu_model_info { - int vendor; - int family; - char *model_names[16]; -}; - -/* Naming convention should be: <Name> [(<Codename>)] */ -/* This table only is used unless init_<vendor>() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ -static struct cpu_model_info cpu_models[] __initdata = { - { X86_VENDOR_INTEL, 4, - { "486 DX-25/33", "486 DX-50", "486 SX", "486 DX/2", "486 SL", - "486 SX/2", NULL, "486 DX/2-WB", "486 DX/4", "486 DX/4-WB", 
NULL, - NULL, NULL, NULL, NULL, NULL }}, - { X86_VENDOR_INTEL, 5, - { "Pentium 60/66 A-step", "Pentium 60/66", "Pentium 75 - 200", - "OverDrive PODP5V83", "Pentium MMX", NULL, NULL, - "Mobile Pentium 75 - 200", "Mobile Pentium MMX", NULL, NULL, NULL, - NULL, NULL, NULL, NULL }}, - { X86_VENDOR_INTEL, 6, - { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", - NULL, "Pentium II (Deschutes)", "Mobile Pentium II", - "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, - "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, - { X86_VENDOR_AMD, 4, - { NULL, NULL, NULL, "486 DX/2", NULL, NULL, NULL, "486 DX/2-WB", - "486 DX/4", "486 DX/4-WB", NULL, NULL, NULL, NULL, "Am5x86-WT", - "Am5x86-WB" }}, - { X86_VENDOR_AMD, 5, /* Is this this really necessary?? */ - { "K5/SSA5", "K5", - "K5", "K5", NULL, NULL, - "K6", "K6", "K6-2", - "K6-3", NULL, NULL, NULL, NULL, NULL, NULL }}, - { X86_VENDOR_AMD, 6, /* Is this this really necessary?? */ - { "Athlon", "Athlon", - "Athlon", NULL, "Athlon", NULL, - NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL }}, - { X86_VENDOR_UMC, 4, - { NULL, "U5D", "U5S", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL }}, - { X86_VENDOR_NEXGEN, 5, - { "Nx586", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL }}, - { X86_VENDOR_RISE, 5, - { "iDragon", NULL, "iDragon", NULL, NULL, NULL, NULL, - NULL, "iDragon II", "iDragon II", NULL, NULL, NULL, NULL, NULL, NULL }}, -}; - -/* Look up CPU names by table lookup. 
*/ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info = cpu_models; - int i; - - if ( c->x86_model >= 16 ) - return NULL; /* Range check */ - - for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { - if ( info->vendor == c->x86_vendor && - info->family == c->x86 ) { - return info->model_names[c->x86_model]; - } - info++; - } - return NULL; /* Not found */ -} - -/* - * Detect a NexGen CPU running without BIOS hypercode new enough - * to have CPUID. (Thanks to Herbert Oppmann) - */ - -static int __init deep_magic_nexgen_probe(void) -{ - int ret; - - __asm__ __volatile__ ( - " movw $0x5555, %%ax\n" - " xorw %%dx,%%dx\n" - " movw $2, %%cx\n" - " divw %%cx\n" - " movl $0, %%eax\n" - " jnz 1f\n" - " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx" ); - return ret; -} - -static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { - /* Disable processor serial number */ - unsigned long lo,hi; - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_bit(X86_FEATURE_PN, c->x86_capability); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - -static int __init x86_fxsr_setup(char * s) -{ - disable_x86_fxsr = 1; - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; 
-} - - -/* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -/* - * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected - * by the fact that they preserve the flags across the division of 5/2. - * PII and PPro exhibit this behavior too, but they have cpuid available. - */ - -/* - * Perform the Cyrix 5/2 test. A Cyrix won't change - * the flags, while other 486 chips will. - */ -static inline int test_cyrix_52div(void) -{ - unsigned int test; - - __asm__ __volatile__( - "sahf\n\t" /* clear flags (%eax = 0x0005) */ - "div %b2\n\t" /* divide 5 by 2 */ - "lahf" /* store flags into %ah */ - : "=a" (test) - : "0" (5), "q" (2) - : "cc"); - - /* AH is 0x02 on Cyrix after the divide.. */ - return (unsigned char) (test >> 8) == 0x02; -} - -/* Try to detect a CPU with disabled CPUID, and if so, enable. This routine - may also be used to detect non-CPUID processors and fill in some of - the information manually. 
*/ -static int __init id_and_try_enable_cpuid(struct cpuinfo_x86 *c) -{ - /* First of all, decide if this is a 486 or higher */ - /* It's a 486 if we can modify the AC flag */ - if ( flag_is_changeable_p(X86_EFLAGS_AC) ) - c->x86 = 4; - else - c->x86 = 3; - - /* Detect Cyrix with disabled CPUID */ - if ( c->x86 == 4 && test_cyrix_52div() ) { - unsigned char dir0, dir1; - - strcpy(c->x86_vendor_id, "CyrixInstead"); - c->x86_vendor = X86_VENDOR_CYRIX; - - /* Actually enable cpuid on the older cyrix */ - - /* Retrieve CPU revisions */ - - do_cyrix_devid(&dir0, &dir1); - - dir0>>=4; - - /* Check it is an affected model */ - - if (dir0 == 5 || dir0 == 3) - { - unsigned char ccr3, ccr4; - unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ccr4 = getCx86(CX86_CCR4); - setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - } - } else - - /* Detect NexGen with old hypercode */ - if ( deep_magic_nexgen_probe() ) { - strcpy(c->x86_vendor_id, "NexGenDriven"); - } - - return have_cpuid_p(); /* Check to see if CPUID now enabled? */ -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -void __init identify_cpu(struct cpuinfo_x86 *c) -{ - int junk, i; - u32 xlvl, tfms; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if ( !have_cpuid_p() && !id_and_try_enable_cpuid(c) ) { - /* CPU doesn't have CPUID */ - - /* If there are any capabilities, they're vendor-specific */ - /* enable_cpuid() would have set c->x86 for us. 
*/ - } else { - /* CPU does have CPUID */ - - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { - u32 capability; - cpuid(0x00000001, &tfms, &junk, &junk, &capability); - c->x86_capability[0] = capability; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - c->x86_mask = tfms & 15; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) - c->x86_capability[1] = cpuid_edx(0x80000001); - if ( xlvl >= 0x80000004 ) - get_model_name(c); /* Default name */ - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - } - - printk(KERN_DEBUG "CPU: Before vendor init, caps: %08lx %08lx %08lx, vendor = %d\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_vendor); - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! 
- */ - switch ( c->x86_vendor ) { - case X86_VENDOR_AMD: - init_amd(c); - break; - - case X86_VENDOR_CENTAUR: - init_centaur(c); - break; - - case X86_VENDOR_CYRIX: - init_cyrix(c); - break; - - case X86_VENDOR_INTEL: - init_intel(c); - break; - - case X86_VENDOR_NEXGEN: - c->x86_cache_size = 256; /* A few had 1 MB... */ - break; - - case X86_VENDOR_NSC: - init_cyrix(c); - break; - - case X86_VENDOR_RISE: - init_rise(c); - break; - - case X86_VENDOR_TRANSMETA: - init_transmeta(c); - break; - - case X86_VENDOR_UNKNOWN: - default: - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) - { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } - break; - - } - - printk(KERN_DEBUG "CPU: After vendor init, caps: %08lx %08lx %08lx %08lx\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* TSC disabled? */ -#ifndef CONFIG_X86_TSC - if ( tsc_disable ) - clear_bit(X86_FEATURE_TSC, c->x86_capability); -#endif - - /* FXSR disabled? */ - if (disable_x86_fxsr) { - clear_bit(X86_FEATURE_FXSR, c->x86_capability); - clear_bit(X86_FEATURE_XMM, c->x86_capability); - } - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* Init Machine Check Exception if available. */ - mcheck_init(c); - - /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { - char *p; - p = table_lookup_model(c); - if ( p ) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); - } - - /* Now the feature flags better reflect actual CPU features! 
*/ - - printk(KERN_DEBUG "CPU: After generic, caps: %08lx %08lx %08lx %08lx\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - printk(KERN_DEBUG "CPU: Common caps: %08lx %08lx %08lx %08lx\n", - boot_cpu_data.x86_capability[0], - boot_cpu_data.x86_capability[1], - boot_cpu_data.x86_capability[2], - boot_cpu_data.x86_capability[3]); -} -/* - * Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c - */ - -void __init dodgy_tsc(void) -{ - get_cpu_vendor(&boot_cpu_data); - - if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) || - ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC )) - init_cyrix(&boot_cpu_data); -} - - -/* These need to match <asm/processor.h> */ -static char *cpu_vendor_names[] __initdata = { - "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta", "NSC" }; - - -void __init print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) - vendor = cpu_vendor_names[c->x86_vendor]; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); - - if (!c->x86_model_id[0]) - printk("%d86", c->x86); - else - printk("%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); - else - printk("\n"); -} - -/* - * Get CPU information for use by the procfs. 
- */ -static int show_cpuinfo(struct seq_file *m, void *v) -{ - /* - * These flag bits must match the definitions in <asm/cpufeature.h>. - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu/<cpu_nr>/cpuid instead. - */ - static char *x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL, - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL, - NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - }; - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; - -#ifdef CONFIG_SMP - if (!(cpu_online_map & (1<<n))) - return 0; -#endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - n, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", - cpu_khz / 1000, (cpu_khz % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, c->x86_capability) && - x86_cap_flags[i] != NULL ) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? cpu_data + *pos : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - start: c_start, - next: c_next, - stop: c_stop, - show: show_cpuinfo, -}; - -unsigned long cpu_initialized __initdata = 0; - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. 
We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __init cpu_init (void) -{ - int nr = smp_processor_id(); - struct tss_struct * t = &init_tss[nr]; - - if (test_and_set_bit(nr, &cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", nr); - for (;;) __sti(); - } - printk(KERN_INFO "Initializing CPU#%d\n", nr); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); -#ifndef CONFIG_X86_TSC - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } -#endif - - __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); - __asm__ __volatile__("lidt %0": "=m" (idt_descr)); - - /* - * Delete NT - */ - __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); - - /* - * set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if(current->mm) - BUG(); - enter_lazy_tlb(&init_mm, current, nr); - - t->esp0 = current->thread.esp0; - set_tss_desc(nr,t); - gdt_table[__TSS(nr)].b &= 0xfffffdff; - load_TR(nr); - load_LDT(&init_mm.context); - - /* Clear %fs and %gs. 
*/ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); - - /* Clear all 6 debug registers: */ - -#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); - - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); - -#undef CD - - /* - * Force FPU initialization: - */ - clear_thread_flag(TIF_USEDFPU); - current->used_math = 0; - stts(); -} - -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __init ppro_with_ram_bug(void) -{ - char vendor_id[16]; - int ident; - - /* Must have CPUID */ - if(!have_cpuid_p()) - return 0; - if(cpuid_eax(0)<1) - return 0; - - /* Must be Intel */ - cpuid(0, &ident, - (int *)&vendor_id[0], - (int *)&vendor_id[8], - (int *)&vendor_id[4]); - - if(memcmp(vendor_id, "IntelInside", 12)) - return 0; - - ident = cpuid_eax(1); - - /* Model 6 */ - - if(((ident>>8)&15)!=6) - return 0; - - /* Pentium Pro */ - - if(((ident>>4)&15)!=1) - return 0; - - if((ident&15) < 8) - { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - printk(KERN_INFO "Your Pentium Pro seems ok.\n"); - return 0; -} - /* * Local Variables: * mode:c diff --git a/arch/i386/kernel/suspend.c b/arch/i386/kernel/suspend.c index 07f966251209..be913bbd16f6 100644 --- a/arch/i386/kernel/suspend.c +++ b/arch/i386/kernel/suspend.c @@ -24,8 +24,156 @@ #include <linux/suspend.h> #include <asm/uaccess.h> #include <asm/acpi.h> +#include <asm/tlbflush.h> +static struct saved_context saved_context; + +/* + * save_processor_context + * + * Save the state of the processor before we go to sleep. + * + * return_stack is the value of the stack pointer (%esp) as the caller sees it. + * A good way could not be found to obtain it from here (don't want to make _too_ + * many assumptions about the layout of the stack this far down.) 
Also, the + * handy little __builtin_frame_pointer(level) where level > 0, is blatantly + * buggy - it returns the value of the stack at the proper location, not the + * location, like it should (as of gcc 2.91.66) + * + * Note that the context and timing of this function is pretty critical. + * With a minimal amount of things going on in the caller and in here, gcc + * does a good job of being just a dumb compiler. Watch the assembly output + * if anything changes, though, and make sure everything is going in the right + * place. + */ +static inline void save_processor_context (void) +{ + kernel_fpu_begin(); + + /* + * descriptor tables + */ + asm volatile ("sgdt (%0)" : "=m" (saved_context.gdt_limit)); + asm volatile ("sidt (%0)" : "=m" (saved_context.idt_limit)); + asm volatile ("sldt (%0)" : "=m" (saved_context.ldt)); + asm volatile ("str (%0)" : "=m" (saved_context.tr)); + + /* + * save the general registers. + * note that gcc has constructs to specify output of certain registers, + * but they're not used here, because it assumes that you want to modify + * those registers, so it tries to be smart and save them beforehand. + * It's really not necessary, and kinda fishy (check the assembly output), + * so it's avoided. 
+ */ + asm volatile ("movl %%esp, (%0)" : "=m" (saved_context.esp)); + asm volatile ("movl %%eax, (%0)" : "=m" (saved_context.eax)); + asm volatile ("movl %%ebx, (%0)" : "=m" (saved_context.ebx)); + asm volatile ("movl %%ecx, (%0)" : "=m" (saved_context.ecx)); + asm volatile ("movl %%edx, (%0)" : "=m" (saved_context.edx)); + asm volatile ("movl %%ebp, (%0)" : "=m" (saved_context.ebp)); + asm volatile ("movl %%esi, (%0)" : "=m" (saved_context.esi)); + asm volatile ("movl %%edi, (%0)" : "=m" (saved_context.edi)); + + /* + * segment registers + */ + asm volatile ("movw %%es, %0" : "=r" (saved_context.es)); + asm volatile ("movw %%fs, %0" : "=r" (saved_context.fs)); + asm volatile ("movw %%gs, %0" : "=r" (saved_context.gs)); + asm volatile ("movw %%ss, %0" : "=r" (saved_context.ss)); + + /* + * control registers + */ + asm volatile ("movl %%cr0, %0" : "=r" (saved_context.cr0)); + asm volatile ("movl %%cr2, %0" : "=r" (saved_context.cr2)); + asm volatile ("movl %%cr3, %0" : "=r" (saved_context.cr3)); + asm volatile ("movl %%cr4, %0" : "=r" (saved_context.cr4)); + + /* + * eflags + */ + asm volatile ("pushfl ; popl (%0)" : "=m" (saved_context.eflags)); +} + +/* + * restore_processor_context + * + * Restore the processor context as it was before we went to sleep + * - descriptor tables + * - control registers + * - segment registers + * - flags + * + * Note that it is critical that this function is declared inline. + * It was separated out from restore_state to make that function + * a little clearer, but it needs to be inlined because we won't have a + * stack when we get here (so we can't push a return address). 
+ */ +static inline void restore_processor_context (void) +{ + /* + * first restore %ds, so we can access our data properly + */ + asm volatile (".align 4"); + asm volatile ("movw %0, %%ds" :: "r" ((u16)__KERNEL_DS)); + + + /* + * control registers + */ + asm volatile ("movl %0, %%cr4" :: "r" (saved_context.cr4)); + asm volatile ("movl %0, %%cr3" :: "r" (saved_context.cr3)); + asm volatile ("movl %0, %%cr2" :: "r" (saved_context.cr2)); + asm volatile ("movl %0, %%cr0" :: "r" (saved_context.cr0)); + + /* + * segment registers + */ + asm volatile ("movw %0, %%es" :: "r" (saved_context.es)); + asm volatile ("movw %0, %%fs" :: "r" (saved_context.fs)); + asm volatile ("movw %0, %%gs" :: "r" (saved_context.gs)); + asm volatile ("movw %0, %%ss" :: "r" (saved_context.ss)); + + /* + * the other general registers + * + * note that even though gcc has constructs to specify memory + * input into certain registers, it will try to be too smart + * and save them at the beginning of the function. This is esp. + * bad since we don't have a stack set up when we enter, and we + * want to preserve the values on exit. So, we set them manually. + */ + asm volatile ("movl %0, %%esp" :: "m" (saved_context.esp)); + asm volatile ("movl %0, %%ebp" :: "m" (saved_context.ebp)); + asm volatile ("movl %0, %%eax" :: "m" (saved_context.eax)); + asm volatile ("movl %0, %%ebx" :: "m" (saved_context.ebx)); + asm volatile ("movl %0, %%ecx" :: "m" (saved_context.ecx)); + asm volatile ("movl %0, %%edx" :: "m" (saved_context.edx)); + asm volatile ("movl %0, %%esi" :: "m" (saved_context.esi)); + asm volatile ("movl %0, %%edi" :: "m" (saved_context.edi)); + + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). 
+ */ + asm volatile ("lgdt (%0)" :: "m" (saved_context.gdt_limit)); + asm volatile ("lidt (%0)" :: "m" (saved_context.idt_limit)); + asm volatile ("lldt (%0)" :: "m" (saved_context.ldt)); + + fix_processor_context(); + + /* + * the flags + */ + asm volatile ("pushl %0 ; popfl" :: "m" (saved_context.eflags)); + + do_fpu_end(); +} + +#ifdef CONFIG_ACPI_SLEEP void do_suspend_lowlevel(int resume) { /* @@ -44,3 +192,111 @@ acpi_sleep_done: acpi_restore_register_state(); restore_processor_context(); } +#endif + +void fix_processor_context(void) +{ + int nr = smp_processor_id(); + struct tss_struct * t = &init_tss[nr]; + + set_tss_desc(nr,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ + gdt_table[__TSS(nr)].b &= 0xfffffdff; + + load_TR(nr); /* This does ltr */ + + load_LDT(¤t->mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ + if (current->thread.debugreg[7]){ + loaddebug(¤t->thread, 0); + loaddebug(¤t->thread, 1); + loaddebug(¤t->thread, 2); + loaddebug(¤t->thread, 3); + /* no 4 and 5 */ + loaddebug(¤t->thread, 6); + loaddebug(¤t->thread, 7); + } + +} + +static void +do_fpu_end(void) +{ + /* restore FPU regs if necessary */ + /* Do it out of line so that gcc does not move cr0 load to some stupid place */ + kernel_fpu_end(); +} + +/* Local variables for do_magic */ +static int loop __nosavedata = 0; +static int loop2 __nosavedata = 0; + +/* + * (KG): Since we affect stack here, we make this function as flat and easy + * as possible in order to not provoke gcc to use local variables on the stack. + * Note that on resume, all (expect nosave) variables will have the state from + * the time of writing (suspend_save_image) and the registers (including the + * stack pointer, but excluding the instruction pointer) will be loaded with + * the values saved at save_processor_context() time. 
+ */ +void do_magic(int resume) +{ + /* DANGER WILL ROBINSON! + * + * If this function is too difficult for gcc to optimize, it will crash and burn! + * see above. + * + * DO NOT TOUCH. + */ + + if (!resume) { + do_magic_suspend_1(); + save_processor_context(); /* We need to capture registers and memory at "same time" */ + do_magic_suspend_2(); /* If everything goes okay, this function does not return */ + return; + } + + /* We want to run from swapper_pg_dir, since swapper_pg_dir is stored in constant + * place in memory + */ + + __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir))); + +/* + * Final function for resuming: after copying the pages to their original + * position, it restores the register state. + * + * What about page tables? Writing data pages may toggle + * accessed/dirty bits in our page tables. That should be no problems + * with 4MB page tables. That's why we require have_pse. + * + * This loops destroys stack from under itself, so it better should + * not use any stack space, itself. When this function is entered at + * resume time, we move stack to _old_ place. This is means that this + * function must use no stack and no local variables in registers, + * until calling restore_processor_context(); + * + * Critical section here: noone should touch saved memory after + * do_magic_resume_1; copying works, because nr_copy_pages, + * pagedir_nosave, loop and loop2 are nosavedata. 
+ */ + do_magic_resume_1(); + + for (loop=0; loop < nr_copy_pages; loop++) { + /* You may not call something (like copy_page) here: see above */ + for (loop2=0; loop2 < PAGE_SIZE; loop2++) { + *(((char *)((pagedir_nosave+loop)->orig_address))+loop2) = + *(((char *)((pagedir_nosave+loop)->address))+loop2); + __flush_tlb(); + } + } + + restore_processor_context(); + +/* Ahah, we now run with our old stack, and with registers copied from + suspend time */ + + do_magic_resume_2(); +} diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index ed5600563491..0376f044afd9 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -26,7 +26,7 @@ strip-flags = $(addprefix --remove-section=,$(drop-sections)) all: vmlinux.ecoff addinitrd -vmlinux.ecoff: $(CONFIGURE) elf2ecoff $(TOPDIR)/vmlinux +vmlinux.ecoff: elf2ecoff $(TOPDIR)/vmlinux ./elf2ecoff $(TOPDIR)/vmlinux vmlinux.ecoff $(E2EFLAGS) elf2ecoff: elf2ecoff.c diff --git a/arch/mips64/boot/Makefile b/arch/mips64/boot/Makefile index 57d4964eac05..e894ee7d7bfa 100644 --- a/arch/mips64/boot/Makefile +++ b/arch/mips64/boot/Makefile @@ -17,7 +17,7 @@ endif all: vmlinux.ecoff addinitrd -vmlinux.ecoff: $(CONFIGURE) elf2ecoff $(TOPDIR)/vmlinux +vmlinux.ecoff: elf2ecoff $(TOPDIR)/vmlinux ./elf2ecoff $(TOPDIR)/vmlinux vmlinux.ecoff $(E2EFLAGS) elf2ecoff: elf2ecoff.c diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 4fd3b1d044b7..4dec864fed7d 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -1,11 +1,6 @@ # # arch/s390/boot/Makefile # -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now in the main makefile... 
OBJCOPY = $(CROSS_COMPILE)objcopy @@ -21,7 +16,7 @@ include $(TOPDIR)/Rules.make %.boot: %.lnk $(OBJCOPY) -O binary $< $@ -image: $(CONFIGURE) $(TOPDIR)/vmlinux \ +image: $(TOPDIR)/vmlinux \ iplfba.boot ipleckd.boot ipldump.boot $(OBJCOPY) -O binary $(TOPDIR)/vmlinux image $(NM) $(TOPDIR)/vmlinux | grep -v '\(compiled\)\|\( [aUw] \)\|\(\.\)\|\(LASH[RL]DI\)' | sort > $(TOPDIR)/System.map diff --git a/arch/s390x/boot/Makefile b/arch/s390x/boot/Makefile index 8020a8d4891c..d698b210f73c 100644 --- a/arch/s390x/boot/Makefile +++ b/arch/s390x/boot/Makefile @@ -1,11 +1,6 @@ # # Makefile for the linux s390-specific parts of the memory manager. # -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now in the main makefile... OBJCOPY = $(CROSS_COMPILE)objcopy @@ -21,7 +16,7 @@ EXTRA_AFLAGS := -traditional %.boot: %.lnk $(OBJCOPY) -O binary $< $@ -image: $(CONFIGURE) $(TOPDIR)/vmlinux \ +image: $(TOPDIR)/vmlinux \ iplfba.boot ipleckd.boot ipldump.boot $(OBJCOPY) -O binary $(TOPDIR)/vmlinux image $(NM) $(TOPDIR)/vmlinux | grep -v '\(compiled\)\|\( [aUw] \)\|\(\.\)\|\(LASH[RL]DI\)' | sort > $(TOPDIR)/System.map diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile index aff166e2802f..cae1f2831dd8 100644 --- a/arch/sh/boot/Makefile +++ b/arch/sh/boot/Makefile @@ -10,19 +10,19 @@ SYSTEM =$(TOPDIR)/vmlinux -Image: $(CONFIGURE) $(SYSTEM) +Image: $(SYSTEM) $(OBJCOPY) $(SYSTEM) Image -zImage: $(CONFIGURE) compressed/vmlinux +zImage: compressed/vmlinux $(OBJCOPY) compressed/vmlinux zImage compressed/vmlinux: $(TOPDIR)/vmlinux $(MAKE) -C compressed vmlinux -install: $(CONFIGURE) Image +install: Image sh -x ./install.sh $(KERNELRELEASE) Image $(TOPDIR)/System.map "$(INSTALL_PATH)" -zinstall: $(CONFIGURE) zImage +zinstall: zImage sh -x ./install.sh $(KERNELRELEASE) zImage $(TOPDIR)/System.map 
"$(INSTALL_PATH)" dep: diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c index ecd4e6d150e7..a48cb980c329 100644 --- a/arch/sparc/kernel/devices.c +++ b/arch/sparc/kernel/devices.c @@ -22,8 +22,8 @@ extern void cpu_probe(void); extern void clock_stop_probe(void); /* tadpole.c */ extern void sun4c_probe_memerr_reg(void); -unsigned long __init -device_scan(unsigned long mem_start) +void __init +device_scan(void) { char node_str[128]; int thismid; @@ -37,46 +37,45 @@ device_scan(unsigned long mem_start) int scan; scan = prom_getchild(prom_root_node); /* One can look it up in PROM instead */ - /* prom_printf("root child is %08lx\n", (unsigned long) scan); */ - while((scan = prom_getsibling(scan)) != 0) { - prom_getstring(scan, "device_type", node_str, sizeof(node_str)); - if(strcmp(node_str, "cpu") == 0) { + while ((scan = prom_getsibling(scan)) != 0) { + prom_getstring(scan, "device_type", + node_str, sizeof(node_str)); + if (strcmp(node_str, "cpu") == 0) { linux_cpus[linux_num_cpus].prom_node = scan; - prom_getproperty(scan, "mid", (char *) &thismid, sizeof(thismid)); + prom_getproperty(scan, "mid", + (char *) &thismid, sizeof(thismid)); linux_cpus[linux_num_cpus].mid = thismid; - /* prom_printf("Found CPU %d <node=%08lx,mid=%d>\n", linux_num_cpus, (unsigned long) scan, thismid); */ - printk("Found CPU %d <node=%08lx,mid=%d>\n", linux_num_cpus, (unsigned long) scan, thismid); + printk("Found CPU %d <node=%08lx,mid=%d>\n", + linux_num_cpus, (unsigned long) scan, thismid); linux_num_cpus++; } } - if(linux_num_cpus == 0) { - if (sparc_cpu_model == sun4d) { - scan = prom_getchild(prom_root_node); - for (scan = prom_searchsiblings(scan, "cpu-unit"); scan; - scan = prom_searchsiblings(prom_getsibling(scan), "cpu-unit")) { - int node = prom_getchild(scan); + if (linux_num_cpus == 0 && sparc_cpu_model == sun4d) { + scan = prom_getchild(prom_root_node); + for (scan = prom_searchsiblings(scan, "cpu-unit"); scan; + scan = 
prom_searchsiblings(prom_getsibling(scan), "cpu-unit")) { + int node = prom_getchild(scan); - prom_getstring(node, "device_type", node_str, sizeof(node_str)); - if (strcmp(node_str, "cpu") == 0) { - prom_getproperty(node, "cpu-id", (char *) &thismid, sizeof(thismid)); - linux_cpus[linux_num_cpus].prom_node = node; - linux_cpus[linux_num_cpus].mid = thismid; - /* prom_printf("Found CPU %d <node=%08lx,mid=%d>\n", - linux_num_cpus, (unsigned long) node, thismid); */ - printk("Found CPU %d <node=%08lx,mid=%d>\n", - linux_num_cpus, (unsigned long) node, thismid); - linux_num_cpus++; - } + prom_getstring(node, "device_type", + node_str, sizeof(node_str)); + if (strcmp(node_str, "cpu") == 0) { + prom_getproperty(node, "cpu-id", + (char *) &thismid, sizeof(thismid)); + linux_cpus[linux_num_cpus].prom_node = node; + linux_cpus[linux_num_cpus].mid = thismid; + printk("Found CPU %d <node=%08lx,mid=%d>\n", + linux_num_cpus, (unsigned long) node, thismid); + linux_num_cpus++; } } } - if(linux_num_cpus == 0) { + if (linux_num_cpus == 0) { printk("No CPU nodes found, cannot continue.\n"); /* Probably a sun4e, Sun is trying to trick us ;-) */ halt(); } printk("Found %d CPU prom device tree node(s).\n", linux_num_cpus); - }; + } cpu_probe(); #ifdef CONFIG_SUN_AUXIO @@ -92,5 +91,5 @@ device_scan(unsigned long mem_start) if (ARCH_SUN4C_SUN4) sun4c_probe_memerr_reg(); - return mem_start; + return; } diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 26b4afb785d4..02fd8d66fd04 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -35,6 +35,7 @@ #include <asm/mmu_context.h> #include <asm/sun4paddr.h> #include <asm/highmem.h> +#include <asm/btfixup.h> /* Because of our dynamic kernel TLB miss strategy, and how * our DVMA mapping allocation works, you _MUST_: @@ -63,25 +64,17 @@ extern unsigned long page_kernel; #define SUN4C_KERNEL_BUCKETS 32 -#ifndef MAX -#define MAX(a,b) ((a)<(b)?(b):(a)) -#endif -#ifndef MIN -#define MIN(a,b) ((a)<(b)?(a):(b)) -#endif - /* 
Flushing the cache. */ struct sun4c_vac_props sun4c_vacinfo; unsigned long sun4c_kernel_faults; /* Invalidate every sun4c cache line tag. */ -void sun4c_flush_all(void) +static void __init sun4c_flush_all(void) { unsigned long begin, end; if (sun4c_vacinfo.on) - panic("SUN4C: AIEEE, trying to invalidate vac while" - " it is on."); + panic("SUN4C: AIEEE, trying to invalidate vac while it is on."); /* Clear 'valid' bit in all cache line tags */ begin = AC_CACHETAGS; @@ -93,7 +86,7 @@ void sun4c_flush_all(void) } } -static __inline__ void sun4c_flush_context_hw(void) +static void sun4c_flush_context_hw(void) { unsigned long end = SUN4C_VAC_SIZE; @@ -122,8 +115,17 @@ static void sun4c_flush_segment_hw(unsigned long addr) } } +/* File local boot time fixups. */ +BTFIXUPDEF_CALL(void, sun4c_flush_page, unsigned long) +BTFIXUPDEF_CALL(void, sun4c_flush_segment, unsigned long) +BTFIXUPDEF_CALL(void, sun4c_flush_context, void) + +#define sun4c_flush_page(addr) BTFIXUP_CALL(sun4c_flush_page)(addr) +#define sun4c_flush_segment(addr) BTFIXUP_CALL(sun4c_flush_segment)(addr) +#define sun4c_flush_context() BTFIXUP_CALL(sun4c_flush_context)() + /* Must be called minimally with interrupts disabled. */ -static __inline__ void sun4c_flush_page_hw(unsigned long addr) +static void sun4c_flush_page_hw(unsigned long addr) { addr &= PAGE_MASK; if ((int)sun4c_get_pte(addr) < 0) @@ -195,48 +197,6 @@ static void sun4c_flush_segment_sw(unsigned long addr) } } -/* Bolix one page from the virtual cache. 
*/ -static void sun4c_flush_page(unsigned long addr) -{ - addr &= PAGE_MASK; - - if ((sun4c_get_pte(addr) & (_SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_VALID)) != - _SUN4C_PAGE_VALID) - return; - - if (sun4c_vacinfo.do_hwflushes) { - __asm__ __volatile__("sta %%g0, [%0] %1;nop;nop;nop;\n\t" : : - "r" (addr), "i" (ASI_HWFLUSHPAGE)); - } else { - unsigned long left = PAGE_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__("add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %1, %%o5, %1\n\t" - "sta %%g0, [%0] %6\n\t" - "sta %%g0, [%0 + %2] %6\n\t" - "sta %%g0, [%0 + %%g1] %6\n\t" - "sta %%g0, [%0 + %%g2] %6\n\t" - "sta %%g0, [%0 + %%g3] %6\n\t" - "sta %%g0, [%0 + %%g4] %6\n\t" - "sta %%g0, [%0 + %%g5] %6\n\t" - "sta %%g0, [%0 + %%o4] %6\n\t" - "bg 1b\n\t" - " add %0, %%o5, %0\n\t" - : "=&r" (addr), "=&r" (left), "=&r" (lsize) - : "0" (addr), "1" (left), "2" (lsize), - "i" (ASI_FLUSHPG) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); - } -} - /* Don't inline the software version as it eats too many cache lines if expanded. 
*/ static void sun4c_flush_page_sw(unsigned long addr) { @@ -387,7 +347,8 @@ void __init sun4c_probe_vac(void) prom_getintdefault(prom_root_node, "vac_hwflush", 0); if (sun4c_vacinfo.num_bytes != 65536) { - prom_printf("WEIRD Sun4C VAC cache size, tell davem"); + prom_printf("WEIRD Sun4C VAC cache size, " + "tell sparclinux@vger.kernel.org"); prom_halt(); } } @@ -427,7 +388,7 @@ extern unsigned long vac_hwflush_patch2, vac_hwflush_patch2_on; *daddr = *iaddr; \ } while (0) -static void patch_kernel_fault_handler(void) +static void __init patch_kernel_fault_handler(void) { unsigned long *iaddr, *daddr; @@ -459,10 +420,6 @@ static void patch_kernel_fault_handler(void) case 16: PATCH_INSN(num_context_patch1_16, num_context_patch1); -#if 0 - PATCH_INSN(num_context_patch2_16, - num_context_patch2); -#endif break; default: prom_printf("Unhandled number of contexts: %d\n", @@ -867,7 +824,7 @@ static void sun4c_kernel_map(struct sun4c_mmu_entry *kentry) #define sun4c_user_unmap(__entry) \ sun4c_put_segmap((__entry)->vaddr, invalid_segment) -static void sun4c_demap_context_hw(struct sun4c_mmu_ring *crp, unsigned char ctx) +static void sun4c_demap_context(struct sun4c_mmu_ring *crp, unsigned char ctx) { struct sun4c_mmu_entry *head = &crp->ringhd; unsigned long flags; @@ -879,7 +836,7 @@ static void sun4c_demap_context_hw(struct sun4c_mmu_ring *crp, unsigned char ctx flush_user_windows(); sun4c_set_context(ctx); - sun4c_flush_context_hw(); + sun4c_flush_context(); do { struct sun4c_mmu_entry *next = entry->next; @@ -893,34 +850,8 @@ static void sun4c_demap_context_hw(struct sun4c_mmu_ring *crp, unsigned char ctx restore_flags(flags); } -static void sun4c_demap_context_sw(struct sun4c_mmu_ring *crp, unsigned char ctx) -{ - struct sun4c_mmu_entry *head = &crp->ringhd; - unsigned long flags; - - save_and_cli(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - flush_user_windows(); - sun4c_set_context(ctx); - 
sun4c_flush_context_sw(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - restore_flags(flags); -} - -static int sun4c_user_taken_entries = 0; /* This is how much we have. */ -static int max_user_taken_entries = 0; /* This limits us and prevents deadlock. */ +static int sun4c_user_taken_entries; /* This is how much we have. */ +static int max_user_taken_entries; /* This limits us and prevents deadlock. */ static struct sun4c_mmu_entry *sun4c_kernel_strategy(void) { @@ -934,10 +865,7 @@ static struct sun4c_mmu_entry *sun4c_kernel_strategy(void) /* Else free one up. */ this_entry = sun4c_kernel_ring.ringhd.prev; - if (sun4c_vacinfo.do_hwflushes) - sun4c_flush_segment_hw(this_entry->vaddr); - else - sun4c_flush_segment_sw(this_entry->vaddr); + sun4c_flush_segment(this_entry->vaddr); sun4c_kernel_unmap(this_entry); free_kernel_entry(this_entry, &sun4c_kernel_ring); this_entry = sun4c_kfree_ring.ringhd.next; @@ -976,10 +904,7 @@ static struct sun4c_mmu_entry *sun4c_user_strategy(void) savectx = sun4c_get_context(); flush_user_windows(); sun4c_set_context(ctx); - if (sun4c_vacinfo.do_hwflushes) - sun4c_flush_segment_hw(entry->vaddr); - else - sun4c_flush_segment_sw(entry->vaddr); + sun4c_flush_segment(entry->vaddr); sun4c_user_unmap(entry); remove_ring(sun4c_context_ring + ctx, entry); remove_lru(entry); @@ -1068,10 +993,7 @@ static void free_locked_segment(unsigned long addr) entry = &mmu_entry_pool[pseg]; flush_user_windows(); - if (sun4c_vacinfo.do_hwflushes) - sun4c_flush_segment_hw(addr); - else - sun4c_flush_segment_sw(addr); + sun4c_flush_segment(addr); sun4c_kernel_unmap(entry); add_ring(&sun4c_ufree_ring, entry); max_user_taken_entries++; @@ -1126,17 +1048,10 @@ static struct task_struct *sun4c_alloc_task_struct(void) /* We are changing the virtual color of the page(s) * so we must flush the cache to guarentee 
consistancy. */ - if (sun4c_vacinfo.do_hwflushes) { - sun4c_flush_page_hw(pages); -#ifndef CONFIG_SUN4 - sun4c_flush_page_hw(pages + PAGE_SIZE); -#endif - } else { - sun4c_flush_page_sw(pages); + sun4c_flush_page(pages); #ifndef CONFIG_SUN4 - sun4c_flush_page_sw(pages + PAGE_SIZE); + sun4c_flush_page(pages + PAGE_SIZE); #endif - } sun4c_put_pte(addr, BUCKET_PTE(pages)); #ifndef CONFIG_SUN4 @@ -1145,32 +1060,7 @@ static struct task_struct *sun4c_alloc_task_struct(void) return (struct task_struct *) addr; } -static void sun4c_free_task_struct_hw(struct task_struct *tsk) -{ - unsigned long tsaddr = (unsigned long) tsk; - unsigned long pages = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr)); - int entry = BUCKET_NUM(tsaddr); - - if (atomic_dec_and_test(&(tsk)->thread.refcount)) { - /* We are deleting a mapping, so the flush here is mandatory. */ - sun4c_flush_page_hw(tsaddr); -#ifndef CONFIG_SUN4 - sun4c_flush_page_hw(tsaddr + PAGE_SIZE); -#endif - sun4c_put_pte(tsaddr, 0); -#ifndef CONFIG_SUN4 - sun4c_put_pte(tsaddr + PAGE_SIZE, 0); -#endif - sun4c_bucket[entry] = BUCKET_EMPTY; - if (entry < sun4c_lowbucket_avail) - sun4c_lowbucket_avail = entry; - - free_pages(pages, TASK_STRUCT_ORDER); - garbage_collect(entry); - } -} - -static void sun4c_free_task_struct_sw(struct task_struct *tsk) +static void sun4c_free_task_struct(struct task_struct *tsk) { unsigned long tsaddr = (unsigned long) tsk; unsigned long pages = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr)); @@ -1178,9 +1068,9 @@ static void sun4c_free_task_struct_sw(struct task_struct *tsk) if (atomic_dec_and_test(&(tsk)->thread.refcount)) { /* We are deleting a mapping, so the flush here is mandatory. 
*/ - sun4c_flush_page_sw(tsaddr); + sun4c_flush_page(tsaddr); #ifndef CONFIG_SUN4 - sun4c_flush_page_sw(tsaddr + PAGE_SIZE); + sun4c_flush_page(tsaddr + PAGE_SIZE); #endif sun4c_put_pte(tsaddr, 0); #ifndef CONFIG_SUN4 @@ -1452,131 +1342,7 @@ static void sun4c_flush_cache_all(void) } } -static void sun4c_flush_cache_mm_hw(struct mm_struct *mm) -{ - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - flush_user_windows(); - if (sun4c_context_ring[new_ctx].num_entries) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - unsigned long flags; - - save_and_cli(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - sun4c_flush_context_hw(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - restore_flags(flags); - } - } -} - -static void sun4c_flush_cache_range_hw(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - struct sun4c_mmu_entry *entry; - unsigned long flags; - - flush_user_windows(); - - save_and_cli(flags); - - /* All user segmap chains are ordered on entry->vaddr. */ - for (entry = head->next; - (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); - entry = entry->next) - ; - - /* Tracing various job mixtures showed that this conditional - * only passes ~35% of the time for most worse case situations, - * therefore we avoid all of this gross overhead ~65% of the time. 
- */ - if ((entry != head) && (entry->vaddr < end)) { - int octx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - - /* At this point, always, (start >= entry->vaddr) and - * (entry->vaddr < end), once the latter condition - * ceases to hold, or we hit the end of the list, we - * exit the loop. The ordering of all user allocated - * segmaps makes this all work out so beautifully. - */ - do { - struct sun4c_mmu_entry *next = entry->next; - unsigned long realend; - - /* "realstart" is always >= entry->vaddr */ - realend = entry->vaddr + SUN4C_REAL_PGDIR_SIZE; - if (end < realend) - realend = end; - if ((realend - entry->vaddr) <= (PAGE_SIZE << 3)) { - unsigned long page = entry->vaddr; - while (page < realend) { - sun4c_flush_page_hw(page); - page += PAGE_SIZE; - } - } else { - sun4c_flush_segment_hw(entry->vaddr); - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - } - entry = next; - } while ((entry != head) && (entry->vaddr < end)); - sun4c_set_context(octx); - } - restore_flags(flags); - } -} - -static void sun4c_flush_cache_page_hw(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - /* Sun4c has no separate I/D caches so cannot optimize for non - * text page flushes. 
- */ - if (new_ctx != NO_CONTEXT) { - int octx = sun4c_get_context(); - unsigned long flags; - - flush_user_windows(); - save_and_cli(flags); - sun4c_set_context(new_ctx); - sun4c_flush_page_hw(page); - sun4c_set_context(octx); - restore_flags(flags); - } -} - -static void sun4c_flush_page_to_ram_hw(unsigned long page) -{ - unsigned long flags; - - save_and_cli(flags); - sun4c_flush_page_hw(page); - restore_flags(flags); -} - -static void sun4c_flush_cache_mm_sw(struct mm_struct *mm) +static void sun4c_flush_cache_mm(struct mm_struct *mm) { int new_ctx = mm->context; @@ -1593,7 +1359,7 @@ static void sun4c_flush_cache_mm_sw(struct mm_struct *mm) int savectx = sun4c_get_context(); sun4c_set_context(new_ctx); - sun4c_flush_context_sw(); + sun4c_flush_context(); do { struct sun4c_mmu_entry *next = entry->next; @@ -1653,11 +1419,11 @@ static void sun4c_flush_cache_range_sw(struct vm_area_struct *vma, unsigned long if ((realend - entry->vaddr) <= (PAGE_SIZE << 3)) { unsigned long page = entry->vaddr; while (page < realend) { - sun4c_flush_page_sw(page); + sun4c_flush_page(page); page += PAGE_SIZE; } } else { - sun4c_flush_segment_sw(entry->vaddr); + sun4c_flush_segment(entry->vaddr); sun4c_user_unmap(entry); free_user_entry(new_ctx, entry); } @@ -1669,7 +1435,7 @@ static void sun4c_flush_cache_range_sw(struct vm_area_struct *vma, unsigned long } } -static void sun4c_flush_cache_page_sw(struct vm_area_struct *vma, unsigned long page) +static void sun4c_flush_cache_page(struct vm_area_struct *vma, unsigned long page) { struct mm_struct *mm = vma->vm_mm; int new_ctx = mm->context; @@ -1684,18 +1450,18 @@ static void sun4c_flush_cache_page_sw(struct vm_area_struct *vma, unsigned long flush_user_windows(); save_and_cli(flags); sun4c_set_context(new_ctx); - sun4c_flush_page_sw(page); + sun4c_flush_page(page); sun4c_set_context(octx); restore_flags(flags); } } -static void sun4c_flush_page_to_ram_sw(unsigned long page) +static void sun4c_flush_page_to_ram(unsigned long page) { 
unsigned long flags; save_and_cli(flags); - sun4c_flush_page_sw(page); + sun4c_flush_page(page); restore_flags(flags); } @@ -1723,10 +1489,7 @@ static void sun4c_flush_tlb_all(void) flush_user_windows(); while (sun4c_kernel_ring.num_entries) { next_entry = this_entry->next; - if (sun4c_vacinfo.do_hwflushes) - sun4c_flush_segment_hw(this_entry->vaddr); - else - sun4c_flush_segment_sw(this_entry->vaddr); + sun4c_flush_segment(this_entry->vaddr); for (ctx = 0; ctx < num_contexts; ctx++) { sun4c_set_context(ctx); sun4c_put_segmap(this_entry->vaddr, invalid_segment); @@ -1738,7 +1501,7 @@ static void sun4c_flush_tlb_all(void) restore_flags(flags); } -static void sun4c_flush_tlb_mm_hw(struct mm_struct *mm) +static void sun4c_flush_tlb_mm(struct mm_struct *mm) { int new_ctx = mm->context; @@ -1752,91 +1515,7 @@ static void sun4c_flush_tlb_mm_hw(struct mm_struct *mm) int savectx = sun4c_get_context(); sun4c_set_context(new_ctx); - sun4c_flush_context_hw(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - restore_flags(flags); - } -} - -static void sun4c_flush_tlb_range_hw(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - struct sun4c_mmu_entry *entry; - unsigned long flags; - - save_and_cli(flags); - /* See commentary in sun4c_flush_cache_range_*(). 
*/ - for (entry = head->next; - (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); - entry = entry->next) - ; - - if ((entry != head) && (entry->vaddr < end)) { - int octx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_flush_segment_hw(entry->vaddr); - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while ((entry != head) && (entry->vaddr < end)); - sun4c_set_context(octx); - } - restore_flags(flags); - } -} - -static void sun4c_flush_tlb_page_hw(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - int savectx = sun4c_get_context(); - unsigned long flags; - - save_and_cli(flags); - sun4c_set_context(new_ctx); - page &= PAGE_MASK; - sun4c_flush_page_hw(page); - sun4c_put_pte(page, 0); - sun4c_set_context(savectx); - restore_flags(flags); - } -} - -static void sun4c_flush_tlb_mm_sw(struct mm_struct *mm) -{ - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - unsigned long flags; - - save_and_cli(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - sun4c_flush_context_sw(); + sun4c_flush_context(); do { struct sun4c_mmu_entry *next = entry->next; @@ -1862,7 +1541,7 @@ static void sun4c_flush_tlb_range_sw(struct vm_area_struct *vma, unsigned long s unsigned long flags; save_and_cli(flags); - /* See commentary in sun4c_flush_cache_range_*(). */ + /* See commentary in sun4c_flush_cache_range(). 
*/ for (entry = head->next; (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); entry = entry->next) @@ -1875,7 +1554,7 @@ static void sun4c_flush_tlb_range_sw(struct vm_area_struct *vma, unsigned long s do { struct sun4c_mmu_entry *next = entry->next; - sun4c_flush_segment_sw(entry->vaddr); + sun4c_flush_segment(entry->vaddr); sun4c_user_unmap(entry); free_user_entry(new_ctx, entry); @@ -1887,7 +1566,7 @@ static void sun4c_flush_tlb_range_sw(struct vm_area_struct *vma, unsigned long s } } -static void sun4c_flush_tlb_page_sw(struct vm_area_struct *vma, unsigned long page) +static void sun4c_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { struct mm_struct *mm = vma->vm_mm; int new_ctx = mm->context; @@ -1899,7 +1578,7 @@ static void sun4c_flush_tlb_page_sw(struct vm_area_struct *vma, unsigned long pa save_and_cli(flags); sun4c_set_context(new_ctx); page &= PAGE_MASK; - sun4c_flush_page_sw(page); + sun4c_flush_page(page); sun4c_put_pte(page, 0); sun4c_set_context(savectx); restore_flags(flags); @@ -1923,7 +1602,7 @@ void sun4c_unmapioaddr(unsigned long virt_addr) sun4c_put_pte(virt_addr, 0); } -static void sun4c_alloc_context_hw(struct mm_struct *old_mm, struct mm_struct *mm) +static void sun4c_alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ctx_list *ctxp; @@ -1943,92 +1622,35 @@ static void sun4c_alloc_context_hw(struct mm_struct *old_mm, struct mm_struct *m ctxp->ctx_mm->context = NO_CONTEXT; ctxp->ctx_mm = mm; mm->context = ctxp->ctx_number; - sun4c_demap_context_hw(&sun4c_context_ring[ctxp->ctx_number], - ctxp->ctx_number); -} - -/* Switch the current MM context. */ -static void sun4c_switch_mm_hw(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) -{ - struct ctx_list *ctx; - int dirty = 0; - - if (mm->context == NO_CONTEXT) { - dirty = 1; - sun4c_alloc_context_hw(old_mm, mm); - } else { - /* Update the LRU ring of contexts. 
*/ - ctx = ctx_list_pool + mm->context; - remove_from_ctx_list(ctx); - add_to_used_ctxlist(ctx); - } - if (dirty || old_mm != mm) - sun4c_set_context(mm->context); -} - -static void sun4c_destroy_context_hw(struct mm_struct *mm) -{ - struct ctx_list *ctx_old; - - if (mm->context != NO_CONTEXT) { - sun4c_demap_context_hw(&sun4c_context_ring[mm->context], mm->context); - ctx_old = ctx_list_pool + mm->context; - remove_from_ctx_list(ctx_old); - add_to_free_ctxlist(ctx_old); - mm->context = NO_CONTEXT; - } -} - -static void sun4c_alloc_context_sw(struct mm_struct *old_mm, struct mm_struct *mm) -{ - struct ctx_list *ctxp; - - ctxp = ctx_free.next; - if (ctxp != &ctx_free) { - remove_from_ctx_list(ctxp); - add_to_used_ctxlist(ctxp); - mm->context = ctxp->ctx_number; - ctxp->ctx_mm = mm; - return; - } - ctxp = ctx_used.next; - if(ctxp->ctx_mm == old_mm) - ctxp = ctxp->next; - remove_from_ctx_list(ctxp); - add_to_used_ctxlist(ctxp); - ctxp->ctx_mm->context = NO_CONTEXT; - ctxp->ctx_mm = mm; - mm->context = ctxp->ctx_number; - sun4c_demap_context_sw(&sun4c_context_ring[ctxp->ctx_number], + sun4c_demap_context(&sun4c_context_ring[ctxp->ctx_number], ctxp->ctx_number); } /* Switch the current MM context. */ -static void sun4c_switch_mm_sw(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) +static void sun4c_switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) { struct ctx_list *ctx; int dirty = 0; if (mm->context == NO_CONTEXT) { dirty = 1; - sun4c_alloc_context_sw(old_mm, mm); + sun4c_alloc_context(old_mm, mm); } else { /* Update the LRU ring of contexts. 
*/ ctx = ctx_list_pool + mm->context; remove_from_ctx_list(ctx); add_to_used_ctxlist(ctx); } - if (dirty || old_mm != mm) sun4c_set_context(mm->context); } -static void sun4c_destroy_context_sw(struct mm_struct *mm) +static void sun4c_destroy_context(struct mm_struct *mm) { struct ctx_list *ctx_old; if (mm->context != NO_CONTEXT) { - sun4c_demap_context_sw(&sun4c_context_ring[mm->context], mm->context); + sun4c_demap_context(&sun4c_context_ring[mm->context], mm->context); ctx_old = ctx_list_pool + mm->context; remove_from_ctx_list(ctx_old); add_to_free_ctxlist(ctx_old); @@ -2095,7 +1717,7 @@ static void sun4c_pgd_set(pgd_t * pgdp, pmd_t * pmdp) static void sun4c_pmd_set(pmd_t * pmdp, pte_t * ptep) { - *pmdp = (PGD_TABLE | (unsigned long) ptep); + *pmdp = __pmd(PGD_TABLE | (unsigned long) ptep); } static int sun4c_pte_present(pte_t pte) @@ -2178,10 +1800,7 @@ static inline unsigned long sun4c_pmd_page(pmd_t pmd) return (pmd_val(pmd) & PAGE_MASK); } -static unsigned long sun4c_pgd_page(pgd_t pgd) -{ - return 0; -} +static unsigned long sun4c_pgd_page(pgd_t pgd) { return 0; } /* to find an entry in a page-table-directory */ static inline pgd_t *sun4c_pgd_offset(struct mm_struct * mm, unsigned long address) @@ -2275,9 +1894,7 @@ static pmd_t *sun4c_pmd_alloc_one_fast(struct mm_struct *mm, unsigned long addre return NULL; } -static void sun4c_free_pmd_fast(pmd_t * pmd) -{ -} +static void sun4c_free_pmd_fast(pmd_t * pmd) { } static int sun4c_check_pgt_cache(int low, int high) { @@ -2470,37 +2087,31 @@ void __init ld_mmu_sun4c(void) _SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE; /* Functions */ -#ifndef CONFIG_SMP BTFIXUPSET_CALL(___xchg32, ___xchg32_sun4c, BTFIXUPCALL_NORM); -#endif BTFIXUPSET_CALL(do_check_pgt_cache, sun4c_check_pgt_cache, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_cache_all, sun4c_flush_cache_all, BTFIXUPCALL_NORM); if (sun4c_vacinfo.do_hwflushes) { - BTFIXUPSET_CALL(flush_cache_mm, sun4c_flush_cache_mm_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, 
sun4c_flush_cache_range_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, sun4c_flush_cache_page_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__flush_page_to_ram, sun4c_flush_page_to_ram_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, sun4c_flush_tlb_mm_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, sun4c_flush_tlb_range_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, sun4c_flush_tlb_page_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_task_struct, sun4c_free_task_struct_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(switch_mm, sun4c_switch_mm_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(destroy_context, sun4c_destroy_context_hw, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(sun4c_flush_page, sun4c_flush_page_hw, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(sun4c_flush_segment, sun4c_flush_segment_hw, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(sun4c_flush_context, sun4c_flush_context_hw, BTFIXUPCALL_NORM); } else { - BTFIXUPSET_CALL(flush_cache_mm, sun4c_flush_cache_mm_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, sun4c_flush_cache_range_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, sun4c_flush_cache_page_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__flush_page_to_ram, sun4c_flush_page_to_ram_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, sun4c_flush_tlb_mm_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, sun4c_flush_tlb_range_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, sun4c_flush_tlb_page_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_task_struct, sun4c_free_task_struct_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(switch_mm, sun4c_switch_mm_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(destroy_context, sun4c_destroy_context_sw, BTFIXUPCALL_NORM); - } - + BTFIXUPSET_CALL(sun4c_flush_page, sun4c_flush_page_sw, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(sun4c_flush_segment, sun4c_flush_segment_sw, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(sun4c_flush_context, sun4c_flush_context_sw, BTFIXUPCALL_NORM); + } + + 
BTFIXUPSET_CALL(flush_tlb_mm, sun4c_flush_tlb_mm, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_cache_mm, sun4c_flush_cache_mm, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(destroy_context, sun4c_destroy_context, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(switch_mm, sun4c_switch_mm, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_cache_page, sun4c_flush_cache_page, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_tlb_page, sun4c_flush_tlb_page, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_tlb_range, sun4c_flush_tlb_range, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_cache_range, sun4c_flush_cache_range, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(free_task_struct, sun4c_free_task_struct, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(__flush_page_to_ram, sun4c_flush_page_to_ram, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_tlb_all, sun4c_flush_tlb_all, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_sig_insns, sun4c_flush_sig_insns, BTFIXUPCALL_NOP); diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index b34077ca5ffe..99121ee372ae 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -31,11 +31,6 @@ else endif endif -# -# This is just to get the dependencies... 
-# -binfmt_elf32.o: $(TOPDIR)/fs/binfmt_elf.c - ifneq ($(NEW_GCC),y) CMODEL_CFLAG := -mmedlow else diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 224387833d54..b12beee463ba 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -2879,16 +2879,19 @@ do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) bprm.sh_bang = 0; bprm.loader = 0; bprm.exec = 0; - if ((bprm.argc = count32(argv, bprm.p / sizeof(u32))) < 0) { - allow_write_access(file); - fput(file); - return bprm.argc; - } - if ((bprm.envc = count32(envp, bprm.p / sizeof(u32))) < 0) { - allow_write_access(file); - fput(file); - return bprm.envc; - } + + bprm.mm = mm_alloc(); + retval = -ENOMEM; + if (!bprm.mm) + goto out_file; + + bprm.argc = count32(argv, bprm.p / sizeof(u32)); + if ((retval = bprm.argc) < 0) + goto out_mm; + + bprm.envc = count32(envp, bprm.p / sizeof(u32)); + if ((retval = bprm.envc) < 0) + goto out_mm; retval = prepare_binprm(&bprm); if (retval < 0) @@ -2914,14 +2917,20 @@ do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) out: /* Something went wrong, return the inode and free the argument pages*/ - allow_write_access(bprm.file); - if (bprm.file) - fput(bprm.file); + for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + struct page * page = bprm.page[i]; + if (page) + __free_page(page); + } - for (i=0 ; i<MAX_ARG_PAGES ; i++) - if (bprm.page[i]) - __free_page(bprm.page[i]); +out_mm: + mmdrop(bprm.mm); +out_file: + if (bprm.file) { + allow_write_access(bprm.file); + fput(bprm.file); + } return retval; } diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index 266c51b450da..149dc1b4c484 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -17,20 +17,10 @@ static inline void forget_pte(pte_t page) { - if (pte_none(page)) - return; - if (pte_present(page)) { - unsigned long pfn = pte_pfn(page); - struct page *ptpage; - if (!pfn_valid(pfn)) - 
return; - ptpage = pfn_to_page(page); - if (PageReserved(ptpage)) - return; - page_cache_release(ptpage); - return; + if (!pte_none(page)) { + printk("forget_pte: old mapping existed!\n"); + BUG(); } - swap_free(pte_to_swp_entry(page)); } /* Remap IO memory, the same way as remap_page_range(), but use diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index 724b3a7bdb15..a82cabc11223 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile @@ -31,11 +31,11 @@ BOOT_INCL = $(TOPDIR)/include/linux/config.h \ $(TOPDIR)/include/linux/autoconf.h \ $(TOPDIR)/include/asm/boot.h -zImage: $(CONFIGURE) bootsect setup compressed/vmlinux tools/build +zImage: bootsect setup compressed/vmlinux tools/build $(OBJCOPY) compressed/vmlinux compressed/vmlinux.out tools/build bootsect setup compressed/vmlinux.out $(ROOT_DEV) > zImage -bzImage: $(CONFIGURE) bbootsect bsetup compressed/bvmlinux tools/build +bzImage: bbootsect bsetup compressed/bvmlinux tools/build $(OBJCOPY) compressed/bvmlinux compressed/bvmlinux.out tools/build -b bbootsect bsetup compressed/bvmlinux.out $(ROOT_DEV) > bzImage @@ -51,14 +51,14 @@ compressed/bvmlinux: $(TOPDIR)/vmlinux zdisk: $(BOOTIMAGE) dd bs=8192 if=$(BOOTIMAGE) of=/dev/fd0 -zlilo: $(CONFIGURE) $(BOOTIMAGE) +zlilo: $(BOOTIMAGE) if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz cp $(TOPDIR)/System.map $(INSTALL_PATH)/ if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi -install: $(CONFIGURE) $(BOOTIMAGE) +install: $(BOOTIMAGE) sh -x ./install.sh $(KERNELRELEASE) $(BOOTIMAGE) $(TOPDIR)/System.map "$(INSTALL_PATH)" tools/build: tools/build.c diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 32c30d77ca27..9098ef7e906b 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -44,6 +44,6 @@ static int 
sys_bus_init(void) return device_register(&system_bus); } -subsys_initcall(sys_bus_init); +postcore_initcall(sys_bus_init); EXPORT_SYMBOL(register_sys_device); EXPORT_SYMBOL(unregister_sys_device); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index b24ef3acabd2..2b0624126f0c 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1069,7 +1069,9 @@ static void idedisk_setup(struct ata_device *drive) } #else printk("%s: setmax_ext LBA %llu, native %llu\n", - drive->name, set_max_ext, capacity_2); + drive->name, + (long long) set_max_ext, + (long long) capacity_2); #endif } drive->bios_cyl = drive->cyl; diff --git a/drivers/ide/ide-pci.c b/drivers/ide/ide-pci.c index 9ae3d818abd0..c609aa7e7b78 100644 --- a/drivers/ide/ide-pci.c +++ b/drivers/ide/ide-pci.c @@ -451,7 +451,7 @@ check_if_enabled: if (d->init_chipset) d->init_chipset(dev); #ifdef __sparc__ - printk(KERN_INFO "ATA: 100%% native mode on irq\n", __irq_itoa(pciirq)); + printk(KERN_INFO "ATA: 100%% native mode on irq %s\n", __irq_itoa(pciirq)); #else printk(KERN_INFO "ATA: 100%% native mode on irq %d\n", pciirq); #endif diff --git a/drivers/ide/probe.c b/drivers/ide/probe.c index 88cdab4215ca..bb144dfbbb47 100644 --- a/drivers/ide/probe.c +++ b/drivers/ide/probe.c @@ -875,7 +875,8 @@ static void channel_probe(struct ata_channel *ch) /* Register this hardware interface within the global device tree. 
*/ - sprintf(ch->dev.bus_id, "%04x", ch->io_ports[IDE_DATA_OFFSET]); + sprintf(ch->dev.bus_id, "%04lx", + (unsigned long) ch->io_ports[IDE_DATA_OFFSET]); sprintf(ch->dev.name, "ide"); ch->dev.driver_data = ch; #ifdef CONFIG_PCI diff --git a/drivers/net/bonding.c b/drivers/net/bonding.c index e9736dc3ce89..a437e3fe6174 100644 --- a/drivers/net/bonding.c +++ b/drivers/net/bonding.c @@ -161,6 +161,21 @@ * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr * - Handle hot swap ethernet interface deregistration events to remove * kernel oops following hot swap of enslaved interface + * + * 2002/1/2 - Chad N. Tindel <ctindel at ieee dot org> + * - Restore original slave flags at release time. + * + * 2002/02/18 - Erik Habbinga <erik_habbinga at hp dot com> + * - bond_release(): calling kfree on our_slave after call to + * bond_restore_slave_flags, not before + * - bond_enslave(): saving slave flags into original_flags before + * call to netdev_set_master, so the IFF_SLAVE flag doesn't end + * up in original_flags + * + * 2002/04/05 - Mark Smith <mark.smith at comdev dot cc> and + * Steve Mead <steve.mead at comdev dot cc> + * - Port Gleb Natapov's multicast support patchs from 2.4.12 + * to 2.4.18 adding support for multicast. 
*/ #include <linux/config.h> @@ -208,11 +223,8 @@ #define MII_ENDOF_NWAY 0x20 #undef MII_LINK_READY -/*#define MII_LINK_READY (MII_LINK_UP | MII_ENDOF_NWAY)*/ #define MII_LINK_READY (MII_LINK_UP) -#define MAX_BOND_ADDR 256 - #ifndef BOND_LINK_ARP_INTERV #define BOND_LINK_ARP_INTERV 0 #endif @@ -223,7 +235,7 @@ static unsigned long arp_target = 0; static u32 my_ip = 0; char *arp_target_hw_addr = NULL; -static int max_bonds = MAX_BONDS; +static int max_bonds = BOND_DEFAULT_MAX_BONDS; static int miimon = BOND_LINK_MON_INTERV; static int mode = BOND_MODE_ROUNDROBIN; static int updelay = 0; @@ -234,7 +246,7 @@ int bond_cnt; static struct bonding *these_bonds = NULL; static struct net_device *dev_bonds = NULL; -MODULE_PARM(max_bonds, "1-" __MODULE_STRING(INT_MAX) "i"); +MODULE_PARM(max_bonds, "i"); MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); MODULE_PARM(miimon, "i"); MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); @@ -260,6 +272,15 @@ static struct net_device_stats *bond_get_stats(struct net_device *dev); static void bond_mii_monitor(struct net_device *dev); static void bond_arp_monitor(struct net_device *dev); static int bond_event(struct notifier_block *this, unsigned long event, void *ptr); +static void bond_restore_slave_flags(slave_t *slave); +static void bond_mc_list_destroy(struct bonding *bond); +static void bond_mc_add(bonding_t *bond, void *addr, int alen); +static void bond_mc_delete(bonding_t *bond, void *addr, int alen); +static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, int gpf_flag); +static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2); +static void bond_set_promiscuity(bonding_t *bond, int inc); +static void bond_set_allmulti(bonding_t *bond, int inc); +static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list); static void bond_set_slave_inactive_flags(slave_t *slave); static void bond_set_slave_active_flags(slave_t *slave); 
static int bond_enslave(struct net_device *master, struct net_device *slave); @@ -282,6 +303,11 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length); #define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ (netif_running(dev) && netif_carrier_ok(dev))) +static void bond_restore_slave_flags(slave_t *slave) +{ + slave->dev->flags = slave->original_flags; +} + static void bond_set_slave_inactive_flags(slave_t *slave) { slave->state = BOND_STATE_BACKUP; @@ -431,6 +457,7 @@ static int bond_close(struct net_device *master) /* Release the bonded slaves */ bond_release_all(master); + bond_mc_list_destroy (bond); write_unlock_irqrestore(&bond->lock, flags); @@ -438,19 +465,180 @@ static int bond_close(struct net_device *master) return 0; } -static void set_multicast_list(struct net_device *master) +/* + * flush all members of flush->mc_list from device dev->mc_list + */ +static void bond_mc_list_flush(struct net_device *dev, struct net_device *flush) +{ + struct dev_mc_list *dmi; + + for (dmi = flush->mc_list; dmi != NULL; dmi = dmi->next) + dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); +} + +/* + * Totally destroys the mc_list in bond + */ +static void bond_mc_list_destroy(struct bonding *bond) { + struct dev_mc_list *dmi; + + dmi = bond->mc_list; + while (dmi) { + bond->mc_list = dmi->next; + kfree(dmi); + dmi = bond->mc_list; + } +} + /* - bonding_t *bond = master->priv; + * Add a Multicast address to every slave in the bonding group + */ +static void bond_mc_add(bonding_t *bond, void *addr, int alen) +{ slave_t *slave; - for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) { - slave->dev->mc_list = master->mc_list; - slave->dev->mc_count = master->mc_count; - slave->dev->flags = master->flags; - slave->dev->set_multicast_list(slave->dev); + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) { + dev_mc_add(slave->dev, addr, alen, 0); } +} + +/* + * Remove a multicast address from every 
slave in the bonding group + */ +static void bond_mc_delete(bonding_t *bond, void *addr, int alen) +{ + slave_t *slave; + + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_mc_delete(slave->dev, addr, alen, 0); +} + +/* + * Copy all the Multicast addresses from src to the bonding device dst + */ +static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, + int gpf_flag) +{ + struct dev_mc_list *dmi, *new_dmi; + + for (dmi = src; dmi != NULL; dmi = dmi->next) { + new_dmi = kmalloc(sizeof(struct dev_mc_list), gpf_flag); + + if (new_dmi == NULL) { + return -ENOMEM; + } + + new_dmi->next = dst->mc_list; + dst->mc_list = new_dmi; + + new_dmi->dmi_addrlen = dmi->dmi_addrlen; + memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); + new_dmi->dmi_users = dmi->dmi_users; + new_dmi->dmi_gusers = dmi->dmi_gusers; + } + return 0; +} + +/* + * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise + */ +static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) +{ + return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && + dmi1->dmi_addrlen == dmi2->dmi_addrlen; +} + +/* + * Push the promiscuity flag down to all slaves + */ +static void bond_set_promiscuity(bonding_t *bond, int inc) +{ + slave_t *slave; + + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_set_promiscuity(slave->dev, inc); +} + +/* + * Push the allmulti flag down to all slaves */ +static void bond_set_allmulti(bonding_t *bond, int inc) +{ + slave_t *slave; + + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_set_allmulti(slave->dev, inc); +} + +/* + * returns dmi entry if found, NULL otherwise + */ +static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, + struct dev_mc_list *mc_list) +{ + struct dev_mc_list *idmi; + + for (idmi = mc_list; idmi != NULL; idmi = idmi->next) { + if (dmi_same(dmi, idmi)) { + return idmi; + } + } + return NULL; +} + 
+static void set_multicast_list(struct net_device *master) +{ + bonding_t *bond = master->priv; + struct dev_mc_list *dmi; + unsigned long flags = 0; + + /* + * Lock the private data for the master + */ + write_lock_irqsave(&bond->lock, flags); + + /* + * Lock the master device so that noone trys to transmit + * while we're changing things + */ + spin_lock_bh(&master->xmit_lock); + + /* set promiscuity flag to slaves */ + if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) ) + bond_set_promiscuity(bond, 1); + + if ( !(master->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC) ) + bond_set_promiscuity(bond, -1); + + /* set allmulti flag to slaves */ + if ( (master->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI) ) + bond_set_allmulti(bond, 1); + + if ( !(master->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI) ) + bond_set_allmulti(bond, -1); + + bond->flags = master->flags; + + /* looking for addresses to add to slaves' mc list */ + for (dmi = master->mc_list; dmi != NULL; dmi = dmi->next) { + if (bond_mc_list_find_dmi(dmi, bond->mc_list) == NULL) + bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); + } + + /* looking for addresses to delete from slaves' list */ + for (dmi = bond->mc_list; dmi != NULL; dmi = dmi->next) { + if (bond_mc_list_find_dmi(dmi, master->mc_list) == NULL) + bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); + } + + + /* save master's multicast list */ + bond_mc_list_destroy (bond); + bond_mc_list_copy (master->mc_list, bond, GFP_KERNEL); + + spin_unlock_bh(&master->xmit_lock); + write_unlock_irqrestore(&bond->lock, flags); } /* @@ -476,6 +664,7 @@ static int bond_enslave(struct net_device *master_dev, unsigned long flags = 0; int ndx = 0; int err = 0; + struct dev_mc_list *dmi; if (master_dev == NULL || slave_dev == NULL) { return -ENODEV; @@ -513,6 +702,8 @@ static int bond_enslave(struct net_device *master_dev, } memset(new_slave, 0, sizeof(slave_t)); + /* save flags before call to netdev_set_master */ + 
new_slave->original_flags = slave_dev->flags; err = netdev_set_master(slave_dev, master_dev); if (err) { @@ -526,10 +717,38 @@ static int bond_enslave(struct net_device *master_dev, new_slave->dev = slave_dev; + /* set promiscuity level to new slave */ + if (master_dev->flags & IFF_PROMISC) + dev_set_promiscuity(slave_dev, 1); + + /* set allmulti level to new slave */ + if (master_dev->flags & IFF_ALLMULTI) + dev_set_allmulti(slave_dev, 1); + + /* upload master's mc_list to new slave */ + for (dmi = master_dev->mc_list; dmi != NULL; dmi = dmi->next) + dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + /* * queue to the end of the slaves list, make the first element its * successor, the last one its predecessor, and make it the bond's * predecessor. + * + * Just to clarify, so future bonding driver hackers don't go through + * the same confusion stage I did trying to figure this out, the + * slaves are stored in a double linked circular list, sortof. + * In the ->next direction, the last slave points to the first slave, + * bypassing bond; only the slaves are in the ->next direction. + * In the ->prev direction, however, the first slave points to bond + * and bond points to the last slave. + * + * It looks like a circle with a little bubble hanging off one side + * in the ->prev direction only. + * + * When going through the list once, its best to start at bond->prev + * and go in the ->prev direction, testing for bond. Doing this + * in the ->next direction doesn't work. Trust me, I know this now. 
+ * :) -mts 2002.03.14 */ new_slave->prev = bond->prev; new_slave->prev->next = new_slave; @@ -838,10 +1057,20 @@ static int bond_release(struct net_device *master, struct net_device *slave) } else { printk(".\n"); } - kfree(our_slave); /* release the slave from its bond */ - + + /* flush master's mc_list from slave */ + bond_mc_list_flush (slave, master); + + /* unset promiscuity level from slave */ + if (master->flags & IFF_PROMISC) + dev_set_promiscuity(slave, -1); + + /* unset allmulti level from slave */ + if (master->flags & IFF_ALLMULTI) + dev_set_allmulti(slave, -1); + netdev_set_master(slave, NULL); /* only restore its RUNNING flag if monitoring set it down */ @@ -854,6 +1083,9 @@ static int bond_release(struct net_device *master, struct net_device *slave) dev_close(slave); } + bond_restore_slave_flags(our_slave); + kfree(our_slave); + if (bond->current_slave == NULL) { printk(KERN_INFO "%s: now running without any active interface !\n", @@ -1121,8 +1353,8 @@ static void bond_mii_monitor(struct net_device *master) master->name, bestslave->dev->name, (updelay - bestslave->delay) * miimon); - bestslave->delay= 0; - bestslave->link = BOND_LINK_UP; + bestslave->delay = 0; + bestslave->link = BOND_LINK_UP; } if (mode == BOND_MODE_ACTIVEBACKUP) { @@ -1192,7 +1424,7 @@ static void bond_arp_monitor(struct net_device *master) read_lock(&bond->ptrlock); if ( (!(slave->link == BOND_LINK_UP)) - && (slave!= bond->current_slave) ) { + && (slave != bond->current_slave) ) { read_unlock(&bond->ptrlock); @@ -1207,7 +1439,7 @@ static void bond_arp_monitor(struct net_device *master) slave->state = BOND_STATE_ACTIVE; bond->current_slave = slave; } - if (slave!=bond->current_slave) { + if (slave != bond->current_slave) { slave->dev->flags |= IFF_NOARP; } write_unlock(&bond->ptrlock); @@ -1311,7 +1543,7 @@ arp_monitor_out: #define isdigit(c) (c >= '0' && c <= '9') __inline static int atoi( char **s) { -int i=0; +int i = 0; while (isdigit(**s)) i = i*20 + *((*s)++) - '0'; return 
i; @@ -1388,7 +1620,7 @@ my_inet_aton(char *cp, unsigned long *the_addr) { goto ret_0; } - if (the_addr!= NULL) { + if (the_addr != NULL) { *the_addr = res.word | htonl (val); } @@ -1420,7 +1652,7 @@ static int bond_info_query(struct net_device *master, struct ifbond *info) info->miimon = miimon; read_lock_irqsave(&bond->lock, flags); - for (slave = bond->prev; slave!=(slave_t *)bond; slave = slave->prev) { + for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { info->num_slaves++; } read_unlock_irqrestore(&bond->lock, flags); @@ -1696,7 +1928,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) /* if we are sending arp packets, try to at least identify our own ip address */ - if ( (arp_interval > 0) && (my_ip==0) && + if ( (arp_interval > 0) && (my_ip == 0) && (skb->protocol == __constant_htons(ETH_P_ARP) ) ) { char *the_ip = (((char *)skb->data)) + sizeof(struct ethhdr) @@ -1708,7 +1940,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) /* if we are sending arp packets and don't know the target hw address, save it so we don't need to use a broadcast address */ - if ( (arp_interval > 0) && (arp_target_hw_addr==NULL) && + if ( (arp_interval > 0) && (arp_target_hw_addr == NULL) && (skb->protocol == __constant_htons(ETH_P_IP) ) ) { struct ethhdr *eth_hdr = (struct ethhdr *) (((char *)skb->data)); @@ -1751,7 +1983,7 @@ static struct net_device_stats *bond_get_stats(struct net_device *dev) read_lock_irqsave(&bond->lock, flags); - for (slave = bond->prev; slave!=(slave_t *)bond; slave = slave->prev) { + for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { sstats = slave->dev->get_stats(slave->dev); stats->rx_packets += sstats->rx_packets; @@ -1861,7 +2093,7 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length) static int bond_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct bonding *this_bond=(struct bonding 
*)these_bonds; + struct bonding *this_bond = (struct bonding *)these_bonds; struct bonding *last_bond; struct net_device *event_dev = (struct net_device *)ptr; @@ -1905,10 +2137,8 @@ static int bond_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -static struct notifier_block bond_netdev_notifier={ - bond_event, - NULL, - 0 +static struct notifier_block bond_netdev_notifier = { + notifier_call: bond_event, }; static int __init bond_init(struct net_device *dev) @@ -2038,6 +2268,13 @@ static int __init bonding_init(void) /* Find a name for this unit */ static struct net_device *dev_bond = NULL; + if (max_bonds < 1 || max_bonds > INT_MAX) { + printk(KERN_WARNING + "bonding_init(): max_bonds (%d) not in range %d-%d, " + "so it was reset to BOND_DEFAULT_MAX_BONDS (%d)", + max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); + max_bonds = BOND_DEFAULT_MAX_BONDS; + } dev_bond = dev_bonds = kmalloc(max_bonds*sizeof(struct net_device), GFP_KERNEL); if (dev_bond == NULL) { diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index 8f18ca3389c0..b3a91975bdb4 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -857,7 +857,7 @@ static void irda_usb_receive(struct urb *urb) new->mac.raw = new->data; new->protocol = htons(ETH_P_IRDA); netif_rx(new); - self->netdev->last_rx = jiffies; + self->netdev->last_rx = jiffies; done: /* Note : at this point, the URB we've just received (urb) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index e5bad37436f9..7bfbbab8087f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -204,7 +204,7 @@ static int __init pci_driver_init(void) return bus_register(&pci_bus_type); } -subsys_initcall(pci_driver_init); +postcore_initcall(pci_driver_init); EXPORT_SYMBOL(pci_match_device); EXPORT_SYMBOL(pci_register_driver); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 33f9ed3ca50f..b4ada910a348 100644 --- 
a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -36,7 +36,7 @@ static int usb_start_wait_urb(struct urb *urb, int timeout, int* actual_length) add_wait_queue(&awd.wqh, &wait); urb->context = &awd; - status = usb_submit_urb(urb, GFP_KERNEL); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { // something went wrong usb_free_urb(urb); diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 9b3d5e204db7..b9bc5f3e952e 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -12,6 +12,9 @@ * * History: * + * 2002/06/01 remember frame when HC won't see EDs any more; use that info + * to fix urb unlink races caused by interrupt latency assumptions; + * minor ED field and function naming updates * 2002/01/18 package as a patch for 2.5.3; this should match the * 2.4.17 kernel modulo some bugs being fixed. * @@ -106,7 +109,7 @@ * - lots more testing!! */ -#define DRIVER_VERSION "$Revision: 1.9 $" +#define DRIVER_VERSION "2002-Jun-01" #define DRIVER_AUTHOR "Roman Weissgaerber <weissg@vienna.at>, David Brownell" #define DRIVER_DESC "USB 1.1 'Open' Host Controller (OHCI) Driver" @@ -287,7 +290,7 @@ static int ohci_urb_dequeue (struct usb_hcd *hcd, struct urb *urb) } urb_priv->state = URB_DEL; - ed_unlink (urb->dev, urb_priv->ed); + start_urb_unlink (ohci, urb_priv->ed); spin_unlock_irqrestore (&ohci->lock, flags); } else { /* @@ -508,16 +511,15 @@ static void ohci_irq (struct usb_hcd *hcd) /* could track INTR_SO to reduce available PCI/... bandwidth */ - // FIXME: this assumes SOF (1/ms) interrupts don't get lost... - if (ints & OHCI_INTR_SF) { - unsigned int frame = le16_to_cpu (ohci->hcca->frame_no) & 1; + /* handle any pending URB/ED unlinks, leaving INTR_SF enabled + * when there's still unlinking to be done (next frame). 
+ */ + spin_lock (&ohci->lock); + if (ohci->ed_rm_list) + finish_unlinks (ohci, le16_to_cpu (ohci->hcca->frame_no)); + if ((ints & OHCI_INTR_SF) != 0 && !ohci->ed_rm_list) writel (OHCI_INTR_SF, ®s->intrdisable); - if (ohci->ed_rm_list [!frame] != NULL) { - dl_del_list (ohci, !frame); - } - if (ohci->ed_rm_list [frame] != NULL) - writel (OHCI_INTR_SF, ®s->intrenable); - } + spin_unlock (&ohci->lock); writel (ints, ®s->intrstatus); writel (OHCI_INTR_MIE, ®s->intrenable); @@ -719,8 +721,7 @@ static int hc_restart (struct ohci_hcd *ohci) for (i = 0; i < NUM_INTS; i++) ohci->hcca->int_table [i] = 0; /* no EDs to remove */ - ohci->ed_rm_list [0] = NULL; - ohci->ed_rm_list [1] = NULL; + ohci->ed_rm_list = NULL; /* empty control and bulk lists */ ohci->ed_isotail = NULL; @@ -802,7 +803,7 @@ static int ohci_resume (struct usb_hcd *hcd) ohci->disabled = 0; ohci->sleeping = 0; ohci->hc_control = OHCI_CONTROL_INIT | OHCI_USB_OPER; - if (!ohci->ed_rm_list [0] && !ohci->ed_rm_list [1]) { + if (!ohci->ed_rm_list) { if (ohci->ed_controltail) ohci->hc_control |= OHCI_CTRL_CLE; if (ohci->ed_bulktail) diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index e1c92c8b49ae..7ab1f2203653 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -208,8 +208,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) } ed->ed_prev = ohci->ed_controltail; if (!ohci->ed_controltail - && !ohci->ed_rm_list [0] - && !ohci->ed_rm_list [1] + && !ohci->ed_rm_list && !ohci->sleeping ) { ohci->hc_control |= OHCI_CTRL_CLE; @@ -227,8 +226,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) } ed->ed_prev = ohci->ed_bulktail; if (!ohci->ed_bulktail - && !ohci->ed_rm_list [0] - && !ohci->ed_rm_list [1] + && !ohci->ed_rm_list && !ohci->sleeping ) { ohci->hc_control |= OHCI_CTRL_BLE; @@ -240,16 +238,16 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) case PIPE_INTERRUPT: load = ed->int_load; interval = ep_2_n_interval (ed->int_period); - 
ed->int_interval = interval; + ed->interval = interval; int_branch = ep_int_balance (ohci, interval, load); ed->int_branch = int_branch; for (i = 0; i < ep_rev (6, interval); i += inter) { inter = 1; for (ed_p = & (ohci->hcca->int_table [ep_rev (5, i) + int_branch]); - (*ed_p != 0) && ((dma_to_ed (ohci, le32_to_cpup (ed_p)))->int_interval >= interval); + (*ed_p != 0) && ((dma_to_ed (ohci, le32_to_cpup (ed_p)))->interval >= interval); ed_p = & ((dma_to_ed (ohci, le32_to_cpup (ed_p)))->hwNextED)) - inter = ep_rev (6, (dma_to_ed (ohci, le32_to_cpup (ed_p)))->int_interval); + inter = ep_rev (6, (dma_to_ed (ohci, le32_to_cpup (ed_p)))->interval); ed->hwNextED = *ed_p; *ed_p = cpu_to_le32 (ed->dma); } @@ -260,7 +258,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) case PIPE_ISOCHRONOUS: ed->hwNextED = 0; - ed->int_interval = 1; + ed->interval = 1; if (ohci->ed_isotail != NULL) { ohci->ed_isotail->hwNextED = cpu_to_le32 (ed->dma); ed->ed_prev = ohci->ed_isotail; @@ -270,7 +268,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) for (ed_p = & (ohci->hcca->int_table [ep_rev (5, i)]); *ed_p != 0; ed_p = & ((dma_to_ed (ohci, le32_to_cpup (ed_p)))->hwNextED)) - inter = ep_rev (6, (dma_to_ed (ohci, le32_to_cpup (ed_p)))->int_interval); + inter = ep_rev (6, (dma_to_ed (ohci, le32_to_cpup (ed_p)))->interval); *ed_p = cpu_to_le32 (ed->dma); } ed->ed_prev = NULL; @@ -311,7 +309,7 @@ static void periodic_unlink ( * the link from the ed still points to another operational ed or 0 * so the HC can eventually finish the processing of the unlinked ed */ -static int ep_unlink (struct ohci_hcd *ohci, struct ed *ed) +static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed) { int i; @@ -357,8 +355,8 @@ static int ep_unlink (struct ohci_hcd *ohci, struct ed *ed) break; case PIPE_INTERRUPT: - periodic_unlink (ohci, ed, ed->int_branch, ed->int_interval); - for (i = ed->int_branch; i < NUM_INTS; i += ed->int_interval) + periodic_unlink (ohci, ed, 
ed->int_branch, ed->interval); + for (i = ed->int_branch; i < NUM_INTS; i += ed->interval) ohci->ohci_int_load [i] -= ed->int_load; #ifdef OHCI_VERBOSE_DEBUG ohci_dump_periodic (ohci, "UNLINK_INT"); @@ -384,6 +382,10 @@ static int ep_unlink (struct ohci_hcd *ohci, struct ed *ed) /* FIXME ED's "unlink" state is indeterminate; * the HC might still be caching it (till SOF). + * - use ed_rm_list and finish_unlinks(), adding some state that + * prevents clobbering hw linkage before the appropriate SOF + * - a speedup: when only one urb is queued on the ed, save 1msec + * by making start_urb_unlink() use this routine to deschedule. */ ed->state = ED_UNLINK; return 0; @@ -478,11 +480,8 @@ static struct ed *ep_add_ed ( * put the ep on the rm_list and stop the bulk or ctrl list * real work is done at the next start frame (SF) hardware interrupt */ -static void ed_unlink (struct usb_device *usb_dev, struct ed *ed) +static void start_urb_unlink (struct ohci_hcd *ohci, struct ed *ed) { - unsigned int frame; - struct ohci_hcd *ohci = hcd_to_ohci (usb_dev->bus->hcpriv); - /* already pending? */ if (ed->state & ED_URB_DEL) return; @@ -503,9 +502,15 @@ static void ed_unlink (struct usb_device *usb_dev, struct ed *ed) break; } - frame = le16_to_cpu (ohci->hcca->frame_no) & 0x1; - ed->ed_rm_list = ohci->ed_rm_list [frame]; - ohci->ed_rm_list [frame] = ed; + /* SF interrupt might get delayed; record the frame counter value that + * indicates when the HC isn't looking at it, so concurrent unlinks + * behave. frame_no wraps every 2^16 msec, and changes right before + * SF is triggered. 
+ */ + ed->tick = le16_to_cpu (ohci->hcca->frame_no) + 1; + + ed->ed_rm_list = ohci->ed_rm_list; + ohci->ed_rm_list = ed; /* enable SOF interrupt */ if (!ohci->sleeping) { @@ -816,10 +821,12 @@ static struct td *dl_reverse_done_list (struct ohci_hcd *ohci) if (td_list->ed->hwHeadP & ED_H) { if (urb_priv && ((td_list->index + 1) < urb_priv->length)) { +#ifdef OHCI_VERBOSE_DEBUG dbg ("urb %p TD %d of %d, patch ED", td_list->urb, 1 + td_list->index, urb_priv->length); +#endif td_list->ed->hwHeadP = (urb_priv->td [urb_priv->length - 1]->hwNextTD & __constant_cpu_to_le32 (TD_MASK)) @@ -841,27 +848,37 @@ static struct td *dl_reverse_done_list (struct ohci_hcd *ohci) /*-------------------------------------------------------------------------*/ -/* there are some pending requests to unlink - * - some URBs/TDs if urb_priv->state == URB_DEL - */ -static void dl_del_list (struct ohci_hcd *ohci, unsigned int frame) +/* wrap-aware logic stolen from <linux/jiffies.h> */ +#define tick_before(t1,t2) ((((s16)(t1))-((s16)(t2))) < 0) + +/* there are some urbs/eds to unlink; called in_irq(), with HCD locked */ +static void finish_unlinks (struct ohci_hcd *ohci, u16 tick) { - unsigned long flags; - struct ed *ed; - __u32 edINFO; - __u32 tdINFO; - struct td *td = NULL, *td_next = NULL, - *tdHeadP = NULL, *tdTailP; - __u32 *td_p; + struct ed *ed, **last; int ctrl = 0, bulk = 0; - spin_lock_irqsave (&ohci->lock, flags); + for (last = &ohci->ed_rm_list, ed = *last; ed != NULL; ed = *last) { + struct td *td, *td_next, *tdHeadP, *tdTailP; + u32 *td_p; + int unlinked; - for (ed = ohci->ed_rm_list [frame]; ed != NULL; ed = ed->ed_rm_list) { + /* only take off EDs that the HC isn't using, accounting for + * frame counter wraps. completion callbacks might prepend + * EDs to the list, they'll be checked next irq. 
+ */ + if (tick_before (tick, ed->tick)) { + last = &ed->ed_rm_list; + continue; + } + *last = ed->ed_rm_list; + ed->ed_rm_list = 0; + unlinked = 0; + /* unlink urbs from first one requested to queue end; + * leave earlier urbs alone + */ tdTailP = dma_to_td (ohci, le32_to_cpup (&ed->hwTailP)); tdHeadP = dma_to_td (ohci, le32_to_cpup (&ed->hwHeadP)); - edINFO = le32_to_cpup (&ed->hwINFO); td_p = &ed->hwHeadP; for (td = tdHeadP; td != tdTailP; td = td_next) { @@ -870,8 +887,11 @@ static void dl_del_list (struct ohci_hcd *ohci, unsigned int frame) td_next = dma_to_td (ohci, le32_to_cpup (&td->hwNextTD)); - if ((urb_priv->state == URB_DEL)) { - tdINFO = le32_to_cpup (&td->hwINFO); + if (unlinked || (urb_priv->state == URB_DEL)) { + u32 tdINFO = le32_to_cpup (&td->hwINFO); + + unlinked = 1; + /* HC may have partly processed this TD */ if (TD_CC_GET (tdINFO) < 0xE) td_done (urb, td); @@ -880,22 +900,32 @@ static void dl_del_list (struct ohci_hcd *ohci, unsigned int frame) /* URB is done; clean up */ if (++ (urb_priv->td_cnt) == urb_priv->length) { - spin_unlock_irqrestore (&ohci->lock, - flags); + if (urb->status == -EINPROGRESS) + urb->status = -ECONNRESET; + spin_unlock (&ohci->lock); finish_urb (ohci, urb); - spin_lock_irqsave (&ohci->lock, flags); + spin_lock (&ohci->lock); } } else { td_p = &td->hwNextTD; } } + /* FIXME actually want four cases here: + * (a) finishing URB unlink + * [a1] no URBs queued, so start ED unlink + * [a2] some (earlier) URBs still linked, re-enable + * (b) finishing ED unlink + * [b1] no URBs queued, ED is truly idle now + * [b2] URBs now queued, link ED back into schedule + * right now we only have (a) + */ ed->state &= ~ED_URB_DEL; tdHeadP = dma_to_td (ohci, le32_to_cpup (&ed->hwHeadP)); if (tdHeadP == tdTailP) { if (ed->state == ED_OPER) - ep_unlink (ohci, ed); + start_ed_unlink (ohci, ed); td_free (ohci, tdTailP); ed->hwINFO = ED_SKIP; ed->state = ED_NEW; @@ -918,7 +948,7 @@ static void dl_del_list (struct ohci_hcd *ohci, unsigned int 
frame) writel (0, &ohci->regs->ed_controlcurrent); if (bulk) /* reset bulk list */ writel (0, &ohci->regs->ed_bulkcurrent); - if (!ohci->ed_rm_list [!frame]) { + if (!ohci->ed_rm_list) { if (ohci->ed_controltail) ohci->hc_control |= OHCI_CTRL_CLE; if (ohci->ed_bulktail) @@ -926,9 +956,6 @@ static void dl_del_list (struct ohci_hcd *ohci, unsigned int frame) writel (ohci->hc_control, &ohci->regs->control); } } - - ohci->ed_rm_list [frame] = NULL; - spin_unlock_irqrestore (&ohci->lock, flags); } @@ -939,7 +966,7 @@ static void dl_del_list (struct ohci_hcd *ohci, unsigned int frame) * Process normal completions (error or success) and clean the schedules. * * This is the main path for handing urbs back to drivers. The only other - * path is dl_del_list(), which unlinks URBs by scanning EDs, instead of + * path is finish_unlinks(), which unlinks URBs using ed_rm_list, instead of * scanning the (re-reversed) donelist as this does. */ static void dl_done_list (struct ohci_hcd *ohci, struct td *td) @@ -960,7 +987,7 @@ static void dl_done_list (struct ohci_hcd *ohci, struct td *td) /* If all this urb's TDs are done, call complete(). * Interrupt transfers are the only special case: * they're reissued, until "deleted" by usb_unlink_urb - * (real work done in a SOF intr, by dl_del_list). + * (real work done in a SOF intr, by finish_unlinks). 
*/ if (urb_priv->td_cnt == urb_priv->length) { int resubmit; @@ -980,7 +1007,7 @@ static void dl_done_list (struct ohci_hcd *ohci, struct td *td) if ((ed->hwHeadP & __constant_cpu_to_le32 (TD_MASK)) == ed->hwTailP && (ed->state == ED_OPER)) - ep_unlink (ohci, ed); + start_ed_unlink (ohci, ed); td = td_next; } spin_unlock_irqrestore (&ohci->lock, flags); diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index b191552a7a23..e455dcc8c279 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -27,22 +27,29 @@ struct ed { __u32 hwNextED; /* next ED in list */ /* rest are purely for the driver's use */ - struct ed *ed_prev; - __u8 int_period; - __u8 int_branch; - __u8 int_load; - __u8 int_interval; - __u8 state; // ED_{NEW,UNLINK,OPER} + dma_addr_t dma; /* addr of ED */ + struct ed *ed_prev; /* for non-interrupt EDs */ + + u8 type; /* PIPE_{BULK,...} */ + u8 interval; /* interrupt, isochronous */ + union { + struct intr_info { /* interrupt */ + u8 int_period; + u8 int_branch; + u8 int_load; + }; + u16 last_iso; /* isochronous */ + }; + + u8 state; /* ED_{NEW,UNLINK,OPER} */ #define ED_NEW 0x00 /* unused, no dummy td */ #define ED_UNLINK 0x01 /* dummy td, maybe linked to hc */ #define ED_OPER 0x02 /* dummy td, _is_ linked to hc */ #define ED_URB_DEL 0x08 /* for unlinking; masked in */ - __u8 type; - __u16 last_iso; + /* HC may see EDs on rm_list until next frame (frame_no == tick) */ + u16 tick; struct ed *ed_rm_list; - - dma_addr_t dma; /* addr of ED */ } __attribute__ ((aligned(16))); #define ED_MASK ((u32)~0x0f) /* strip hw status in low addr bits */ @@ -335,7 +342,7 @@ struct ohci_hcd { struct ohci_hcca *hcca; dma_addr_t hcca_dma; - struct ed *ed_rm_list [2]; /* to be removed */ + struct ed *ed_rm_list; /* to be removed */ struct ed *ed_bulktail; /* last in bulk list */ struct ed *ed_controltail; /* last in ctrl list */ diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 0872da688d27..127e93068f93 100644 --- 
a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -2515,7 +2515,7 @@ static const struct hc_driver uhci_driver = { suspend: uhci_suspend, resume: uhci_resume, #endif - stop: uhci_stop, + stop: __devexit_p(uhci_stop), hcd_alloc: uhci_hcd_alloc, hcd_free: uhci_hcd_free, diff --git a/drivers/usb/misc/emi26.c b/drivers/usb/misc/emi26.c index 6208a896d92f..8803dd60a493 100644 --- a/drivers/usb/misc/emi26.c +++ b/drivers/usb/misc/emi26.c @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/init.h> #include <linux/usb.h> #define MAX_INTEL_HEX_RECORD_LENGTH 16 diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index ca8ca92552f3..ff139712322b 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -106,9 +106,9 @@ UNUSUAL_DEV( 0x04ce, 0x0002, 0x0074, 0x0074, * This entry is needed because the device reports Sub=ff */ UNUSUAL_DEV( 0x04da, 0x0901, 0x0100, 0x0200, - "Panasonic", - "LS-120 Camera", - US_SC_UFI, US_PR_CBI, NULL, 0), + "Panasonic", + "LS-120 Camera", + US_SC_UFI, US_PR_CBI, NULL, 0), /* Reported by Peter Wächtler <pwaechtler@loewe-komp.de> */ UNUSUAL_DEV( 0x04ce, 0x0002, 0x0074, 0x0074, @@ -230,7 +230,7 @@ UNUSUAL_DEV( 0x054c, 0x002e, 0x0106, 0x0310, US_FL_SINGLE_LUN | US_FL_START_STOP | US_FL_MODE_XLATE), UNUSUAL_DEV( 0x054c, 0x0032, 0x0000, 0x9999, - "Sony", + "Sony", "Memorystick MSC-U01N", US_SC_UFI, US_PR_CB, NULL, US_FL_SINGLE_LUN | US_FL_START_STOP ), @@ -261,34 +261,34 @@ UNUSUAL_DEV( 0x059f, 0xa601, 0x0200, 0x0200, #ifdef CONFIG_USB_STORAGE_ISD200 UNUSUAL_DEV( 0x05ab, 0x0031, 0x0100, 0x0110, - "In-System", - "USB/IDE Bridge (ATA/ATAPI)", - US_SC_ISD200, US_PR_BULK, isd200_Initialization, - 0 ), + "In-System", + "USB/IDE Bridge (ATA/ATAPI)", + US_SC_ISD200, US_PR_BULK, isd200_Initialization, + 0 ), UNUSUAL_DEV( 0x05ab, 0x0301, 0x0100, 0x0110, - "In-System", - "Portable USB Harddrive V2", - US_SC_ISD200, 
US_PR_BULK, isd200_Initialization, - 0 ), + "In-System", + "Portable USB Harddrive V2", + US_SC_ISD200, US_PR_BULK, isd200_Initialization, + 0 ), UNUSUAL_DEV( 0x05ab, 0x0351, 0x0100, 0x0110, - "In-System", - "Portable USB Harddrive V2", - US_SC_ISD200, US_PR_BULK, isd200_Initialization, - 0 ), + "In-System", + "Portable USB Harddrive V2", + US_SC_ISD200, US_PR_BULK, isd200_Initialization, + 0 ), UNUSUAL_DEV( 0x05ab, 0x5701, 0x0100, 0x0110, - "In-System", - "USB Storage Adapter V2", - US_SC_ISD200, US_PR_BULK, isd200_Initialization, - 0 ), + "In-System", + "USB Storage Adapter V2", + US_SC_ISD200, US_PR_BULK, isd200_Initialization, + 0 ), UNUSUAL_DEV( 0x054c, 0x002b, 0x0100, 0x0110, - "Sony", - "Portable USB Harddrive V2", - US_SC_ISD200, US_PR_BULK, isd200_Initialization, - 0 ), + "Sony", + "Portable USB Harddrive V2", + US_SC_ISD200, US_PR_BULK, isd200_Initialization, + 0 ), #endif #ifdef CONFIG_USB_STORAGE_JUMPSHOT @@ -342,18 +342,18 @@ UNUSUAL_DEV( 0x066b, 0x0105, 0x0100, 0x0100, /* Submitted by kedar@centillium * Needed for START_STOP flag, but that is unconfirmed */ UNUSUAL_DEV( 0x0686, 0x4006, 0x0001, 0x0001, - "Minolta", - "Dimage S304", - US_SC_SCSI, US_PR_BULK, NULL, - US_FL_START_STOP ), + "Minolta", + "Dimage S304", + US_SC_SCSI, US_PR_BULK, NULL, + US_FL_START_STOP ), /* Submitted by f.brugmans@hccnet.nl * Needed for START_STOP flag */ UNUSUAL_DEV( 0x0686, 0x4007, 0x0001, 0x0001, - "Minolta", - "Dimage S304", - US_SC_SCSI, US_PR_BULK, NULL, - US_FL_START_STOP ), + "Minolta", + "Dimage S304", + US_SC_SCSI, US_PR_BULK, NULL, + US_FL_START_STOP ), UNUSUAL_DEV( 0x0693, 0x0002, 0x0100, 0x0100, "Hagiwara", @@ -378,10 +378,10 @@ UNUSUAL_DEV( 0x0781, 0x0002, 0x0009, 0x0009, US_FL_IGNORE_SER), UNUSUAL_DEV( 0x0781, 0x0100, 0x0100, 0x0100, - "Sandisk", - "ImageMate SDDR-12", - US_SC_SCSI, US_PR_CB, NULL, - US_FL_SINGLE_LUN ), + "Sandisk", + "ImageMate SDDR-12", + US_SC_SCSI, US_PR_CB, NULL, + US_FL_SINGLE_LUN ), #ifdef CONFIG_USB_STORAGE_SDDR09 UNUSUAL_DEV( 
0x0781, 0x0200, 0x0000, 0x9999, @@ -393,9 +393,9 @@ UNUSUAL_DEV( 0x0781, 0x0200, 0x0000, 0x9999, #ifdef CONFIG_USB_STORAGE_FREECOM UNUSUAL_DEV( 0x07ab, 0xfc01, 0x0000, 0x9999, - "Freecom", - "USB-IDE", - US_SC_QIC, US_PR_FREECOM, freecom_init, 0), + "Freecom", + "USB-IDE", + US_SC_QIC, US_PR_FREECOM, freecom_init, 0), #endif UNUSUAL_DEV( 0x07af, 0x0004, 0x0100, 0x0133, @@ -473,7 +473,7 @@ UNUSUAL_DEV( 0x07c4, 0xa109, 0x0000, 0xffff, US_SC_SCSI, US_PR_DATAFAB, NULL, US_FL_MODE_XLATE ), #endif - + #ifdef CONFIG_USB_STORAGE_SDDR55 /* Contributed by Peter Waechtler */ UNUSUAL_DEV( 0x07c4, 0xa103, 0x0000, 0x9999, @@ -491,10 +491,10 @@ UNUSUAL_DEV( 0x07c4, 0xa103, 0x0000, 0x9999, * of the SCSI layer ourselves. */ UNUSUAL_DEV( 0x07cf, 0x1001, 0x1000, 0x9009, - "Casio", - "QV DigitalCamera", - US_SC_8070, US_PR_CB, NULL, - US_FL_FIX_INQUIRY ), + "Casio", + "QV DigitalCamera", + US_SC_8070, US_PR_CB, NULL, + US_FL_FIX_INQUIRY ), UNUSUAL_DEV( 0x097a, 0x0001, 0x0000, 0x0001, "Minds@Work", @@ -510,10 +510,10 @@ UNUSUAL_DEV( 0x0a16, 0x8888, 0x0100, 0x0100, #ifdef CONFIG_USB_STORAGE_ISD200 UNUSUAL_DEV( 0x0bf6, 0xa001, 0x0100, 0x0110, - "ATI", - "USB Cable 205", - US_SC_ISD200, US_PR_BULK, isd200_Initialization, - 0 ), + "ATI", + "USB Cable 205", + US_SC_ISD200, US_PR_BULK, isd200_Initialization, + 0 ), #endif /* EasyDisk support. Submitted by Stanislav Karchebny <berk@madfire.net> */ diff --git a/fs/locks.c b/fs/locks.c index 93776fd4b4ae..bfaf05a2c18a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -126,6 +126,10 @@ #include <asm/semaphore.h> #include <asm/uaccess.h> +#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) +#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) +#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) + int leases_enable = 1; int lease_break_time = 45; @@ -561,8 +565,7 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* POSIX locks owned by the same process do not conflict with * each other. 
*/ - if (!(sys_fl->fl_flags & FL_POSIX) || - locks_same_owner(caller_fl, sys_fl)) + if (!IS_POSIX(sys_fl) || locks_same_owner(caller_fl, sys_fl)) return (0); /* Check whether they overlap */ @@ -580,8 +583,7 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* FLOCK locks referring to the same filp do not conflict with * each other. */ - if (!(sys_fl->fl_flags & FL_FLOCK) || - (caller_fl->fl_file == sys_fl->fl_file)) + if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) return (0); #ifdef MSNFS if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) @@ -634,7 +636,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) lock_kernel(); for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { - if (!(cfl->fl_flags & FL_POSIX)) + if (!IS_POSIX(cfl)) continue; if (posix_locks_conflict(cfl, fl)) break; @@ -696,7 +698,7 @@ int locks_mandatory_locked(struct inode *inode) */ lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) + if (!IS_POSIX(fl)) continue; if (fl->fl_owner != owner) break; @@ -732,7 +734,7 @@ repeat: * the proposed read/write. 
*/ for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) + if (!IS_POSIX(fl)) continue; if (fl->fl_start > new_fl->fl_end) break; @@ -790,7 +792,7 @@ static int flock_lock_file(struct file *filp, unsigned int lock_type, search: change = 0; before = &inode->i_flock; - while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) { + while (((fl = *before) != NULL) && IS_FLOCK(fl)) { if (filp == fl->fl_file) { if (lock_type == fl->fl_type) goto out; @@ -815,7 +817,7 @@ search: goto out; repeat: - for (fl = inode->i_flock; (fl != NULL) && (fl->fl_flags & FL_FLOCK); + for (fl = inode->i_flock; (fl != NULL) && IS_FLOCK(fl); fl = fl->fl_next) { if (!flock_locks_conflict(new_fl, fl)) continue; @@ -880,7 +882,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, if (caller->fl_type != F_UNLCK) { repeat: for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) + if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(caller, fl)) continue; @@ -909,7 +911,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, /* First skip locks owned by other processes. 
*/ - while ((fl = *before) && (!(fl->fl_flags & FL_POSIX) || + while ((fl = *before) && (!IS_POSIX(fl) || !locks_same_owner(caller, fl))) { before = &fl->fl_next; } @@ -1085,7 +1087,7 @@ int __get_lease(struct inode *inode, unsigned int mode) if (error != 0) goto out; flock = inode->i_flock; - if (!(flock && (flock->fl_flags & FL_LEASE))) + if (!(flock && IS_LEASE(flock))) goto out; } while (flock->fl_type & F_INPROGRESS); } @@ -1110,7 +1112,7 @@ int __get_lease(struct inode *inode, unsigned int mode) do { fl->fl_type = future; fl = fl->fl_next; - } while (fl != NULL && (fl->fl_flags & FL_LEASE)); + } while (fl != NULL && IS_LEASE(fl)); kill_fasync(&flock->fl_fasync, SIGIO, POLL_MSG); @@ -1131,7 +1133,7 @@ restart: printk(KERN_WARNING "lease timed out\n"); } else if (error > 0) { flock = inode->i_flock; - if (flock && (flock->fl_flags & FL_LEASE)) + if (flock && IS_LEASE(flock)) goto restart; error = 0; } @@ -1154,7 +1156,7 @@ out: time_t lease_get_mtime(struct inode *inode) { struct file_lock *flock = inode->i_flock; - if (flock && (flock->fl_flags & FL_LEASE) && (flock->fl_type & F_WRLCK)) + if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK)) return CURRENT_TIME; return inode->i_mtime; } @@ -1177,7 +1179,7 @@ int fcntl_getlease(struct file *filp) struct file_lock *fl; fl = filp->f_dentry->d_inode->i_flock; - if ((fl == NULL) || ((fl->fl_flags & FL_LEASE) == 0)) + if ((fl == NULL) || !IS_LEASE(fl)) return F_UNLCK; return fl->fl_type & ~F_INPROGRESS; } @@ -1243,7 +1245,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) lock_kernel(); while ((fl = *before) != NULL) { - if (fl->fl_flags != FL_LEASE) + if (!IS_LEASE(fl)) break; if (fl->fl_file == filp) my_before = before; @@ -1646,7 +1648,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) lock_kernel(); before = &inode->i_flock; while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { + if (IS_POSIX(fl) && fl->fl_owner == owner) { 
locks_unlock_delete(before); before = &inode->i_flock; continue; @@ -1672,8 +1674,7 @@ void locks_remove_flock(struct file *filp) before = &inode->i_flock; while ((fl = *before) != NULL) { - if ((fl->fl_flags & (FL_FLOCK|FL_LEASE)) - && (fl->fl_file == filp)) { + if ((IS_FLOCK(fl) || IS_LEASE(fl)) && (fl->fl_file == filp)) { locks_delete_lock(before, 0); continue; } @@ -1716,21 +1717,21 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) inode = fl->fl_file->f_dentry->d_inode; out += sprintf(out, "%d:%s ", id, pfx); - if (fl->fl_flags & FL_POSIX) { + if (IS_POSIX(fl)) { out += sprintf(out, "%6s %s ", (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", (inode == NULL) ? "*NOINODE*" : (IS_MANDLOCK(inode) && (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ? "MANDATORY" : "ADVISORY "); - } else if (fl->fl_flags & FL_FLOCK) { + } else if (IS_FLOCK(fl)) { #ifdef MSNFS if (fl->fl_type & LOCK_MAND) { out += sprintf(out, "FLOCK MSNFS "); } else #endif out += sprintf(out, "FLOCK ADVISORY "); - } else if (fl->fl_flags & FL_LEASE) { + } else if (IS_LEASE(fl)) { out += sprintf(out, "LEASE MANDATORY "); } else { out += sprintf(out, "UNKNOWN UNKNOWN "); @@ -1844,12 +1845,12 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) int result = 1; lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (fl->fl_flags == FL_POSIX) { + if (IS_POSIX(fl)) { if (fl->fl_type == F_RDLCK) continue; if ((fl->fl_end < start) || (fl->fl_start > (start + len))) continue; - } else if (fl->fl_flags == FL_FLOCK) { + } else if (IS_FLOCK(fl)) { if (!(fl->fl_type & LOCK_MAND)) continue; if (fl->fl_type & LOCK_READ) @@ -1882,10 +1883,10 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) int result = 1; lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (fl->fl_flags == FL_POSIX) { + if (IS_POSIX(fl)) { if ((fl->fl_end < start) || (fl->fl_start > (start + len))) continue; - } else 
if (fl->fl_flags == FL_FLOCK) { + } else if (IS_FLOCK(fl)) { if (!(fl->fl_type & LOCK_MAND)) continue; if (fl->fl_type & LOCK_WRITE) diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index ba8f1bb1d5e1..48ea2458e9ae 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -21,8 +21,38 @@ ToDo: several copies of almost identicall functions and the functions are quite big. Modularising them a bit, e.g. a-la get_block(), will make them cleaner and make code reuse easier. - - Want to use dummy inodes for address space i/o. We need some VFS - changes first, which are currently under discussion. + - Want to use dummy inodes for address space i/o. + +2.0.8 - Major updates for handling of case sensitivity and dcache aliasing. + + Big thanks go to Al Viro and other inhabitants of #kernel for investing + their time to discuss the case sensitivity and dcache aliasing issues. + + - Remove unused source file fs/ntfs/attraops.c. + - Remove show_inodes mount option(s), thus dropping support for + displaying of short file names. + - Remove deprecated mount option posix. + - Restore show_sys_files mount option. + - Add new mount option case_sensitive, to determine if the driver + treats file names as case sensitive or not. If case sensitive, create + file names in the POSIX namespace. Otherwise create file names in the + LONG/WIN32 namespace. Note, files remain accessible via their short + file name, if it exists. + - Remove really dumb logic bug in boot sector recovery code. + - Fix dcache aliasing issues wrt short/long file names via changes + to fs/ntfs/dir.c::ntfs_lookup_inode_by_name() and + fs/ntfs/namei.c::ntfs_lookup(): + - Add additional argument to ntfs_lookup_inode_by_name() in which we + return information about the matching file name if the case is not + matching or the match is a short file name. See comments above the + function definition for details. 
+ - Change ntfs_lookup() to only create dcache entries for the correctly + cased file name and only for the WIN32 namespace counterpart of DOS + namespace file names. This ensures we have only one dentry per + directory and also removes all dcache aliasing issues between short + and long file names once we add write support. See comments above + function for details. + - Fix potential 1 byte overflow in fs/ntfs/unistr.c::ntfs_ucstonls(). 2.0.7 - Minor cleanups and updates for changes in core kernel code. diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 0b5c00293344..d75e846489a7 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs.o ntfs-objs := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \ mst.o namei.o super.o sysctl.o time.o unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.0.7\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.0.8\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/attraops.c b/fs/ntfs/attraops.c deleted file mode 100644 index e75825a99c79..000000000000 --- a/fs/ntfs/attraops.c +++ /dev/null @@ -1,47 +0,0 @@ -#include "ntfs.h" - -/* - * We need to define the attribute object structure. FIXME: Move these to - * ntfs.h. - */ -typedef struct { - ntfs_inode *a_ni; - ntfs_volume *a_vol; - atomic_t a_count; - s64 a_size; - struct rw_semaphore a_sem; - struct address_space a_mapping; - unsigned long a_flags; -} attr_obj; - -/** - * ntfs_attr_readpage - fill a page @page of an attribute object @aobj with data - * @aobj: attribute object to which the page @page belongs - * @page: page cache page to fill with data - * - */ -//static int ntfs_attr_readpage(attr_obj *aobj, struct page *page) -static int ntfs_attr_readpage(struct file *aobj, struct page *page) -{ - return -EOPNOTSUPP; -} - -/* - * Address space operations for accessing attributes. Note that these functions - * do not accept an inode as the first parameter but an attribute object. 
We - * use this to implement a generic interface that is not bound to inodes in - * order to support multiple named streams per file, multiple bitmaps per file - * and directory, etc. Basically, this gives access to any attribute within an - * mft record. - * - * We make use of a slab cache for attribute object allocations. - */ -struct address_space_operations ntfs_attr_aops = { - writepage: NULL, /* Write dirty page to disk. */ - readpage: ntfs_attr_readpage, /* Fill page with data. */ - sync_page: block_sync_page, /* Currently, just unplugs the - disk request queue. */ - prepare_write: NULL, /* . */ - commit_write: NULL, /* . */ -}; - diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 0052563c13f0..e67e4654c3a4 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -22,6 +22,7 @@ #include <linux/buffer_head.h> #include "ntfs.h" +#include "dir.h" /* Temporary helper functions -- might become macros */ diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index ac8d04938c79..30112895606c 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -22,6 +22,7 @@ #include <linux/smp_lock.h> #include "ntfs.h" +#include "dir.h" /** * The little endian Unicode string $I30 as a global constant. @@ -35,6 +36,564 @@ const uchar_t I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), * @dir_ni: ntfs inode of the directory in which to search for the name * @uname: Unicode name for which to search in the directory * @uname_len: length of the name @uname in Unicode characters + * @res: return the found file name if necessary (see below) + * + * Look for an inode with name @uname in the directory with inode @dir_ni. + * ntfs_lookup_inode_by_name() walks the contents of the directory looking for + * the Unicode name. If the name is found in the directory, the corresponding + * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it + * is a 64-bit number containing the sequence number. + * + * On error, a negative value is returned corresponding to the error code. 
In + * particular if the inode is not found -ENOENT is returned. Note that you + * can't just check the return value for being negative, you have to check the + * inode number for being negative which you can extract using MREC(return + * value). + * + * Note, @uname_len does not include the (optional) terminating NULL character. + * + * Note, we look for a case sensitive match first but we also look for a case + * insensitive match at the same time. If we find a case insensitive match, we + * save that for the case that we don't find an exact match, where we return + * the case insensitive match and setup @res (which we allocate!) with the mft + * reference, the file name type, length and with a copy of the little endian + * Unicode file name itself. If we match a file name which is in the DOS name + * space, we only return the mft reference and file name type in @res. + * ntfs_lookup() then uses this to find the long file name in the inode itself. + * This is to avoid polluting the dcache with short file names. We want them to + * work but we don't care for how quickly one can access them. This also fixes + * the dcache aliasing issues. + */ +u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname, + const int uname_len, ntfs_name **res) +{ + ntfs_volume *vol = dir_ni->vol; + struct super_block *sb = vol->sb; + MFT_RECORD *m; + INDEX_ROOT *ir; + INDEX_ENTRY *ie; + INDEX_ALLOCATION *ia; + u8 *index_end; + u64 mref; + attr_search_context *ctx; + int err = 0, rc; + VCN vcn, old_vcn; + struct address_space *ia_mapping; + struct page *page; + u8 *kaddr; + ntfs_name *name = NULL; + + /* Get hold of the mft record for the directory. */ + m = map_mft_record(READ, dir_ni); + if (IS_ERR(m)) + goto map_err_out; + + ctx = get_attr_search_ctx(dir_ni, m); + if (!ctx) { + err = -ENOMEM; + goto unm_err_out; + } + + /* Find the index root attribute in the mft record. 
*/ + if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0, + ctx)) { + ntfs_error(sb, "Index root attribute missing in directory " + "inode 0x%Lx.", + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto put_unm_err_out; + } + /* Get to the index root value (it's been verified in read_inode). */ + ir = (INDEX_ROOT*)((u8*)ctx->attr + + le16_to_cpu(ctx->attr->_ARA(value_offset))); + index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ir->index + + le32_to_cpu(ir->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry. + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->_IEH(length)))) { + /* Bounds checks. */ + if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->_IEH(key_length)) > + index_end) + goto dir_err_out; + /* + * The last entry cannot contain a name. It can however contain + * a pointer to a child node in the B+tree so we just break out. + */ + if (ie->_IEH(flags) & INDEX_ENTRY_END) + break; + /* + * We perform a case sensitive comparison and if that matches + * we are done and return the mft reference of the inode (i.e. + * the inode number together with the sequence number for + * consistency checking). We convert it to cpu format before + * returning. + */ + if (ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { +found_it: + /* + * We have a perfect match, so we don't need to care + * about having matched imperfectly before, so we can + * free name and set *res to NULL. + * However, if the perfect match is a short file name, + * we need to signal this through *res, so that + * ntfs_lookup() can fix dcache aliasing issues. + * As an optimization we just reuse an existing + * allocation of *res. 
+ */ + if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { + if (!name) { + name = kmalloc(sizeof(ntfs_name), + GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto put_unm_err_out; + } + } + name->mref = le64_to_cpu( + ie->_IIF(indexed_file)); + name->type = FILE_NAME_DOS; + name->len = 0; + *res = name; + } else { + if (name) + kfree(name); + *res = NULL; + } + mref = le64_to_cpu(ie->_IIF(indexed_file)); + put_attr_search_ctx(ctx); + unmap_mft_record(READ, dir_ni); + return mref; + } + /* + * For a case insensitive mount, we also perform a case + * insensitive comparison (provided the file name is not in the + * POSIX namespace). If the comparison matches, and the name is + * in the WIN32 namespace, we cache the filename in *res so + * that the caller, ntfs_lookup(), can work on it. If the + * comparison matches, and the name is in the DOS namespace, we + * only cache the mft reference and the file name type (we set + * the name length to zero for simplicity). + */ + if (!NVolCaseSensitive(vol) && + ie->key.file_name.file_name_type && + ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + IGNORE_CASE, vol->upcase, vol->upcase_len)) { + int name_size = sizeof(ntfs_name); + u8 type = ie->key.file_name.file_name_type; + u8 len = ie->key.file_name.file_name_length; + + /* Only one case insensitive matching name allowed. */ + if (name) { + ntfs_error(sb, "Found already allocated name " + "in phase 1. 
Please run chkdsk " + "and if that doesn't find any " + "errors please report you saw " + "this message to " + "linux-ntfs-dev@lists.sf.net."); + goto dir_err_out; + } + + if (type != FILE_NAME_DOS) + name_size += len * sizeof(uchar_t); + name = kmalloc(name_size, GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto put_unm_err_out; + } + name->mref = le64_to_cpu(ie->_IIF(indexed_file)); + name->type = type; + if (type != FILE_NAME_DOS) { + name->len = len; + memcpy(name->name, ie->key.file_name.file_name, + len * sizeof(uchar_t)); + } else + name->len = 0; + *res = name; + } + /* + * Not a perfect match, need to do full blown collation so we + * know which way in the B+tree we have to go. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + IGNORE_CASE, vol->upcase, vol->upcase_len); + /* + * If uname collates before the name of the current entry, there + * is definitely no such name in this index but we might need to + * descend into the B+tree so we just break out of the loop. + */ + if (rc == -1) + break; + /* The names are not equal, continue the search. */ + if (rc) + continue; + /* + * Names match with case insensitive comparison, now try the + * case sensitive comparison, which is required for proper + * collation. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); + if (rc == -1) + break; + if (rc) + continue; + /* + * Perfect match, this will never happen as the + * ntfs_are_names_equal() call will have gotten a match but we + * still treat it correctly. + */ + goto found_it; + } + /* + * We have finished with this index without success. Check for the + * presence of a child node and if not present return -ENOENT, unless + * we have got a matching name cached in name in which case return the + * mft reference associated with it. 
+ */ + if (!(ie->_IEH(flags) & INDEX_ENTRY_NODE)) { + if (name) { + put_attr_search_ctx(ctx); + unmap_mft_record(READ, dir_ni); + return name->mref; + } + ntfs_debug("Entry not found."); + err = -ENOENT; + goto put_unm_err_out; + } /* Child node present, descend into it. */ + /* Consistency check: Verify that an index allocation exists. */ + if (!NInoIndexAllocPresent(dir_ni)) { + ntfs_error(sb, "No index allocation attribute but index entry " + "requires one. Directory inode 0x%Lx is " + "corrupt or driver bug.", + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto put_unm_err_out; + } + /* Get the starting vcn of the index_block holding the child node. */ + vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->_IEH(length)) - 8); + ia_mapping = VFS_I(dir_ni)->i_mapping; +descend_into_child_node: + /* + * Convert vcn to index into the index allocation attribute in units + * of PAGE_CACHE_SIZE and map the page cache page, reading it from + * disk if necessary. + */ + page = ntfs_map_page(ia_mapping, vcn << + dir_ni->_IDM(index_vcn_size_bits) >> PAGE_CACHE_SHIFT); + if (IS_ERR(page)) { + ntfs_error(sb, "Failed to map directory index page, error %ld.", + -PTR_ERR(page)); + goto put_unm_err_out; + } + kaddr = (u8*)page_address(page); +fast_descend_into_child_node: + /* Get to the index allocation block. */ + ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << + dir_ni->_IDM(index_vcn_size_bits)) & ~PAGE_CACHE_MASK)); + /* Bounds checks. */ + if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { + ntfs_error(sb, "Out of bounds check failed. Corrupt directory " + "inode 0x%Lx or driver bug.", + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + if (sle64_to_cpu(ia->index_block_vcn) != vcn) { + ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is " + "different from expected VCN (0x%Lx). 
" + "Directory inode 0x%Lx is corrupt or driver " + "bug.", + (long long)sle64_to_cpu(ia->index_block_vcn), + (long long)vcn, + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + if (le32_to_cpu(ia->index.allocated_size) + 0x18 != + dir_ni->_IDM(index_block_size)) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%Lx has a size (%u) differing from the " + "directory specified size (%u). Directory " + "inode is corrupt or driver bug.", + (long long)vcn, + (unsigned long long)dir_ni->mft_no, + le32_to_cpu(ia->index.allocated_size) + 0x18, + dir_ni->_IDM(index_block_size)); + err = -EIO; + goto unm_unm_err_out; + } + index_end = (u8*)ia + dir_ni->_IDM(index_block_size); + if (index_end > kaddr + PAGE_CACHE_SIZE) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%Lx crosses page boundary. Impossible! " + "Cannot access! This is probably a bug in the " + "driver.", (long long)vcn, + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); + if (index_end > (u8*)ia + dir_ni->_IDM(index_block_size)) { + ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory " + "inode 0x%Lx exceeds maximum size.", + (long long)vcn, + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ia->index + + le32_to_cpu(ia->index.entries_offset)); + /* + * Iterate similar to above big loop but applied to index buffer, thus + * loop until we exceed valid memory (corruption case) or until we + * reach the last entry. + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->_IEH(length)))) { + /* Bounds check. 
*/ + if ((u8*)ie < (u8*)ia || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->_IEH(key_length)) > + index_end) { + ntfs_error(sb, "Index entry out of bounds in " + "directory inode 0x%Lx.", + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + /* + * The last entry cannot contain a name. It can however contain + * a pointer to a child node in the B+tree so we just break out. + */ + if (ie->_IEH(flags) & INDEX_ENTRY_END) + break; + /* + * We perform a case sensitive comparison and if that matches + * we are done and return the mft reference of the inode (i.e. + * the inode number together with the sequence number for + * consistency checking). We convert it to cpu format before + * returning. + */ + if (ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { +found_it2: + /* + * We have a perfect match, so we don't need to care + * about having matched imperfectly before, so we can + * free name and set *res to NULL. + * However, if the perfect match is a short file name, + * we need to signal this through *res, so that + * ntfs_lookup() can fix dcache aliasing issues. + * As an optimization we just reuse an existing + * allocation of *res. 
+ */ + if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { + if (!name) { + name = kmalloc(sizeof(ntfs_name), + GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto unm_unm_err_out; + } + } + name->mref = le64_to_cpu( + ie->_IIF(indexed_file)); + name->type = FILE_NAME_DOS; + name->len = 0; + *res = name; + } else { + if (name) + kfree(name); + *res = NULL; + } + mref = le64_to_cpu(ie->_IIF(indexed_file)); + ntfs_unmap_page(page); + put_attr_search_ctx(ctx); + unmap_mft_record(READ, dir_ni); + return mref; + } + /* + * For a case insensitive mount, we also perform a case + * insensitive comparison (provided the file name is not in the + * POSIX namespace). If the comparison matches, and the name is + * in the WIN32 namespace, we cache the filename in *res so + * that the caller, ntfs_lookup(), can work on it. If the + * comparison matches, and the name is in the DOS namespace, we + * only cache the mft reference and the file name type (we set + * the name length to zero for simplicity). + */ + if (!NVolCaseSensitive(vol) && + ie->key.file_name.file_name_type && + ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + IGNORE_CASE, vol->upcase, vol->upcase_len)) { + int name_size = sizeof(ntfs_name); + u8 type = ie->key.file_name.file_name_type; + u8 len = ie->key.file_name.file_name_length; + + /* Only one case insensitive matching name allowed. */ + if (name) { + ntfs_error(sb, "Found already allocated name " + "in phase 2. 
Please run chkdsk " + "and if that doesn't find any " + "errors please report you saw " + "this message to " + "linux-ntfs-dev@lists.sf.net."); + ntfs_unmap_page(page); + goto dir_err_out; + } + + if (type != FILE_NAME_DOS) + name_size += len * sizeof(uchar_t); + name = kmalloc(name_size, GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto put_unm_err_out; + } + name->mref = le64_to_cpu(ie->_IIF(indexed_file)); + name->type = type; + if (type != FILE_NAME_DOS) { + name->len = len; + memcpy(name->name, ie->key.file_name.file_name, + len * sizeof(uchar_t)); + } else + name->len = 0; + *res = name; + } + /* + * Not a perfect match, need to do full blown collation so we + * know which way in the B+tree we have to go. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + IGNORE_CASE, vol->upcase, vol->upcase_len); + /* + * If uname collates before the name of the current entry, there + * is definitely no such name in this index but we might need to + * descend into the B+tree so we just break out of the loop. + */ + if (rc == -1) + break; + /* The names are not equal, continue the search. */ + if (rc) + continue; + /* + * Names match with case insensitive comparison, now try the + * case sensitive comparison, which is required for proper + * collation. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); + if (rc == -1) + break; + if (rc) + continue; + /* + * Perfect match, this will never happen as the + * ntfs_are_names_equal() call will have gotten a match but we + * still treat it correctly. + */ + goto found_it2; + } + /* + * We have finished with this index buffer without success. Check for + * the presence of a child node. 
+ */ + if (ie->_IEH(flags) & INDEX_ENTRY_NODE) { + if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { + ntfs_error(sb, "Index entry with child node found in " + "a leaf node in directory inode 0x%Lx.", + (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + /* Child node present, descend into it. */ + old_vcn = vcn; + vcn = sle64_to_cpup((u8*)ie + + le16_to_cpu(ie->_IEH(length)) - 8); + if (vcn >= 0) { + /* If vcn is in the same page cache page as old_vcn we + * recycle the mapped page. */ + if (old_vcn << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT == vcn << + vol->cluster_size_bits >> + PAGE_CACHE_SHIFT) + goto fast_descend_into_child_node; + ntfs_unmap_page(page); + goto descend_into_child_node; + } + ntfs_error(sb, "Negative child node vcn in directory inode " + "0x%Lx.", (unsigned long long)dir_ni->mft_no); + err = -EIO; + goto unm_unm_err_out; + } + /* + * No child node present, return -ENOENT, unless we have got a matching + * name cached in name in which case return the mft reference + * associated with it. + */ + if (name) { + ntfs_unmap_page(page); + put_attr_search_ctx(ctx); + unmap_mft_record(READ, dir_ni); + return name->mref; + } + ntfs_debug("Entry not found."); + err = -ENOENT; +unm_unm_err_out: + ntfs_unmap_page(page); +put_unm_err_out: + put_attr_search_ctx(ctx); +unm_err_out: + unmap_mft_record(READ, dir_ni); + if (name) { + kfree(name); + *res = NULL; + } + return ERR_MREF(err); +map_err_out: + ntfs_error(sb, "map_mft_record(READ) failed with error code %ld.", + -PTR_ERR(m)); + return ERR_MREF(PTR_ERR(m)); +dir_err_out: + ntfs_error(sb, "Corrupt directory. Aborting lookup."); + err = -EIO; + goto put_unm_err_out; +} + +#if 0 + +// TODO: (AIA) +// The algorithm embedded in this code will be required for the time when we +// want to support adding of entries to directories, where we require correct +// collation of file names in order not to cause corruption of the file system. 
+ +/** + * ntfs_lookup_inode_by_name - find an inode in a directory given its name + * @dir_ni: ntfs inode of the directory in which to search for the name + * @uname: Unicode name for which to search in the directory + * @uname_len: length of the name @uname in Unicode characters * * Look for an inode with name @uname in the directory with inode @dir_ni. * ntfs_lookup_inode_by_name() walks the contents of the directory looking for @@ -414,6 +973,8 @@ dir_err_out: goto put_unm_err_out; } +#endif + typedef union { INDEX_ROOT *ir; INDEX_ALLOCATION *ia; @@ -447,7 +1008,6 @@ static inline int ntfs_filldir(ntfs_volume *vol, struct file *filp, int name_len; unsigned dt_type; FILE_NAME_TYPE_FLAGS name_type; - READDIR_OPTIONS readdir_opts; /* Advance the position even if going to skip the entry. */ if (index_type == INDEX_TYPE_ALLOCATION) @@ -457,25 +1017,17 @@ static inline int ntfs_filldir(ntfs_volume *vol, struct file *filp, vol->mft_record_size; else /* if (index_type == INDEX_TYPE_ROOT) */ filp->f_pos = (u8*)ie - (u8*)iu.ir; - readdir_opts = vol->readdir_opts; name_type = ie->key.file_name.file_name_type; - if (name_type == FILE_NAME_DOS && RHideDosNames(readdir_opts)) { + if (name_type == FILE_NAME_DOS) { ntfs_debug("Skipping DOS name space entry."); return 0; } - if (RHideLongNames(readdir_opts)) { - if (name_type == FILE_NAME_WIN32 || - name_type == FILE_NAME_POSIX) { - ntfs_debug("Skipping WIN32/POSIX name space entry."); - return 0; - } - } if (MREF_LE(ie->_IIF(indexed_file)) == FILE_root) { ntfs_debug("Skipping root directory self reference entry."); return 0; } if (MREF_LE(ie->_IIF(indexed_file)) < FILE_first_user && - RHideSystemFiles(readdir_opts)) { + !NVolShowSystemFiles(vol)) { ntfs_debug("Skipping system file."); return 0; } @@ -496,7 +1048,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, struct file *filp, (unsigned long long)MREF_LE(ie->_IIF(indexed_file)), dt_type == DT_DIR ? 
"DIR" : "REG"); return filldir(dirent, name, name_len, filp->f_pos, - (unsigned long)MREF_LE(ie->_IIF(indexed_file)), dt_type); + (unsigned long)MREF_LE(ie->_IIF(indexed_file)), + dt_type); } /* @@ -510,7 +1063,7 @@ static inline int ntfs_filldir(ntfs_volume *vol, struct file *filp, * index root entries and then the index allocation entries that are marked * as in use in the index bitmap. * While this will return the names in random order this doesn't matter for - * readdir but OTOH results in faster readdir. + * readdir but OTOH results in a faster readdir. */ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { diff --git a/fs/ntfs/dir.h b/fs/ntfs/dir.h new file mode 100644 index 000000000000..3e9482a3db11 --- /dev/null +++ b/fs/ntfs/dir.h @@ -0,0 +1,47 @@ +/* + * dir.h - Defines for directory handling in NTFS Linux kernel driver. Part of + * the Linux-NTFS project. + * + * Copyright (c) 2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_DIR_H +#define _LINUX_NTFS_DIR_H + +#include "layout.h" + +/* + * ntfs_name is used to return the file name to the caller of + * ntfs_lookup_inode_by_name() in order for the caller (namei.c::ntfs_lookup()) + * to be able to deal with dcache aliasing issues. + */ +typedef struct { + MFT_REF mref; + FILE_NAME_TYPE_FLAGS type; + u8 len; + uchar_t name[0]; +} __attribute__ ((__packed__)) ntfs_name; + +/* The little endian Unicode string $I30 as a global constant. */ +extern const uchar_t I30[5]; + +extern u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname, + const int uname_len, ntfs_name **res); + +#endif /* _LINUX_NTFS_FS_DIR_H */ + diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index a9e59a90f310..219e25d794e5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -23,6 +23,7 @@ #include <linux/buffer_head.h> #include "ntfs.h" +#include "dir.h" struct inode *ntfs_alloc_big_inode(struct super_block *sb) { @@ -1337,13 +1338,6 @@ void ntfs_clear_big_inode(struct inode *vi) return; } -static const option_t si_readdir_opts_arr[] = { - { SHOW_SYSTEM, "system" }, - { SHOW_WIN32, "win32" }, - { SHOW_DOS, "dos" }, - { 0, NULL } -}; - /** * ntfs_show_options - show mount options in /proc/mounts * @sf: seq_file in which to write our mount options @@ -1368,20 +1362,10 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) seq_printf(sf, ",dmask=0%o", vol->dmask); } seq_printf(sf, ",nls=%s", vol->nls_map->charset); - switch (vol->readdir_opts) { - case SHOW_ALL: - seq_printf(sf, ",show_inodes=all"); - break; - case SHOW_POSIX: - seq_printf(sf, ",show_inodes=posix"); - break; - default: - for (i = 0; si_readdir_opts_arr[i].val; i++) { 
- if (si_readdir_opts_arr[i].val & vol->readdir_opts) - seq_printf(sf, ",show_inodes=%s", - si_readdir_opts_arr[i].str); - } - } + if (NVolCaseSensitive(vol)) + seq_printf(sf, ",case_sensitive"); + if (NVolShowSystemFiles(vol)) + seq_printf(sf, ",show_sys_files"); for (i = 0; on_errors_arr[i].val; i++) { if (on_errors_arr[i].val & vol->on_errors) seq_printf(sf, ",errors=%s", on_errors_arr[i].str); diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 7f45526a89f9..683aff265021 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -29,7 +29,7 @@ #include <linux/list.h> #include <asm/byteorder.h> -#include "volume.h" +#include "types.h" /* * Constant endianness conversion defines. @@ -679,7 +679,7 @@ typedef ATTR_RECORD ATTR_REC; */ typedef enum { /* - * These flags are only presnt in the STANDARD_INFORMATION attribute + * These flags are only present in the STANDARD_INFORMATION attribute * (in the field file_attributes). */ FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 9e1ef4f9641a..33b328c578af 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -20,7 +20,10 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/dcache.h> + #include "ntfs.h" +#include "dir.h" /** * ntfs_lookup - find the inode represented by a dentry in a directory inode @@ -43,14 +46,55 @@ * dentry @dent. The dentry is then termed a negative dentry. * * Only if an actual error occurs, do we return an error via ERR_PTR(). + * + * In order to handle the case insensitivity issues of NTFS with regards to the + * dcache and the dcache requiring only one dentry per directory, we deal with + * dentry aliases that only differ in case in ->ntfs_lookup() while maintining + * a case sensitive dcache. 
This means that we get the full benefit of dcache + * speed when the file/directory is looked up with the same case as returned by + * ->ntfs_readdir() but that a lookup for any other case (or for the short file + * name) will not find anything in dcache and will enter ->ntfs_lookup() + * instead, where we search the directory for a fully matching file name + * (including case) and if that is not found, we search for a file name that + * matches with different case and if that has non-POSIX semantics we return + * that. We actually do only one search (case sensitive) and keep tabs on + * whether we have found a case insensitive match in the process. + * + * To simplify matters for us, we do not treat the short vs long filenames as + * two hard links but instead if the lookup matches a short filename, we + * return the dentry for the corresponding long filename instead. + * + * There are three cases we need to distinguish here: + * + * 1) @dent perfectly matches (i.e. including case) a directory entry with a + * file name in the WIN32 or POSIX namespaces. In this case + * ntfs_lookup_inode_by_name() will return with name set to NULL and we + * just d_add() @dent. + * 2) @dent matches (not including case) a directory entry with a file name in + * the WIN32 namespace. In this case ntfs_lookup_inode_by_name() will return + * with name set to point to a kmalloc()ed ntfs_name structure containing + * the properly cased little endian Unicode name. We convert the name to the + * current NLS code page, search if a dentry with this name already exists + * and if so return that instead of @dent. The VFS will then destroy the old + * @dent and use the one we returned. If a dentry is not found, we allocate + * a new one, d_add() it, and return it as above. + * 3) @dent matches either perfectly or not (i.e. we don't care about case) a + * directory entry with a file name in the DOS namespace. 
In this case + * ntfs_lookup_inode_by_name() will return with name set to point to a + * kmalloc()ed ntfs_name structure containing the mft reference (cpu endian) + * of the inode. We use the mft reference to read the inode and to find the + * file name in the WIN32 namespace corresponding to the matched short file + * name. We then convert the name to the current NLS code page, and proceed + * searching for a dentry with this name, etc, as in case 2), above. */ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) { ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); struct inode *dent_inode; + uchar_t *uname; + ntfs_name *name = NULL; u64 mref; unsigned long dent_ino; - uchar_t *uname; int uname_len; ntfs_debug("Looking up %s in directory inode 0x%lx.", @@ -62,7 +106,8 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) ntfs_error(vol->sb, "Failed to convert name to Unicode."); return ERR_PTR(uname_len); } - mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len); + mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len, + &name); kmem_cache_free(ntfs_name_cache, uname); if (!IS_ERR_MREF(mref)) { dent_ino = (unsigned long)MREF(mref); @@ -72,9 +117,17 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) /* Consistency check. */ if (MSEQNO(mref) == NTFS_I(dent_inode)->seq_no || dent_ino == FILE_MFT) { - d_add(dent, dent_inode); - ntfs_debug("Done."); - return NULL; + /* Perfect WIN32/POSIX match. -- Case 1. */ + if (!name) { + d_add(dent, dent_inode); + ntfs_debug("Done."); + return NULL; + } + /* + * We are too indented. Handle imperfect + * matches and short file names further below. 
+ */ + goto handle_name; } ntfs_error(vol->sb, "Found stale reference to inode " "0x%Lx (reference sequence number = " @@ -88,8 +141,11 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) ntfs_error(vol->sb, "iget(0x%Lx) failed, returning " "-EACCES.", (unsigned long long)MREF(mref)); + if (name) + kfree(name); return ERR_PTR(-EACCES); } + /* It is guaranteed that name is no longer allocated at this point. */ if (MREF_ERR(mref) == -ENOENT) { ntfs_debug("Entry was not found, adding negative dentry."); /* The dcache will handle negative entries. */ @@ -100,9 +156,133 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error " "code %i.", -MREF_ERR(mref)); return ERR_PTR(MREF_ERR(mref)); + + // TODO: Consider moving this lot to a separate function! (AIA) +handle_name: + { + struct dentry *real_dent; + attr_search_context *ctx; + ntfs_inode *ni = NTFS_I(dent_inode); + int err; + struct qstr nls_name; + + nls_name.name = NULL; + if (name->type != FILE_NAME_DOS) { /* Case 2. */ + nls_name.len = (unsigned)ntfs_ucstonls(vol, + (uchar_t*)&name->name, name->len, + (unsigned char**)&nls_name.name, + name->len * 3 + 1); + kfree(name); + } else /* if (name->type == FILE_NAME_DOS) */ { /* Case 3. */ + MFT_RECORD *m; + FILE_NAME_ATTR *fn; + + kfree(name); + + /* Find the WIN32 name corresponding to the matched DOS name. */ + ni = NTFS_I(dent_inode); + m = map_mft_record(READ, ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + goto name_err_out; + } + ctx = get_attr_search_ctx(ni, m); + if (!ctx) { + err = -ENOMEM; + goto unm_err_out; + } + do { + ATTR_RECORD *a; + u32 val_len; + + if (!lookup_attr(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, + ctx)) { + ntfs_error(vol->sb, "Inode corrupt: No WIN32 " + "namespace counterpart to DOS " + "file name. Run chkdsk."); + err = -EIO; + goto put_unm_err_out; + } + /* Consistency checks. 
*/ + a = ctx->attr; + if (a->non_resident || a->flags) + goto eio_put_unm_err_out; + val_len = le32_to_cpu(a->_ARA(value_length)); + if (le16_to_cpu(a->_ARA(value_offset)) + val_len > + le32_to_cpu(a->length)) + goto eio_put_unm_err_out; + fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu( + ctx->attr->_ARA(value_offset))); + if ((u32)(fn->file_name_length * sizeof(uchar_t) + + sizeof(FILE_NAME_ATTR)) > val_len) + goto eio_put_unm_err_out; + } while (fn->file_name_type != FILE_NAME_WIN32); + + /* Convert the found WIN32 name to current NLS code page. */ + nls_name.len = (unsigned)ntfs_ucstonls(vol, + (uchar_t*)&fn->file_name, fn->file_name_length, + (unsigned char**)&nls_name.name, + fn->file_name_length * 3 + 1); + + put_attr_search_ctx(ctx); + unmap_mft_record(READ, ni); + } + + /* Check if a conversion error occured. */ + if ((signed)nls_name.len < 0) { + err = (signed)nls_name.len; + goto name_err_out; + } + nls_name.hash = full_name_hash(nls_name.name, nls_name.len); + + // FIXME: Do we need dcache_lock or dparent_lock here or is the + // fact that i_sem is held on the parent inode sufficient? (AIA) + + /* Does a dentry matching the nls_name exist already? */ + real_dent = d_lookup(dent->d_parent, &nls_name); + /* If not, create it now. */ + if (!real_dent) { + real_dent = d_alloc(dent->d_parent, &nls_name); + kfree(nls_name.name); + if (!real_dent) { + err = -ENOMEM; + goto name_err_out; + } + d_add(real_dent, dent_inode); + return real_dent; + } + kfree(nls_name.name); + /* Matching dentry exists, check if it is negative. */ + if (real_dent->d_inode) { + BUG_ON(real_dent->d_inode != dent_inode); + /* + * Already have the inode and the dentry attached, decrement + * the reference count to balance the iget() we did earlier on. + */ + iput(dent_inode); + return real_dent; + } + /* Negative dentry: instantiate it. */ + d_instantiate(real_dent, dent_inode); + return real_dent; + +eio_put_unm_err_out: + ntfs_error(vol->sb, "Illegal file name attribute. 
Run chkdsk."); + err = -EIO; +put_unm_err_out: + put_attr_search_ctx(ctx); +unm_err_out: + unmap_mft_record(READ, ni); +name_err_out: + iput(dent_inode); + return ERR_PTR(err); + } } +/* + * Inode operations for directories. + */ struct inode_operations ntfs_dir_inode_ops = { - lookup: ntfs_lookup, /* lookup directory. */ + lookup: ntfs_lookup, /* VFS: Lookup directory. */ }; diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index 793ea711e1f2..e29a1a90a353 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -96,9 +96,6 @@ extern kmem_cache_t *ntfs_inode_cache; extern kmem_cache_t *ntfs_big_inode_cache; extern kmem_cache_t *ntfs_attr_ctx_cache; -/* The little endian Unicode string $I30 as a global constant. */ -extern const uchar_t I30[5]; - /* The various operations structs defined throughout the driver files. */ extern struct super_operations ntfs_mount_sops; extern struct super_operations ntfs_sops; @@ -223,10 +220,6 @@ extern inline s64 utc2ntfs(const time_t time); extern inline s64 get_current_ntfs_time(void); extern inline time_t ntfs2utc(const s64 time); -/* From fs/ntfs/dir.c */ -extern u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname, - const int uname_len); - /* From fs/ntfs/unistr.c */ extern BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len, const uchar_t *s2, size_t s2_len, diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 547f727d74cb..6cfdb2c4b04a 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -52,17 +52,6 @@ const option_t on_errors_arr[] = { { 0, NULL } }; -static const option_t readdir_opts_arr[] = { - { SHOW_SYSTEM, "system" }, - { SHOW_WIN32, "win32" }, - { SHOW_WIN32, "long" }, - { SHOW_DOS, "dos" }, - { SHOW_DOS, "short" }, - { SHOW_POSIX, "posix" }, - { SHOW_ALL, "all" }, - { 0, NULL } -}; - /** * simple_getbool - * @@ -98,7 +87,8 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) uid_t uid = (uid_t)-1; gid_t gid = (gid_t)-1; mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; - int mft_zone_multiplier = 
-1, on_errors = -1, readdir_opts = -1; + int mft_zone_multiplier = -1, on_errors = -1; + int show_sys_files = -1, case_sensitive = -1; struct nls_table *nls_map = NULL, *old_nls; /* I am lazy... (-8 */ @@ -120,6 +110,13 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) if (*v) \ goto needs_val; \ } +#define NTFS_GETOPT_BOOL(option, variable) \ + if (!strcmp(p, option)) { \ + BOOL val; \ + if (!simple_getbool(v, &val)) \ + goto needs_bool; \ + variable = val; \ + } #define NTFS_GETOPT_OPTIONS_ARRAY(option, variable, opt_array) \ if (!strcmp(p, option)) { \ int _i; \ @@ -146,47 +143,16 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) else NTFS_GETOPT("umask", fmask = dmask) else NTFS_GETOPT("fmask", fmask) else NTFS_GETOPT("dmask", dmask) - else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier) + else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) + else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) + else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, on_errors_arr) - else NTFS_GETOPT_OPTIONS_ARRAY("show_inodes", readdir_opts, - readdir_opts_arr) - else if (!strcmp(p, "show_sys_files")) { - BOOL val = FALSE; - ntfs_warning(vol->sb, "Option show_sys_files is " - "deprecated. Please use option " - "show_inodes=system in the future."); - if (!v || !*v) - val = TRUE; - else if (!simple_getbool(v, &val)) - goto needs_bool; - if (val) { - if (readdir_opts == -1) - readdir_opts = 0; - readdir_opts |= SHOW_SYSTEM; - } - } else if (!strcmp(p, "posix")) { - BOOL val = FALSE; - ntfs_warning(vol->sb, "Option posix is deprecated. " - "Please use option show_inodes=posix " - "instead. 
Be aware that some userspace " - "applications may be confused by this, " - "since the short and long names of " - "directory inodes will have the same inode " - "numbers, yet each will only have a link " - "count of 1 due to Linux not supporting " - "directory hard links."); - if (!v || !*v) - goto needs_arg; - else if (!simple_getbool(v, &val)) - goto needs_bool; - if (val) { - if (readdir_opts == -1) - readdir_opts = 0; - readdir_opts |= SHOW_POSIX; - } - } else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) { + else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) + ntfs_warning(vol->sb, "Ignoring obsolete option %s.", + p); + else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) { if (!strcmp(p, "iocharset")) ntfs_warning(vol->sb, "Option iocharset is " "deprecated. Please use " @@ -232,6 +198,7 @@ use_utf8: errors++; } #undef NTFS_GETOPT_OPTIONS_ARRAY +#undef NTFS_GETOPT_BOOL #undef NTFS_GETOPT #undef NTFS_GETOPT_WITH_DEFAULT } @@ -297,8 +264,18 @@ no_mount_options: vol->fmask = fmask; if (dmask != (mode_t)-1) vol->dmask = dmask; - if (readdir_opts != -1) - vol->readdir_opts = readdir_opts; + if (show_sys_files != -1) { + if (show_sys_files) + NVolSetShowSystemFiles(vol); + else + NVolClearShowSystemFiles(vol); + } + if (case_sensitive != -1) { + if (case_sensitive) + NVolSetCaseSensitive(vol); + else + NVolClearCaseSensitive(vol); + } return TRUE; needs_arg: ntfs_error(vol->sb, "The %s option requires an argument.", p); @@ -464,7 +441,7 @@ static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb, ntfs_error(sb, "Primary boot sector is invalid."); } else if (!silent) ntfs_error(sb, read_err_str, "primary"); - if (NTFS_SB(sb)->on_errors & ~ON_ERRORS_RECOVER) { + if (!(NTFS_SB(sb)->on_errors & ON_ERRORS_RECOVER)) { if (bh_primary) brelse(bh_primary); if (!silent) @@ -1492,6 +1469,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) vol->root_ino = NULL; vol->secure_ino = NULL; vol->uid = vol->gid = 0; + 
vol->flags = 0; vol->on_errors = 0; vol->mft_zone_multiplier = 0; vol->nls_map = NULL; @@ -1531,12 +1509,6 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) vol->fmask = 0177; vol->dmask = 0077; - /* - * Default is to show long file names (including POSIX file names), and - * not to show system files and short file names. - */ - vol->readdir_opts = SHOW_WIN32; - /* Important to get the mount options dealt with now. */ if (!parse_options(vol, (char*)opt)) goto err_out_now; diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index a6439cf9a9f4..f5f441f97988 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -333,7 +333,7 @@ int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins, } if (!ns) { ns_len = ins_len * NLS_MAX_CHARSET_SIZE; - ns = (unsigned char*)kmalloc(ns_len, GFP_NOFS); + ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS); if (!ns) goto mem_err_out; } @@ -352,7 +352,7 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, ~63, GFP_NOFS); if (tc) { memcpy(tc, ns, ns_len); - ns_len = (ns_len + 64) & ~63; + ns_len = ((ns_len + 64) & ~63) - 1; kfree(ns); ns = tc; goto retry; diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h index 0d6f923d0800..675095917c99 100644 --- a/fs/ntfs/volume.h +++ b/fs/ntfs/volume.h @@ -26,18 +26,30 @@ #include "types.h" -/* These are used to determine which inode names are returned by readdir(). */ +/* + * Defined bits for the flags field in the ntfs_volume structure. + */ typedef enum { - SHOW_SYSTEM = 1, - SHOW_WIN32 = 2, - SHOW_DOS = 4, - SHOW_POSIX = SHOW_WIN32 | SHOW_DOS, - SHOW_ALL = SHOW_SYSTEM | SHOW_POSIX, -} READDIR_OPTIONS; + NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */ + NV_CaseSensitive, /* 1: Treat file names as case sensitive and + create filenames in the POSIX namespace. + Otherwise be case insensitive and create + file names in WIN32 namespace. 
*/ +} ntfs_volume_flags; + +#define NVolShowSystemFiles(n_vol) test_bit(NV_ShowSystemFiles, \ + &(n_vol)->flags) +#define NVolSetShowSystemFiles(n_vol) set_bit(NV_ShowSystemFiles, \ + &(n_vol)->flags) +#define NVolClearShowSystemFiles(n_vol) clear_bit(NV_ShowSystemFiles, \ + &(n_vol)->flags) -#define RHideSystemFiles(x) (!((x) & SHOW_SYSTEM)) -#define RHideLongNames(x) (!((x) & SHOW_WIN32)) -#define RHideDosNames(x) (!((x) & SHOW_DOS)) +#define NVolCaseSensitive(n_vol) test_bit(NV_CaseSensitive, \ + &(n_vol)->flags) +#define NVolSetCaseSensitive(n_vol) set_bit(NV_CaseSensitive, \ + &(n_vol)->flags) +#define NVolClearCaseSensitive(n_vol) clear_bit(NV_CaseSensitive, \ + &(n_vol)->flags) /* * The NTFS in memory super block structure. @@ -57,13 +69,13 @@ typedef struct { LCN nr_blocks; /* Number of NTFS_BLOCK_SIZE bytes sized blocks on the device. */ /* Configuration provided by user at mount time. */ + unsigned long flags; /* Miscellaneous flags, see above. */ uid_t uid; /* uid that files will be mounted as. */ gid_t gid; /* gid that files will be mounted as. */ mode_t fmask; /* The mask for file permissions. */ mode_t dmask; /* The mask for directory permissions. */ - READDIR_OPTIONS readdir_opts; /* Namespace of inode names to show. */ - u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ + u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ u8 on_errors; /* What to do on file system errors. */ /* NTFS bootsector provided information. 
*/ u16 sector_size; /* in bytes */ diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index 005c5316ca5d..dcca9fc715a8 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -232,6 +232,8 @@ typedef struct sigevent { #ifdef __KERNEL__ +struct siginfo; + #ifndef HAVE_ARCH_COPY_SIGINFO #include <linux/string.h> diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 0e72ad90e1e9..37c4d2fc2fc7 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -60,6 +60,7 @@ struct cpuinfo_x86 { #define X86_VENDOR_RISE 6 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 +#define X86_VENDOR_NUM 9 #define X86_VENDOR_UNKNOWN 0xff /* diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h index b41d88f45928..7d73c89da525 100644 --- a/include/asm-i386/suspend.h +++ b/include/asm-i386/suspend.h @@ -33,266 +33,14 @@ struct saved_context { u32 eflags; } __attribute__((packed)); -static struct saved_context saved_context; - #define loaddebug(thread,register) \ __asm__("movl %0,%%db" #register \ : /* no output */ \ :"r" ((thread)->debugreg[register])) - -/* - * save_processor_context - * - * Save the state of the processor before we go to sleep. - * - * return_stack is the value of the stack pointer (%esp) as the caller sees it. - * A good way could not be found to obtain it from here (don't want to make _too_ - * many assumptions about the layout of the stack this far down.) Also, the - * handy little __builtin_frame_pointer(level) where level > 0, is blatantly - * buggy - it returns the value of the stack at the proper location, not the - * location, like it should (as of gcc 2.91.66) - * - * Note that the context and timing of this function is pretty critical. - * With a minimal amount of things going on in the caller and in here, gcc - * does a good job of being just a dumb compiler. 
Watch the assembly output - * if anything changes, though, and make sure everything is going in the right - * place. - */ -static inline void save_processor_context (void) -{ - kernel_fpu_begin(); - - /* - * descriptor tables - */ - asm volatile ("sgdt (%0)" : "=m" (saved_context.gdt_limit)); - asm volatile ("sidt (%0)" : "=m" (saved_context.idt_limit)); - asm volatile ("sldt (%0)" : "=m" (saved_context.ldt)); - asm volatile ("str (%0)" : "=m" (saved_context.tr)); - - /* - * save the general registers. - * note that gcc has constructs to specify output of certain registers, - * but they're not used here, because it assumes that you want to modify - * those registers, so it tries to be smart and save them beforehand. - * It's really not necessary, and kinda fishy (check the assembly output), - * so it's avoided. - */ - asm volatile ("movl %%esp, (%0)" : "=m" (saved_context.esp)); - asm volatile ("movl %%eax, (%0)" : "=m" (saved_context.eax)); - asm volatile ("movl %%ebx, (%0)" : "=m" (saved_context.ebx)); - asm volatile ("movl %%ecx, (%0)" : "=m" (saved_context.ecx)); - asm volatile ("movl %%edx, (%0)" : "=m" (saved_context.edx)); - asm volatile ("movl %%ebp, (%0)" : "=m" (saved_context.ebp)); - asm volatile ("movl %%esi, (%0)" : "=m" (saved_context.esi)); - asm volatile ("movl %%edi, (%0)" : "=m" (saved_context.edi)); - - /* - * segment registers - */ - asm volatile ("movw %%es, %0" : "=r" (saved_context.es)); - asm volatile ("movw %%fs, %0" : "=r" (saved_context.fs)); - asm volatile ("movw %%gs, %0" : "=r" (saved_context.gs)); - asm volatile ("movw %%ss, %0" : "=r" (saved_context.ss)); - - /* - * control registers - */ - asm volatile ("movl %%cr0, %0" : "=r" (saved_context.cr0)); - asm volatile ("movl %%cr2, %0" : "=r" (saved_context.cr2)); - asm volatile ("movl %%cr3, %0" : "=r" (saved_context.cr3)); - asm volatile ("movl %%cr4, %0" : "=r" (saved_context.cr4)); - - /* - * eflags - */ - asm volatile ("pushfl ; popl (%0)" : "=m" (saved_context.eflags)); -} - 
-static void fix_processor_context(void) -{ - int nr = smp_processor_id(); - struct tss_struct * t = &init_tss[nr]; - - set_tss_desc(nr,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - gdt_table[__TSS(nr)].b &= 0xfffffdff; - - load_TR(nr); /* This does ltr */ - - load_LDT(¤t->mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg[7]){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); - } - -} - -static void -do_fpu_end(void) -{ - /* restore FPU regs if necessary */ - /* Do it out of line so that gcc does not move cr0 load to some stupid place */ - kernel_fpu_end(); -} - -/* - * restore_processor_context - * - * Restore the processor context as it was before we went to sleep - * - descriptor tables - * - control registers - * - segment registers - * - flags - * - * Note that it is critical that this function is declared inline. - * It was separated out from restore_state to make that function - * a little clearer, but it needs to be inlined because we won't have a - * stack when we get here (so we can't push a return address). 
- */ -static inline void restore_processor_context (void) -{ - /* - * first restore %ds, so we can access our data properly - */ - asm volatile (".align 4"); - asm volatile ("movw %0, %%ds" :: "r" ((u16)__KERNEL_DS)); - - - /* - * control registers - */ - asm volatile ("movl %0, %%cr4" :: "r" (saved_context.cr4)); - asm volatile ("movl %0, %%cr3" :: "r" (saved_context.cr3)); - asm volatile ("movl %0, %%cr2" :: "r" (saved_context.cr2)); - asm volatile ("movl %0, %%cr0" :: "r" (saved_context.cr0)); - - /* - * segment registers - */ - asm volatile ("movw %0, %%es" :: "r" (saved_context.es)); - asm volatile ("movw %0, %%fs" :: "r" (saved_context.fs)); - asm volatile ("movw %0, %%gs" :: "r" (saved_context.gs)); - asm volatile ("movw %0, %%ss" :: "r" (saved_context.ss)); - - /* - * the other general registers - * - * note that even though gcc has constructs to specify memory - * input into certain registers, it will try to be too smart - * and save them at the beginning of the function. This is esp. - * bad since we don't have a stack set up when we enter, and we - * want to preserve the values on exit. So, we set them manually. - */ - asm volatile ("movl %0, %%esp" :: "m" (saved_context.esp)); - asm volatile ("movl %0, %%ebp" :: "m" (saved_context.ebp)); - asm volatile ("movl %0, %%eax" :: "m" (saved_context.eax)); - asm volatile ("movl %0, %%ebx" :: "m" (saved_context.ebx)); - asm volatile ("movl %0, %%ecx" :: "m" (saved_context.ecx)); - asm volatile ("movl %0, %%edx" :: "m" (saved_context.edx)); - asm volatile ("movl %0, %%esi" :: "m" (saved_context.esi)); - asm volatile ("movl %0, %%edi" :: "m" (saved_context.edi)); - - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). 
- */ - asm volatile ("lgdt (%0)" :: "m" (saved_context.gdt_limit)); - asm volatile ("lidt (%0)" :: "m" (saved_context.idt_limit)); - asm volatile ("lldt (%0)" :: "m" (saved_context.ldt)); - - fix_processor_context(); - - /* - * the flags - */ - asm volatile ("pushl %0 ; popfl" :: "m" (saved_context.eflags)); - - do_fpu_end(); -} - -#ifdef SUSPEND_C -/* Local variables for do_magic */ -static int loop __nosavedata = 0; -static int loop2 __nosavedata = 0; - -/* - * (KG): Since we affect stack here, we make this function as flat and easy - * as possible in order to not provoke gcc to use local variables on the stack. - * Note that on resume, all (expect nosave) variables will have the state from - * the time of writing (suspend_save_image) and the registers (including the - * stack pointer, but excluding the instruction pointer) will be loaded with - * the values saved at save_processor_context() time. - */ -static void do_magic(int resume) -{ - /* DANGER WILL ROBINSON! - * - * If this function is too difficult for gcc to optimize, it will crash and burn! - * see above. - * - * DO NOT TOUCH. - */ - - if (!resume) { - do_magic_suspend_1(); - save_processor_context(); /* We need to capture registers and memory at "same time" */ - do_magic_suspend_2(); /* If everything goes okay, this function does not return */ - return; - } - - /* We want to run from swapper_pg_dir, since swapper_pg_dir is stored in constant - * place in memory - */ - - __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir))); - -/* - * Final function for resuming: after copying the pages to their original - * position, it restores the register state. - * - * What about page tables? Writing data pages may toggle - * accessed/dirty bits in our page tables. That should be no problems - * with 4MB page tables. That's why we require have_pse. - * - * This loops destroys stack from under itself, so it better should - * not use any stack space, itself. 
When this function is entered at - * resume time, we move stack to _old_ place. This is means that this - * function must use no stack and no local variables in registers, - * until calling restore_processor_context(); - * - * Critical section here: noone should touch saved memory after - * do_magic_resume_1; copying works, because nr_copy_pages, - * pagedir_nosave, loop and loop2 are nosavedata. - */ - do_magic_resume_1(); - - for (loop=0; loop < nr_copy_pages; loop++) { - /* You may not call something (like copy_page) here: see above */ - for (loop2=0; loop2 < PAGE_SIZE; loop2++) { - *(((char *)((pagedir_nosave+loop)->orig_address))+loop2) = - *(((char *)((pagedir_nosave+loop)->address))+loop2); - __flush_tlb(); - } - } - - restore_processor_context(); - -/* Ahah, we now run with our old stack, and with registers copied from - suspend time */ - - do_magic_resume_2(); -} -#endif +extern void do_fpu_end(void); +extern void fix_processor_context(void); +extern void do_magic(int resume); #ifdef CONFIG_ACPI_SLEEP extern unsigned long saved_eip; diff --git a/include/asm-sparc/btfixup.h b/include/asm-sparc/btfixup.h index e2ad32144df2..f0ea6d09d1de 100644 --- a/include/asm-sparc/btfixup.h +++ b/include/asm-sparc/btfixup.h @@ -16,7 +16,22 @@ extern unsigned int ___illegal_use_of_BTFIXUP_SIMM13_in_module(void); extern unsigned int ___illegal_use_of_BTFIXUP_SETHI_in_module(void); extern unsigned int ___illegal_use_of_BTFIXUP_HALF_in_module(void); extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); -#endif + +#define BTFIXUP_SIMM13(__name) ___illegal_use_of_BTFIXUP_SIMM13_in_module() +#define BTFIXUP_HALF(__name) ___illegal_use_of_BTFIXUP_HALF_in_module() +#define BTFIXUP_SETHI(__name) ___illegal_use_of_BTFIXUP_SETHI_in_module() +#define BTFIXUP_INT(__name) ___illegal_use_of_BTFIXUP_INT_in_module() +#define BTFIXUP_BLACKBOX(__name) ___illegal_use_of_BTFIXUP_BLACKBOX_in_module + +#else + +#define BTFIXUP_SIMM13(__name) ___sf_##__name() +#define 
BTFIXUP_HALF(__name) ___af_##__name() +#define BTFIXUP_SETHI(__name) ___hf_##__name() +#define BTFIXUP_INT(__name) ((unsigned int)&___i_##__name) +/* This must be written in assembly and present in a sethi */ +#define BTFIXUP_BLACKBOX(__name) ___b_##__name +#endif /* MODULE */ /* Fixup call xx */ @@ -30,12 +45,6 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_BLACKBOX(__name) \ extern unsigned ___bs_##__name[2]; -#ifdef MODULE -#define BTFIXUP_BLACKBOX(__name) ___illegal_use_of_BTFIXUP_BLACKBOX_in_module -#else -/* This must be written in assembly and present in a sethi */ -#define BTFIXUP_BLACKBOX(__name) ___b_##__name -#endif /* Put bottom 13bits into some register variable */ @@ -55,11 +64,6 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); __asm__ ("or %%g0, ___s_" #__name "__btset_" #__val ", %0" : "=r"(ret));\ return ret; \ } -#ifdef MODULE -#define BTFIXUP_SIMM13(__name) ___illegal_use_of_BTFIXUP_SIMM13_in_module() -#else -#define BTFIXUP_SIMM13(__name) ___sf_##__name() -#endif /* Put either bottom 13 bits, or upper 22 bits into some register variable * (depending on the value, this will lead into sethi FIX, reg; or @@ -82,11 +86,6 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); __asm__ ("or %%g0, ___a_" #__name "__btset_" #__val ", %0" : "=r"(ret));\ return ret; \ } -#ifdef MODULE -#define BTFIXUP_HALF(__name) ___illegal_use_of_BTFIXUP_HALF_in_module() -#else -#define BTFIXUP_HALF(__name) ___af_##__name() -#endif /* Put upper 22 bits into some register variable */ @@ -107,22 +106,12 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); "=r"(ret)); \ return ret; \ } -#ifdef MODULE -#define BTFIXUP_SETHI(__name) ___illegal_use_of_BTFIXUP_SETHI_in_module() -#else -#define BTFIXUP_SETHI(__name) ___hf_##__name() -#endif /* Put a full 32bit integer into some register variable */ #define BTFIXUPDEF_INT(__name) \ extern unsigned char ___i_##__name; \ extern unsigned 
___is_##__name[2]; -#ifdef MODULE -#define BTFIXUP_INT(__name) ___illegal_use_of_BTFIXUP_INT_in_module() -#else -#define BTFIXUP_INT(__name) ((unsigned int)&___i_##__name) -#endif #define BTFIXUPCALL_NORM 0x00000000 /* Always call */ #define BTFIXUPCALL_NOP 0x01000000 /* Possibly optimize to nop */ diff --git a/include/asm-sparc/page.h b/include/asm-sparc/page.h index 4ce13153c390..0c5b3a066d97 100644 --- a/include/asm-sparc/page.h +++ b/include/asm-sparc/page.h @@ -54,8 +54,8 @@ #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) #define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) +#define clear_user_page(addr, vaddr, page) clear_page(addr) +#define copy_user_page(to, from, vaddr, page) copy_page(to, from) /* The following structure is used to hold the physical * memory configuration of the machine. This is filled in diff --git a/include/asm-sparc/ultra.h b/include/asm-sparc/ultra.h deleted file mode 100644 index fdc6c7f488f1..000000000000 --- a/include/asm-sparc/ultra.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $Id: ultra.h,v 1.2 1995/11/25 02:33:10 davem Exp $ - * ultra.h: Definitions and defines for the TI V9 UltraSparc. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - */ - -#ifndef _SPARC_ULTRA_H -#define _SPARC_ULTRA_H - -/* Spitfire MMU control register: - * - * ---------------------------------------------------------- - * | | IMPL | VERS | | MID | | - * ---------------------------------------------------------- - * 64 31-28 27-24 23-22 21-17 16 0 - * - * IMPL: Implementation of this Spitfire. - * VERS: Version of this Spitfire. - * MID: Module ID of this processor. 
- */ - -#define SPITFIRE_MIDMASK 0x00000000003e0000 - -/* Spitfire Load Store Unit control register: - * - * --------------------------------------------------------------------- - * | RSV | PWR | PWW | VWR | VWW | RSV | PMASK | DME | IME | DCE | ICE | - * --------------------------------------------------------------------- - * 63-25 24 23 22 21 20 19-4 3 2 1 0 - * - * PWR: Physical Watchpoint Read enable: 0=off 1=on - * PWW: Physical Watchpoint Write enable: 0=off 1=on - * VWR: Virtual Watchpoint Read enable: 0=off 1=on - * VWW: Virtual Watchpoint Write enable: 0=off 1=on - * PMASK: Parity MASK ??? - * DME: Data MMU Enable: 0=off 1=on - * IME: Instruction MMU Enable: 0=off 1=on - * DCE: Data Cache Enable: 0=off 1=on - * ICE: Instruction Cache Enable: 0=off 1=on - */ - -#define SPITFIRE_LSU_PWR 0x01000000 -#define SPITFIRE_LSU_PWW 0x00800000 -#define SPITFIRE_LSU_VWR 0x00400000 -#define SPITFIRE_LSU_VWW 0x00200000 -#define SPITFIRE_LSU_PMASK 0x000ffff0 -#define SPITFIRE_LSU_DME 0x00000008 -#define SPITFIRE_LSU_IME 0x00000004 -#define SPITFIRE_LSU_DCE 0x00000002 -#define SPITFIRE_LSU_ICE 0x00000001 - -#endif /* !(_SPARC_ULTRA_H) */ diff --git a/include/asm-sparc/vac-ops.h b/include/asm-sparc/vac-ops.h index eafe7a45287a..83ceb56d91ab 100644 --- a/include/asm-sparc/vac-ops.h +++ b/include/asm-sparc/vac-ops.h @@ -107,8 +107,6 @@ struct sun4c_vac_props { extern struct sun4c_vac_props sun4c_vacinfo; -extern void sun4c_flush_all(void); - /* sun4c_enable_vac() enables the sun4c virtual address cache. 
*/ extern __inline__ void sun4c_enable_vac(void) { diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index a0ffd1ac93ea..c01cafda59ee 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -35,8 +35,9 @@ extern void do_BUG(const char *file, int line); extern void _clear_page(void *page); #define clear_page(X) _clear_page((void *)(X)) -extern void clear_user_page(void *page, unsigned long vaddr); -extern void copy_user_page(void *to, void *from, unsigned long vaddr); +struct page; +extern void clear_user_page(void *addr, unsigned long vaddr, struct page *page); +extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage); /* GROSS, defining this makes gcc pass these types as aggregates, * and thus on the stack, turn this crap off... -DaveM diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h index 97d490fd8dd8..a263da6bd01c 100644 --- a/include/linux/if_bonding.h +++ b/include/linux/if_bonding.h @@ -51,7 +51,7 @@ #define BOND_STATE_ACTIVE 0 /* link is active */ #define BOND_STATE_BACKUP 1 /* link is backup */ -#define MAX_BONDS 1 /* Maximum number of devices to support */ +#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ typedef struct ifbond { __s32 bond_mode; @@ -76,6 +76,7 @@ typedef struct slave { short delay; char link; /* one of BOND_LINK_XXXX */ char state; /* one of BOND_STATE_XXXX */ + unsigned short original_flags; u32 link_failure_count; } slave_t; @@ -104,6 +105,8 @@ typedef struct bonding { #endif /* CONFIG_PROC_FS */ struct bonding *next_bond; struct net_device *device; + struct dev_mc_list *mc_list; + unsigned short flags; } bonding_t; #endif /* __KERNEL__ */ diff --git a/include/linux/init.h b/include/linux/init.h index ca7e75f37883..b45b95c5e640 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -61,7 +61,7 @@ extern initcall_t __initcall_start, __initcall_end; static initcall_t __initcall_##fn __attribute__ 
((unused,__section__ (".initcall" level ".init"))) = fn #define core_initcall(fn) __define_initcall("1",fn) -#define unused_initcall(fn) __define_initcall("2",fn) +#define postcore_initcall(fn) __define_initcall("2",fn) #define arch_initcall(fn) __define_initcall("3",fn) #define subsys_initcall(fn) __define_initcall("4",fn) #define fs_initcall(fn) __define_initcall("5",fn) @@ -160,7 +160,7 @@ typedef void (*__cleanup_module_func_t)(void); #define __setup(str,func) /* nothing */ #define core_initcall(fn) module_init(fn) -#define unused_initcall(fn) module_init(fn) +#define postcore_initcall(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) #define subsys_initcall(fn) module_init(fn) #define fs_initcall(fn) module_init(fn) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 821c2e0704df..3875f87e938b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -137,10 +137,9 @@ void xdr_zero_iovec(struct iovec *, int, size_t); /* * XDR buffer helper functions */ -extern int xdr_kmap(struct iovec *, struct xdr_buf *, unsigned int); -extern void xdr_kunmap(struct xdr_buf *, unsigned int); -extern void xdr_shift_buf(struct xdr_buf *, unsigned int); -extern void xdr_zero_buf(struct xdr_buf *, unsigned int); +extern int xdr_kmap(struct iovec *, struct xdr_buf *, size_t); +extern void xdr_kunmap(struct xdr_buf *, size_t); +extern void xdr_shift_buf(struct xdr_buf *, size_t); /* * Helper structure for copying from an sk_buff. 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 88abb25a67de..c73dc5655a24 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -55,6 +55,10 @@ extern int register_suspend_notifier(struct notifier_block *); extern int unregister_suspend_notifier(struct notifier_block *); extern void refrigerator(unsigned long); +extern unsigned int nr_copy_pages __nosavedata; +extern suspend_pagedir_t *pagedir_nosave __nosavedata; + + #else #define software_suspend() do { } while(0) #define software_resume() do { } while(0) diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 43b32c613d76..4582d5343c89 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -31,7 +31,7 @@ #ifndef _ZLIB_H #define _ZLIB_H -#include "zconf.h" +#include <linux/zconf.h> #ifdef __cplusplus extern "C" { diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 84c94256dc65..313bf3b7b212 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -8,6 +8,7 @@ #define PSCHED_CLOCK_SOURCE PSCHED_JIFFIES #include <linux/config.h> +#include <linux/types.h> #include <linux/pkt_sched.h> #include <net/pkt_cls.h> @@ -221,7 +222,7 @@ extern psched_time_t psched_time_base; #define PSCHED_EXPORTLIST_2 -#if ~0UL == 0xFFFFFFFF +#if BITS_PER_LONG <= 32 #define PSCHED_WATCHER unsigned long diff --git a/init/Makefile b/init/Makefile index 5fc90741e578..455416572bac 100644 --- a/init/Makefile +++ b/init/Makefile @@ -22,5 +22,4 @@ $(TOPDIR)/include/linux/compile.h: ../include/linux/compile.h ; # actual file if its content has changed. ../include/linux/compile.h: FORCE - @echo Generating $@ - @. 
../scripts/mkcompile_h $@ "$(ARCH)" "$(CONFIG_SMP)" "$(CC) $(CFLAGS)" + @../scripts/mkcompile_h $@ "$(ARCH)" "$(CONFIG_SMP)" "$(CC) $(CFLAGS)" diff --git a/init/main.c b/init/main.c index a316e2145bac..839dbcf364a0 100644 --- a/init/main.c +++ b/init/main.c @@ -51,7 +51,7 @@ * To avoid associated bogus bug reports, we flatly refuse to compile * with a gcc that is known to be too old from the very beginning. */ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91) #error Sorry, your GCC is too old. It builds incorrect kernels. #endif diff --git a/kernel/futex.c b/kernel/futex.c index 4541b2448e49..bdff307969ef 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -33,14 +33,10 @@ #include <linux/futex.h> #include <linux/highmem.h> #include <linux/time.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> -/* These mutexes are a very simple counter: the winner is the one who - decrements from 1 to 0. The counter starts at 1 when the lock is - free. A value other than 0 or 1 means someone may be sleeping. - This is simple enough to work on all architectures, but has the - problem that if we never "up" the semaphore it could eventually - wrap around. */ +/* Simple "sleep if unchanged" interface. */ /* FIXME: This may be way too small. --RR */ #define FUTEX_HASHBITS 6 @@ -49,7 +45,7 @@ the relevent ones (hashed queues may be shared) */ struct futex_q { struct list_head list; - struct task_struct *task; + wait_queue_head_t waiters; /* Page struct and offset within it. 
*/ struct page *page; unsigned int offset; @@ -69,6 +65,11 @@ static inline struct list_head *hash_futex(struct page *page, return &futex_queues[hash_long(h, FUTEX_HASHBITS)]; } +static inline void tell_waiter(struct futex_q *q) +{ + wake_up_all(&q->waiters); +} + static int futex_wake(struct list_head *head, struct page *page, unsigned int offset, @@ -83,7 +84,7 @@ static int futex_wake(struct list_head *head, if (this->page == page && this->offset == offset) { list_del_init(i); - wake_up_process(this->task); + tell_waiter(this); num_woken++; if (num_woken >= num) break; } @@ -94,11 +95,12 @@ static int futex_wake(struct list_head *head, /* Add at end to avoid starvation */ static inline void queue_me(struct list_head *head, + wait_queue_t *wait, struct futex_q *q, struct page *page, unsigned int offset) { - q->task = current; + add_wait_queue(&q->waiters, wait); q->page = page; q->offset = offset; @@ -150,18 +152,20 @@ static int futex_wait(struct list_head *head, { int curval; struct futex_q q; + DECLARE_WAITQUEUE(wait, current); int ret = 0; set_current_state(TASK_INTERRUPTIBLE); - queue_me(head, &q, page, offset); + queue_me(head, &wait, &q, page, offset); - /* Page is pinned, can't fail */ - if (get_user(curval, uaddr) != 0) - BUG(); + /* Page is pinned, but may no longer be in this address space. */ + if (get_user(curval, uaddr) != 0) { + ret = -EFAULT; + goto out; + } if (curval != val) { ret = -EWOULDBLOCK; - set_current_state(TASK_RUNNING); goto out; } time = schedule_timeout(time); @@ -174,6 +178,7 @@ static int futex_wait(struct list_head *head, goto out; } out: + set_current_state(TASK_RUNNING); /* Were we woken up anyway? 
*/ if (!unqueue_me(&q)) return 0; @@ -218,7 +223,7 @@ asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime) default: ret = -EINVAL; } - put_page(page); + page_cache_release(page); return ret; } diff --git a/kernel/suspend.c b/kernel/suspend.c index 887311b73eed..b791f3ff449e 100644 --- a/kernel/suspend.c +++ b/kernel/suspend.c @@ -106,7 +106,7 @@ static int resume_status = 0; static char resume_file[256] = ""; /* For resume= kernel option */ static kdev_t resume_device; /* Local variables that should not be affected by save */ -static unsigned int nr_copy_pages __nosavedata = 0; +unsigned int nr_copy_pages __nosavedata = 0; static int pm_suspend_state = 0; @@ -119,7 +119,7 @@ static int pm_suspend_state = 0; allocated at time of resume, that travels through memory not to collide with anything. */ -static suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; +suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; static suspend_pagedir_t *pagedir_save; static int pagedir_order __nosavedata = 0; @@ -783,7 +783,7 @@ void suspend_power_down(void) * Magic happens here */ -static void do_magic_resume_1(void) +void do_magic_resume_1(void) { barrier(); mb(); @@ -795,7 +795,7 @@ static void do_magic_resume_1(void) driver scheduled DMA, we have good chance for DMA to finish ;-). */ } -static void do_magic_resume_2(void) +void do_magic_resume_2(void) { if (nr_copy_pages_check != nr_copy_pages) panic("nr_copy_pages changed?!"); @@ -817,14 +817,14 @@ static void do_magic_resume_2(void) #endif } -static void do_magic_suspend_1(void) +void do_magic_suspend_1(void) { mb(); barrier(); spin_lock_irq(&suspend_pagedir_lock); } -static void do_magic_suspend_2(void) +void do_magic_suspend_2(void) { read_swapfiles(); if (!suspend_save_image()) diff --git a/net/core/dev.c b/net/core/dev.c index 53b2c9c4104a..a9a35fb04a11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -20,17 +20,18 @@ * Pekka Riikonen <priikone@poesidon.pspt.fi> * * Changes: - * D.J. 
Barrow : Fixed bug where dev->refcnt gets set to 2 - * if register_netdev gets called before - * net_dev_init & also removed a few lines - * of code in the process. + * D.J. Barrow : Fixed bug where dev->refcnt gets set + * to 2 if register_netdev gets called + * before net_dev_init & also removed a + * few lines of code in the process. * Alan Cox : device private ioctl copies fields back. - * Alan Cox : Transmit queue code does relevant stunts to - * keep the queue safe. + * Alan Cox : Transmit queue code does relevant + * stunts to keep the queue safe. * Alan Cox : Fixed double lock. * Alan Cox : Fixed promisc NULL pointer trap * ???????? : Support the full private ioctl range - * Alan Cox : Moved ioctl permission check into drivers + * Alan Cox : Moved ioctl permission check into + * drivers * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI * Alan Cox : 100 backlog just doesn't cut it when * you start doing multicast video 8) @@ -38,16 +39,19 @@ * Alan Cox : Fix ETH_P_ALL echoback lengths. * Alan Cox : Took out transmit every packet pass * Saved a few bytes in the ioctl handler - * Alan Cox : Network driver sets packet type before calling netif_rx. Saves - * a function call a packet. + * Alan Cox : Network driver sets packet type before + * calling netif_rx. Saves a function + * call a packet. * Alan Cox : Hashed net_bh() * Richard Kooijman: Timestamp fixes. * Alan Cox : Wrong field in SIOCGIFDSTADDR * Alan Cox : Device lock protection. - * Alan Cox : Fixed nasty side effect of device close changes. - * Rudi Cilibrasi : Pass the right thing to set_mac_address() - * Dave Miller : 32bit quantity for the device lock to make it work out - * on a Sparc. + * Alan Cox : Fixed nasty side effect of device close + * changes. + * Rudi Cilibrasi : Pass the right thing to + * set_mac_address() + * Dave Miller : 32bit quantity for the device lock to + * make it work out on a Sparc. * Bjorn Ekwall : Added KERNELD hack. * Alan Cox : Cleaned up the backlog initialise. 
* Craig Metz : SIOCGIFCONF fix if space for under @@ -62,9 +66,10 @@ * the backlog queue. * Paul Rusty Russell : SIOCSIFNAME * Pekka Riikonen : Netdev boot-time settings code - * Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt + * Andrew Morton : Make unregister_netdevice wait + * indefinitely on dev->refcnt * J Hadi Salim : - Backlog queue sampling - * - netif_rx() feedback + * - netif_rx() feedback */ #include <asm/uaccess.h> @@ -162,8 +167,8 @@ const char *if_port_text[] = { * 86DD IPv6 */ -static struct packet_type *ptype_base[16]; /* 16 way hashed list */ -static struct packet_type *ptype_all = NULL; /* Taps */ +static struct packet_type *ptype_base[16]; /* 16 way hashed list */ +static struct packet_type *ptype_all; /* Taps */ #ifdef OFFLINE_SAMPLE static void sample_queue(unsigned long dummy); @@ -179,8 +184,8 @@ static int net_run_sbin_hotplug(struct net_device *dev, char *action); /* * Our notifier list */ - -static struct notifier_block *netdev_chain=NULL; + +static struct notifier_block *netdev_chain; /* * Device drivers call our routines to queue packets here. We empty the @@ -194,17 +199,17 @@ int netdev_fastroute_obstacles; #endif -/****************************************************************************************** +/******************************************************************************* Protocol management and registration routines -*******************************************************************************************/ +*******************************************************************************/ /* * For efficiency */ -int netdev_nit=0; +int netdev_nit; /* * Add a protocol ID to the list. Now that the input handler is @@ -225,12 +230,12 @@ int netdev_nit=0; /** * dev_add_pack - add packet handler * @pt: packet type declaration - * + * * Add a protocol handler to the networking stack. 
The passed &packet_type * is linked into kernel lists and may not be freed until it has been * removed from the kernel lists. */ - + void dev_add_pack(struct packet_type *pt) { int hash; @@ -239,17 +244,17 @@ void dev_add_pack(struct packet_type *pt) #ifdef CONFIG_NET_FASTROUTE /* Hack to detect packet socket */ - if ((pt->data) && ((int)(pt->data)!=1)) { + if (pt->data && (long)(pt->data) != 1) { netdev_fastroute_obstacles++; dev_clear_fastroute(pt->dev); } #endif if (pt->type == htons(ETH_P_ALL)) { netdev_nit++; - pt->next=ptype_all; - ptype_all=pt; + pt->next = ptype_all; + ptype_all = pt; } else { - hash=ntohs(pt->type)&15; + hash = ntohs(pt->type) & 15; pt->next = ptype_base[hash]; ptype_base[hash] = pt; } @@ -260,13 +265,12 @@ void dev_add_pack(struct packet_type *pt) /** * dev_remove_pack - remove packet handler * @pt: packet type declaration - * + * * Remove a protocol handler that was previously added to the kernel * protocol handlers by dev_add_pack(). The passed &packet_type is removed * from the kernel lists and can be freed or reused once this function * returns. 
*/ - void dev_remove_pack(struct packet_type *pt) { struct packet_type **pt1; @@ -275,24 +279,23 @@ void dev_remove_pack(struct packet_type *pt) if (pt->type == htons(ETH_P_ALL)) { netdev_nit--; - pt1=&ptype_all; - } else { - pt1=&ptype_base[ntohs(pt->type)&15]; - } + pt1 = &ptype_all; + } else + pt1 = &ptype_base[ntohs(pt->type) & 15]; - for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) { - if (pt == (*pt1)) { + for (; *pt1; pt1 = &((*pt1)->next)) { + if (pt == *pt1) { *pt1 = pt->next; #ifdef CONFIG_NET_FASTROUTE if (pt->data) netdev_fastroute_obstacles--; #endif - br_write_unlock_bh(BR_NETPROTO_LOCK); - return; + goto out; } } - br_write_unlock_bh(BR_NETPROTO_LOCK); printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); +out: + br_write_unlock_bh(BR_NETPROTO_LOCK); } /****************************************************************************** @@ -328,10 +331,7 @@ int netdev_boot_setup_add(char *name, struct ifmap *map) } } - if (i >= NETDEV_BOOT_SETUP_MAX) - return 0; - - return 1; + return i >= NETDEV_BOOT_SETUP_MAX ? 
0 : 1; } /** @@ -345,10 +345,9 @@ int netdev_boot_setup_add(char *name, struct ifmap *map) */ int netdev_boot_setup_check(struct net_device *dev) { - struct netdev_boot_setup *s; + struct netdev_boot_setup *s = dev_boot_setup; int i; - s = dev_boot_setup; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && !strncmp(dev->name, s[i].name, strlen(s[i].name))) { @@ -385,20 +384,20 @@ int __init netdev_boot_setup(char *str) if (ints[0] > 3) map.mem_end = ints[4]; - /* Add new entry to the list */ + /* Add new entry to the list */ return netdev_boot_setup_add(str, &map); } __setup("netdev=", netdev_boot_setup); -/***************************************************************************************** +/******************************************************************************* Device Interface Subroutines -******************************************************************************************/ +*******************************************************************************/ /** - * __dev_get_by_name - find a device by its name + * __dev_get_by_name - find a device by its name * @name: name to find * * Find an interface by name. Must be called under RTNL semaphore @@ -407,24 +406,22 @@ __setup("netdev=", netdev_boot_setup); * reference counters are not incremented so the caller must be * careful with locks. */ - struct net_device *__dev_get_by_name(const char *name) { struct net_device *dev; - for (dev = dev_base; dev != NULL; dev = dev->next) { - if (strncmp(dev->name, name, IFNAMSIZ) == 0) - return dev; - } - return NULL; + for (dev = dev_base; dev; dev = dev->next) + if (!strncmp(dev->name, name, IFNAMSIZ)) + break; + return dev; } /** * dev_get_by_name - find a device by its name * @name: name to find * - * Find an interface by name. This can be called from any + * Find an interface by name. This can be called from any * context and does its own locking. 
The returned handle has * the usage count incremented and the caller must use dev_put() to * release it when it is no longer needed. %NULL is returned if no @@ -443,7 +440,7 @@ struct net_device *dev_get_by_name(const char *name) return dev; } -/* +/* Return value is changed to int to prevent illegal usage in future. It is still legal to use to check for device existence. @@ -460,9 +457,8 @@ struct net_device *dev_get_by_name(const char *name) * caller must hold the rtnl semaphore. * * This function primarily exists for back compatibility with older - * drivers. + * drivers. */ - int dev_get(const char *name) { struct net_device *dev; @@ -484,15 +480,14 @@ int dev_get(const char *name) * or @dev_base_lock. */ -struct net_device * __dev_get_by_index(int ifindex) +struct net_device *__dev_get_by_index(int ifindex) { struct net_device *dev; - for (dev = dev_base; dev != NULL; dev = dev->next) { + for (dev = dev_base; dev; dev = dev->next) if (dev->ifindex == ifindex) - return dev; - } - return NULL; + break; + return dev; } @@ -501,12 +496,12 @@ struct net_device * __dev_get_by_index(int ifindex) * @ifindex: index of device * * Search for an interface by index. Returns NULL if the device - * is not found or a pointer to the device. The device returned has + * is not found or a pointer to the device. The device returned has * had a reference added and the pointer is safe until the user calls * dev_put to indicate they have finished with it. 
*/ -struct net_device * dev_get_by_index(int ifindex) +struct net_device *dev_get_by_index(int ifindex) { struct net_device *dev; @@ -538,17 +533,16 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) ASSERT_RTNL(); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for (dev = dev_base; dev; dev = dev->next) if (dev->type == type && - memcmp(dev->dev_addr, ha, dev->addr_len) == 0) - return dev; - } - return NULL; + !memcmp(dev->dev_addr, ha, dev->addr_len)) + break; + return dev; } /** * dev_alloc_name - allocate a name for a device - * @dev: device + * @dev: device * @name: name format string * * Passed a format string - eg "lt%d" it will try and find a suitable @@ -570,15 +564,15 @@ int dev_alloc_name(struct net_device *dev, const char *name) * characters, or no "%" characters at all. */ p = strchr(name, '%'); - if (p && (p[1] != 'd' || strchr(p+2, '%'))) + if (p && (p[1] != 'd' || strchr(p + 2, '%'))) return -EINVAL; /* * If you need over 100 please also fix the algorithm... */ for (i = 0; i < 100; i++) { - snprintf(buf,sizeof(buf),name,i); - if (__dev_get_by_name(buf) == NULL) { + snprintf(buf, sizeof(buf), name, i); + if (!__dev_get_by_name(buf)) { strcpy(dev->name, buf); return i; } @@ -593,7 +587,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) * * Passed a format string, eg. "lt%d", it will allocate a network device * and space for the name. %NULL is returned if no memory is available. - * If the allocation succeeds then the name is assigned and the + * If the allocation succeeds then the name is assigned and the * device pointer returned. %NULL is returned if the name allocation * failed. The cause of an error is returned as a negative errno code * in the variable @err points to. 
@@ -604,16 +598,17 @@ int dev_alloc_name(struct net_device *dev, const char *name) struct net_device *dev_alloc(const char *name, int *err) { - struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL); - if (dev == NULL) { + struct net_device *dev = kmalloc(sizeof(*dev), GFP_KERNEL); + + if (!dev) *err = -ENOBUFS; - return NULL; - } - memset(dev, 0, sizeof(struct net_device)); - *err = dev_alloc_name(dev, name); - if (*err < 0) { - kfree(dev); - return NULL; + else { + memset(dev, 0, sizeof(*dev)); + *err = dev_alloc_name(dev, name); + if (*err < 0) { + kfree(dev); + dev = NULL; + } } return dev; } @@ -626,10 +621,9 @@ struct net_device *dev_alloc(const char *name, int *err) * the notifier chains for netdev_chain and sends a NEWLINK message * to the routing socket. */ - void netdev_state_change(struct net_device *dev) { - if (dev->flags&IFF_UP) { + if (dev->flags & IFF_UP) { notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } @@ -661,13 +655,14 @@ extern inline void dev_load(const char *unused){;} static int default_rebuild_header(struct sk_buff *skb) { - printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!"); + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", + skb->dev ? skb->dev->name : "NULL!!!"); kfree_skb(skb); return 1; } /** - * dev_open - prepare an interface for use. + * dev_open - prepare an interface for use. * @dev: device to open * * Takes a device from down to up state. The device's private open @@ -678,7 +673,6 @@ static int default_rebuild_header(struct sk_buff *skb) * Calling this function on an active interface is a nop. On a failure * a negative errno code is returned. */ - int dev_open(struct net_device *dev) { int ret = 0; @@ -687,7 +681,7 @@ int dev_open(struct net_device *dev) * Is it already up? 
*/ - if (dev->flags&IFF_UP) + if (dev->flags & IFF_UP) return 0; /* @@ -702,7 +696,7 @@ int dev_open(struct net_device *dev) if (try_inc_mod_count(dev->owner)) { if (dev->open) { ret = dev->open(dev); - if (ret != 0 && dev->owner) + if (ret && dev->owner) __MOD_DEC_USE_COUNT(dev->owner); } } else { @@ -712,9 +706,8 @@ int dev_open(struct net_device *dev) /* * If it went open OK then: */ - - if (ret == 0) - { + + if (!ret) { /* * Set the flags. */ @@ -723,7 +716,7 @@ int dev_open(struct net_device *dev) set_bit(__LINK_STATE_START, &dev->state); /* - * Initialize multicasting status + * Initialize multicasting status */ dev_mc_upload(dev); @@ -737,7 +730,7 @@ int dev_open(struct net_device *dev) */ notifier_call_chain(&netdev_chain, NETDEV_UP, dev); } - return(ret); + return ret; } #ifdef CONFIG_NET_FASTROUTE @@ -747,7 +740,7 @@ static void dev_do_clear_fastroute(struct net_device *dev) if (dev->accept_fastpath) { int i; - for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) { + for (i = 0; i <= NETDEV_FASTROUTE_HMASK; i++) { struct dst_entry *dst; write_lock_irq(&dev->fastpath_lock); @@ -777,15 +770,14 @@ void dev_clear_fastroute(struct net_device *dev) * dev_close - shutdown an interface. * @dev: device to shutdown * - * This function moves an active device into down state. A + * This function moves an active device into down state. A * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier * chain. */ - int dev_close(struct net_device *dev) { - if (!(dev->flags&IFF_UP)) + if (!(dev->flags & IFF_UP)) return 0; /* @@ -818,7 +810,6 @@ int dev_close(struct net_device *dev) * We allow it to be called even after a DETACH hot-plug * event. 
*/ - if (dev->stop) dev->stop(dev); @@ -842,7 +833,7 @@ int dev_close(struct net_device *dev) if (dev->owner) __MOD_DEC_USE_COUNT(dev->owner); - return(0); + return 0; } @@ -850,7 +841,7 @@ int dev_close(struct net_device *dev) * Device change register/unregister. These are not inline or static * as we export them to the world. */ - + /** * register_netdevice_notifier - register a network notifier block * @nb: notifier @@ -878,7 +869,7 @@ int register_netdevice_notifier(struct notifier_block *nb) int unregister_netdevice_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netdev_chain,nb); + return notifier_chain_unregister(&netdev_chain, nb); } /* @@ -892,16 +883,14 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) do_gettimeofday(&skb->stamp); br_read_lock(BR_NETPROTO_LOCK); - for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) - { + for (ptype = ptype_all; ptype; ptype = ptype->next) { /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ if ((ptype->dev == dev || !ptype->dev) && - ((struct sock *)ptype->data != skb->sk)) - { - struct sk_buff *skb2; - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) + (struct sock *)ptype->data != skb->sk) { + struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); + if (!skb2) break; /* skb->nh should be correctly @@ -910,9 +899,12 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ skb2->mac.raw = skb2->data; - if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) { + if (skb2->nh.raw < skb2->data || + skb2->nh.raw > skb2->tail) { if (net_ratelimit()) - printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name); + printk(KERN_DEBUG "protocol %04x is " + "buggy, dev %s\n", + skb2->protocol, dev->name); skb2->nh.raw = skb2->data; } @@ -928,12 +920,11 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * If it failed by some reason, ignore and send skb with wrong * 
checksum. */ -struct sk_buff * skb_checksum_help(struct sk_buff *skb) +struct sk_buff *skb_checksum_help(struct sk_buff *skb) { - int offset; unsigned int csum; + int offset = skb->h.raw - skb->data; - offset = skb->h.raw - skb->data; if (offset > (int)skb->len) BUG(); csum = skb_checksum(skb, offset, skb->len-offset, 0); @@ -941,7 +932,7 @@ struct sk_buff * skb_checksum_help(struct sk_buff *skb) offset = skb->tail - skb->h.raw; if (offset <= 0) BUG(); - if (skb->csum+2 > offset) + if (skb->csum + 2 > offset) BUG(); *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); @@ -955,15 +946,14 @@ struct sk_buff * skb_checksum_help(struct sk_buff *skb) * 2. No high memory really exists on this machine. */ -static inline int -illegal_highdma(struct net_device *dev, struct sk_buff *skb) +static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { int i; - if (dev->features&NETIF_F_HIGHDMA) + if (dev->features & NETIF_F_HIGHDMA) return 0; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) if (skb_shinfo(skb)->frags[i].page >= highmem_start_page) return 1; @@ -976,10 +966,10 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb) /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit - * + * * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling this - * function. The function can be called from an interrupt. + * have set the device and priority and built the buffer before calling + * this function. The function can be called from an interrupt. * * A negative errno code is returned on a failure. 
A success does not * guarantee the frame will be transmitted as it may be dropped due @@ -989,61 +979,60 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb) int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct Qdisc *q; + struct Qdisc *q; + int rc = -ENOMEM; if (skb_shinfo(skb)->frag_list && - !(dev->features&NETIF_F_FRAGLIST) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return -ENOMEM; - } + !(dev->features & NETIF_F_FRAGLIST) && + skb_linearize(skb, GFP_ATOMIC)) + goto out_kfree_skb; /* Fragmented skb is linearized if device does not support SG, * or if at least one of fragments is in highmem and device * does not support DMA from it. */ if (skb_shinfo(skb)->nr_frags && - (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return -ENOMEM; - } + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && + skb_linearize(skb, GFP_ATOMIC)) + goto out_kfree_skb; /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb->ip_summed == CHECKSUM_HW && - (!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) && - (!(dev->features&NETIF_F_IP_CSUM) || + (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && + (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) { if ((skb = skb_checksum_help(skb)) == NULL) - return -ENOMEM; + goto out; } /* Grab device queue */ spin_lock_bh(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { - int ret = q->enqueue(skb, q); + rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock_bh(&dev->queue_lock); - return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret; + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; } /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... Really, it is unlikely that xmit_lock protection is necessary here. - (f.e. 
loopback and IP tunnels are clean ignoring statistics counters.) + (f.e. loopback and IP tunnels are clean ignoring statistics + counters.) However, it is possible, that they rely on protection made by us here. Check this and shot the lock. It is not prone from deadlocks. Either shot noqueue qdisc, it is even simpler 8) */ - if (dev->flags&IFF_UP) { + if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); if (dev->xmit_lock_owner != cpu) { @@ -1059,30 +1048,36 @@ int dev_queue_xmit(struct sk_buff *skb) if (!netif_queue_stopped(dev)) { if (netdev_nit) - dev_queue_xmit_nit(skb,dev); + dev_queue_xmit_nit(skb, dev); - if (dev->hard_start_xmit(skb, dev) == 0) { + rc = 0; + if (!dev->hard_start_xmit(skb, dev)) { dev->xmit_lock_owner = -1; spin_unlock_bh(&dev->xmit_lock); - return 0; + goto out; } } dev->xmit_lock_owner = -1; spin_unlock_bh(&dev->xmit_lock); if (net_ratelimit()) - printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name); - kfree_skb(skb); - return -ENETDOWN; + printk(KERN_DEBUG "Virtual device %s asks to " + "queue packet!\n", dev->name); + goto out_enetdown; } else { - /* Recursion is detected! It is possible, unfortunately */ + /* Recursion is detected! 
It is possible, + * unfortunately */ if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name); + printk(KERN_DEBUG "Dead loop on virtual device " + "%s, fix it urgently!\n", dev->name); } } spin_unlock_bh(&dev->queue_lock); - +out_enetdown: + rc = -ENETDOWN; +out_kfree_skb: kfree_skb(skb); - return -ENETDOWN; +out: + return rc; } @@ -1107,7 +1102,7 @@ struct netif_rx_stats netdev_rx_stat[NR_CPUS]; #ifdef CONFIG_NET_HW_FLOWCONTROL atomic_t netdev_dropping = ATOMIC_INIT(0); static unsigned long netdev_fc_mask = 1; -unsigned long netdev_fc_xoff = 0; +unsigned long netdev_fc_xoff; spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; static struct @@ -1116,7 +1111,8 @@ static struct struct net_device *dev; } netdev_fc_slots[BITS_PER_LONG]; -int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev)) +int netdev_register_fc(struct net_device *dev, + void (*stimul)(struct net_device *dev)) { int bit = 0; unsigned long flags; @@ -1156,7 +1152,7 @@ static void netdev_wakeup(void) netdev_fc_xoff = 0; while (xoff) { int i = ffz(~xoff); - xoff &= ~(1<<i); + xoff &= ~(1 << i); netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); } spin_unlock(&netdev_fc_lock); @@ -1172,7 +1168,7 @@ static void get_sample_stats(int cpu) int blog = softnet_data[cpu].input_pkt_queue.qlen; int avg_blog = softnet_data[cpu].avg_blog; - avg_blog = (avg_blog >> 1)+ (blog >> 1); + avg_blog = (avg_blog >> 1) + (blog >> 1); if (avg_blog > mod_cong) { /* Above moderate congestion levels. 
*/ @@ -1191,7 +1187,7 @@ static void get_sample_stats(int cpu) if (rq < avg_blog) /* unlucky bastard */ softnet_data[cpu].cng_level = NET_RX_CN_HIGH; #endif - } else if (avg_blog > no_cong) + } else if (avg_blog > no_cong) softnet_data[cpu].cng_level = NET_RX_CN_LOW; else /* no congestion */ softnet_data[cpu].cng_level = NET_RX_SUCCESS; @@ -1219,17 +1215,16 @@ static void sample_queue(unsigned long dummy) * * This function receives a packet from a device driver and queues it for * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the + * may be dropped during processing for congestion control or by the * protocol layers. - * + * * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_CN_LOW (low congestion) - * NET_RX_CN_MOD (moderate congestion) - * NET_RX_CN_HIGH (high congestion) - * NET_RX_DROP (packet was dropped) - * - * + * NET_RX_SUCCESS (no congestion) + * NET_RX_CN_LOW (low congestion) + * NET_RX_CN_MOD (moderate congestion) + * NET_RX_CN_HIGH (high congestion) + * NET_RX_DROP (packet was dropped) + * */ int netif_rx(struct sk_buff *skb) @@ -1238,7 +1233,7 @@ int netif_rx(struct sk_buff *skb) struct softnet_data *queue; unsigned long flags; - if (skb->stamp.tv_sec == 0) + if (!skb->stamp.tv_sec) do_gettimeofday(&skb->stamp); /* The code is rearranged so that the path is the most @@ -1256,7 +1251,7 @@ int netif_rx(struct sk_buff *skb) enqueue: dev_hold(skb->dev); - __skb_queue_tail(&queue->input_pkt_queue,skb); + __skb_queue_tail(&queue->input_pkt_queue, skb); local_irq_restore(flags); #ifndef OFFLINE_SAMPLE get_sample_stats(this_cpu); @@ -1276,7 +1271,7 @@ enqueue: goto enqueue; } - if (queue->throttle == 0) { + if (!queue->throttle) { queue->throttle = 1; netdev_rx_stat[this_cpu].throttled++; #ifdef CONFIG_NET_HW_FLOWCONTROL @@ -1295,21 +1290,19 @@ drop: /* Deliver skb to an old protocol, which is not threaded well or which do not understand shared skbs. 
*/ -static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) +static int deliver_to_old_ones(struct packet_type *pt, + struct sk_buff *skb, int last) { static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; int ret = NET_RX_DROP; - if (!last) { skb = skb_clone(skb, GFP_ATOMIC); - if (skb == NULL) - return ret; - } - if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return ret; + if (!skb) + goto out; } + if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC)) + goto out_kfree; /* The assumption (correct one) is that old protocols did not depened on BHs different of NET_BH and TIMER_BH. @@ -1325,7 +1318,11 @@ static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int tasklet_hi_enable(bh_task_vec+TIMER_BH); spin_unlock(&net_bh_lock); +out: return ret; +out_kfree: + kfree_skb(skb); + goto out; } static __inline__ void skb_bond(struct sk_buff *skb) @@ -1348,11 +1345,11 @@ static void net_tx_action(struct softirq_action *h) softnet_data[cpu].completion_queue = NULL; local_irq_enable(); - while (clist != NULL) { + while (clist) { struct sk_buff *skb = clist; clist = clist->next; - BUG_TRAP(atomic_read(&skb->users) == 0); + BUG_TRAP(!atomic_read(&skb->users)); __kfree_skb(skb); } } @@ -1365,7 +1362,7 @@ static void net_tx_action(struct softirq_action *h) softnet_data[cpu].output_queue = NULL; local_irq_enable(); - while (head != NULL) { + while (head) { struct net_device *dev = head; head = head->next_sched; @@ -1389,7 +1386,6 @@ static void net_tx_action(struct softirq_action *h) * Make a function call that is atomic with respect to the protocol * layers. 
*/ - void net_call_rx_atomic(void (*fn)(void)) { br_write_lock_bh(BR_NETPROTO_LOCK); @@ -1421,11 +1417,12 @@ static __inline__ int handle_bridge(struct sk_buff *skb, #ifdef CONFIG_NET_DIVERT -static inline void handle_diverter(struct sk_buff *skb) +static inline int handle_diverter(struct sk_buff *skb) { /* if diversion is supported on device, then divert */ if (skb->dev->divert && skb->dev->divert->divert) divert_frame(skb); + return 0; } #endif /* CONFIG_NET_DIVERT */ @@ -1435,7 +1432,7 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; unsigned short type = skb->protocol; - if (skb->stamp.tv_sec == 0) + if (!skb->stamp.tv_sec) do_gettimeofday(&skb->stamp); skb_bond(skb); @@ -1456,10 +1453,12 @@ int netif_receive_skb(struct sk_buff *skb) if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) { if (!pt_prev->data) { - ret = deliver_to_old_ones(pt_prev, skb, 0); + ret = deliver_to_old_ones(pt_prev, + skb, 0); } else { atomic_inc(&skb->users); - ret = pt_prev->func(skb, skb->dev, pt_prev); + ret = pt_prev->func(skb, skb->dev, + pt_prev); } } pt_prev = ptype; @@ -1470,23 +1469,24 @@ int netif_receive_skb(struct sk_buff *skb) if (skb->dev->divert && skb->dev->divert->divert) ret = handle_diverter(skb); #endif /* CONFIG_NET_DIVERT */ - + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - if (skb->dev->br_port != NULL && - br_handle_frame_hook != NULL) { + if (skb->dev->br_port && br_handle_frame_hook) { return handle_bridge(skb, pt_prev); } #endif - for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) { + for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) { if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) { if (!pt_prev->data) { - ret = deliver_to_old_ones(pt_prev, skb, 0); + ret = deliver_to_old_ones(pt_prev, + skb, 0); } else { atomic_inc(&skb->users); - ret = pt_prev->func(skb, skb->dev, pt_prev); + ret = pt_prev->func(skb, skb->dev, + pt_prev); } } pt_prev = ptype; @@ 
-1524,7 +1524,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); - if (skb == NULL) + if (!skb) goto job_done; local_irq_enable(); @@ -1540,7 +1540,8 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) break; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) { + if (queue->throttle && + queue->input_pkt_queue.qlen < no_cong_thresh ) { if (atomic_dec_and_test(&netdev_dropping)) { queue->throttle = 0; netdev_wakeup(); @@ -1590,7 +1591,8 @@ static void net_rx_action(struct softirq_action *h) local_irq_enable(); - dev = list_entry(queue->poll_list.next, struct net_device, poll_list); + dev = list_entry(queue->poll_list.next, + struct net_device, poll_list); if (dev->quota <= 0 || dev->poll(dev, &budget)) { local_irq_disable(); @@ -1605,7 +1607,7 @@ static void net_rx_action(struct softirq_action *h) local_irq_disable(); } } - +out: local_irq_enable(); br_read_unlock(BR_NETPROTO_LOCK); return; @@ -1613,9 +1615,7 @@ static void net_rx_action(struct softirq_action *h) softnet_break: netdev_rx_stat[this_cpu].time_squeeze++; __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); - - local_irq_enable(); - br_read_unlock(BR_NETPROTO_LOCK); + goto out; } static gifconf_func_t * gifconf_list [NPROTO]; @@ -1629,10 +1629,9 @@ static gifconf_func_t * gifconf_list [NPROTO]; * that is passed must not be freed or reused until it has been replaced * by another handler. */ - int register_gifconf(unsigned int family, gifconf_func_t * gifconf) { - if (family>=NPROTO) + if (family >= NPROTO) return -EINVAL; gifconf_list[family] = gifconf; return 0; @@ -1656,9 +1655,9 @@ static int dev_ifname(struct ifreq *arg) struct ifreq ifr; /* - * Fetch the caller's info block. + * Fetch the caller's info block. 
*/ - + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -1693,9 +1692,9 @@ static int dev_ifconf(char *arg) int i; /* - * Fetch the caller's info block. + * Fetch the caller's info block. */ - + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) return -EFAULT; @@ -1703,39 +1702,35 @@ static int dev_ifconf(char *arg) len = ifc.ifc_len; /* - * Loop over the interfaces, and write an info block for each. + * Loop over the interfaces, and write an info block for each. */ total = 0; - for (dev = dev_base; dev != NULL; dev = dev->next) { - for (i=0; i<NPROTO; i++) { + for (dev = dev_base; dev; dev = dev->next) { + for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; - if (pos==NULL) { + if (!pos) done = gifconf_list[i](dev, NULL, 0); - } else { - done = gifconf_list[i](dev, pos+total, len-total); - } - if (done<0) { + else + done = gifconf_list[i](dev, pos + total, + len - total); + if (done < 0) return -EFAULT; - } total += done; } } } /* - * All done. Write the updated control block back to the caller. + * All done. Write the updated control block back to the caller. */ ifc.ifc_len = total; - if (copy_to_user(arg, &ifc, sizeof(struct ifconf))) - return -EFAULT; - - /* + /* * Both BSD and Solaris return 0 here, so we do too. */ - return 0; + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; } /* @@ -1747,36 +1742,39 @@ static int dev_ifconf(char *arg) static int sprintf_stats(char *buffer, struct net_device *dev) { - struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL); + struct net_device_stats *stats = dev->get_stats ? 
dev->get_stats(dev) : + NULL; int size; - + if (stats) - size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu " + "%10lu %9lu %8lu %7lu %4lu %4lu %4lu " + "%5lu %7lu %10lu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors - + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, stats->rx_compressed, stats->multicast, stats->tx_bytes, stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + stats->tx_aborted_errors - + stats->tx_window_errors + stats->tx_heartbeat_errors, + stats->tx_carrier_errors + stats->tx_aborted_errors + + stats->tx_window_errors + stats->tx_heartbeat_errors, stats->tx_compressed); else - size = sprintf(buffer, "%6s: No statistics available.\n", dev->name); + size = sprintf(buffer, "%6s: No statistics available.\n", + dev->name); return size; } /* - * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface - * to create /proc/net/dev + * Called from the PROCfs module. 
This now uses the new arbitrary sized + * /proc/net interface to create /proc/net/dev */ - static int dev_get_info(char *buffer, char **start, off_t offset, int length) { int len = 0; @@ -1785,21 +1783,19 @@ static int dev_get_info(char *buffer, char **start, off_t offset, int length) int size; struct net_device *dev; - - size = sprintf(buffer, + size = sprintf(buffer, "Inter-| Receive | Transmit\n" " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n"); - + pos += size; len += size; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for (dev = dev_base; dev; dev = dev->next) { size = sprintf_stats(buffer+len, dev); len += size; pos = begin + len; - + if (pos < offset) { len = 0; begin = pos; @@ -1810,7 +1806,7 @@ static int dev_get_info(char *buffer, char **start, off_t offset, int length) read_unlock(&dev_base_lock); *start = buffer + (offset - begin); /* Start of wanted data */ - len -= (offset - begin); /* Start slop */ + len -= offset - begin; /* Start slop */ if (len > length) len = length; /* Ending slop */ if (len < 0) @@ -1822,11 +1818,12 @@ static int dev_proc_stats(char *buffer, char **start, off_t offset, int length, int *eof, void *data) { int i, lcpu; - int len=0; + int len = 0; - for (lcpu=0; lcpu<smp_num_cpus; lcpu++) { + for (lcpu = 0; lcpu < smp_num_cpus; lcpu++) { i = cpu_logical_map(lcpu); - len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + len += sprintf(buffer + len, "%08x %08x %08x %08x %08x %08x " + "%08x %08x %08x\n", netdev_rx_stat[i].total, netdev_rx_stat[i].dropped, netdev_rx_stat[i].time_squeeze, @@ -1870,7 +1867,6 @@ static int dev_proc_stats(char *buffer, char **start, off_t offset, * are adjusted, %RTM_NEWLINK is sent to the routing socket and the * function returns zero. 
*/ - int netdev_set_master(struct net_device *slave, struct net_device *master) { struct net_device *old = slave->master; @@ -1909,7 +1905,6 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. */ - void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -1917,9 +1912,9 @@ void dev_set_promiscuity(struct net_device *dev, int inc) dev->flags |= IFF_PROMISC; if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; - if (dev->flags^old_flags) { + if (dev->flags ^ old_flags) { #ifdef CONFIG_NET_FASTROUTE - if (dev->flags&IFF_PROMISC) { + if (dev->flags & IFF_PROMISC) { netdev_fastroute_obstacles++; dev_clear_fastroute(dev); } else @@ -1927,7 +1922,8 @@ void dev_set_promiscuity(struct net_device *dev, int inc) #endif dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", - dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left"); + dev->name, (dev->flags & IFF_PROMISC) ? "entered" : + "left"); } } @@ -1950,7 +1946,7 @@ void dev_set_allmulti(struct net_device *dev, int inc) dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags^old_flags) + if (dev->flags ^ old_flags) dev_mc_upload(dev); } @@ -1963,13 +1959,15 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Set the flags on our device. */ - dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| - IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | - (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); + dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | + IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | + IFF_AUTOMEDIA)) | + (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | + IFF_ALLMULTI)); /* * Load in the correct multicast list now the flags have changed. 
- */ + */ dev_mc_upload(dev); @@ -1980,20 +1978,20 @@ int dev_change_flags(struct net_device *dev, unsigned flags) */ ret = 0; - if ((old_flags^flags)&IFF_UP) /* Bit is different ? */ - { + if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); - if (ret == 0) + if (!ret) dev_mc_upload(dev); } - if (dev->flags&IFF_UP && - ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + if (dev->flags & IFF_UP && + ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_VOLATILE))) notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); - if ((flags^dev->gflags)&IFF_PROMISC) { - int inc = (flags&IFF_PROMISC) ? +1 : -1; + if ((flags ^ dev->gflags) & IFF_PROMISC) { + int inc = (flags & IFF_PROMISC) ? +1 : -1; dev->gflags ^= IFF_PROMISC; dev_set_promiscuity(dev, inc); } @@ -2002,53 +2000,56 @@ int dev_change_flags(struct net_device *dev, unsigned flags) is important. Some (broken) drivers set IFF_PROMISC, when IFF_ALLMULTI is requested not asking us and not reporting. */ - if ((flags^dev->gflags)&IFF_ALLMULTI) { - int inc = (flags&IFF_ALLMULTI) ? +1 : -1; + if ((flags ^ dev->gflags) & IFF_ALLMULTI) { + int inc = (flags & IFF_ALLMULTI) ? +1 : -1; dev->gflags ^= IFF_ALLMULTI; dev_set_allmulti(dev, inc); } - if (old_flags^dev->flags) - rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); + if (old_flags ^ dev->flags) + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); return ret; } /* - * Perform the SIOCxIFxxx calls. + * Perform the SIOCxIFxxx calls. 
*/ - static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) { - struct net_device *dev; int err; + struct net_device *dev = __dev_get_by_name(ifr->ifr_name); - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + if (!dev) return -ENODEV; - switch(cmd) - { + switch (cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) - |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + ifr->ifr_flags = (dev->flags & ~(IFF_PROMISC | + IFF_ALLMULTI | + IFF_RUNNING)) | + (dev->gflags & (IFF_PROMISC | + IFF_ALLMULTI)); if (netif_running(dev) && netif_carrier_ok(dev)) ifr->ifr_flags |= IFF_RUNNING; return 0; case SIOCSIFFLAGS: /* Set interface flags */ return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ + + case SIOCGIFMETRIC: /* Get the metric on the interface + (currently unused) */ ifr->ifr_metric = 0; return 0; - - case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ return -EOPNOTSUPP; - + case SIOCGIFMTU: /* Get the MTU of a device */ ifr->ifr_mtu = dev->mtu; return 0; - + case SIOCSIFMTU: /* Set the MTU of a device */ if (ifr->ifr_mtu == dev->mtu) return 0; @@ -2056,80 +2057,85 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) /* * MTU must be positive. 
*/ - - if (ifr->ifr_mtu<0) + if (ifr->ifr_mtu < 0) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; + err = 0; if (dev->change_mtu) err = dev->change_mtu(dev, ifr->ifr_mtu); - else { + else dev->mtu = ifr->ifr_mtu; - err = 0; - } - if (!err && dev->flags&IFF_UP) - notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); + if (!err && dev->flags & IFF_UP) + notifier_call_chain(&netdev_chain, + NETDEV_CHANGEMTU, dev); return err; case SIOCGIFHWADDR: - memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); - ifr->ifr_hwaddr.sa_family=dev->type; + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + MAX_ADDR_LEN); + ifr->ifr_hwaddr.sa_family = dev->type; return 0; - + case SIOCSIFHWADDR: - if (dev->set_mac_address == NULL) + if (!dev->set_mac_address) return -EOPNOTSUPP; - if (ifr->ifr_hwaddr.sa_family!=dev->type) + if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; err = dev->set_mac_address(dev, &ifr->ifr_hwaddr); if (!err) - notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + notifier_call_chain(&netdev_chain, + NETDEV_CHANGEADDR, dev); return err; - + case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family!=dev->type) + if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN); - notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + MAX_ADDR_LEN); + notifier_call_chain(&netdev_chain, + NETDEV_CHANGEADDR, dev); return 0; case SIOCGIFMAP: - ifr->ifr_map.mem_start=dev->mem_start; - ifr->ifr_map.mem_end=dev->mem_end; - ifr->ifr_map.base_addr=dev->base_addr; - ifr->ifr_map.irq=dev->irq; - ifr->ifr_map.dma=dev->dma; - ifr->ifr_map.port=dev->if_port; + ifr->ifr_map.mem_start = dev->mem_start; + ifr->ifr_map.mem_end = dev->mem_end; + ifr->ifr_map.base_addr = dev->base_addr; + ifr->ifr_map.irq = dev->irq; + ifr->ifr_map.dma = dev->dma; + ifr->ifr_map.port = 
dev->if_port; return 0; - + case SIOCSIFMAP: if (dev->set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev->set_config(dev,&ifr->ifr_map); + return dev->set_config(dev, &ifr->ifr_map); } return -EOPNOTSUPP; - + case SIOCADDMULTI: - if (dev->set_multicast_list == NULL || + if (!dev->set_multicast_list || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); + dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, + dev->addr_len, 1); return 0; case SIOCDELMULTI: - if (dev->set_multicast_list == NULL || - ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) + if (!dev->set_multicast_list || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1); + dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, + dev->addr_len, 1); return 0; case SIOCGIFINDEX: @@ -2141,19 +2147,20 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFTXQLEN: - if (ifr->ifr_qlen<0) + if (ifr->ifr_qlen < 0) return -EINVAL; dev->tx_queue_len = ifr->ifr_qlen; return 0; case SIOCSIFNAME: - if (dev->flags&IFF_UP) + if (dev->flags & IFF_UP) return -EBUSY; if (__dev_get_by_name(ifr->ifr_newname)) return -EEXIST; memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ); - dev->name[IFNAMSIZ-1] = 0; - notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + dev->name[IFNAMSIZ - 1] = 0; + notifier_call_chain(&netdev_chain, + NETDEV_CHANGENAME, dev); return 0; /* @@ -2174,16 +2181,19 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) cmd == SIOCGMIIREG || cmd == SIOCSMIIREG || cmd == SIOCWANDEV) { + err = -EOPNOTSUPP; if (dev->do_ioctl) { - if (!netif_device_present(dev)) - return -ENODEV; - return dev->do_ioctl(dev, ifr, cmd); + if (netif_device_present(dev)) + err = dev->do_ioctl(dev, ifr, + cmd); + else + err = -ENODEV; } - return -EOPNOTSUPP; - } + } else + err = 
-EINVAL; } - return -EINVAL; + return err; } /* @@ -2197,7 +2207,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) * @arg: pointer to a struct ifreq in user space * * Issue ioctl functions to devices. This is normally called by the - * user space syscall interfaces but can sometimes be useful for + * user space syscall interfaces but can sometimes be useful for * other purposes. The return value is the return from the syscall if * positive or a negative errno code on error. */ @@ -2212,16 +2222,15 @@ int dev_ioctl(unsigned int cmd, void *arg) and requires shared lock, because it sleeps writing to user space. */ - + if (cmd == SIOCGIFCONF) { rtnl_shlock(); ret = dev_ifconf((char *) arg); rtnl_shunlock(); return ret; } - if (cmd == SIOCGIFNAME) { + if (cmd == SIOCGIFNAME) return dev_ifname((struct ifreq *)arg); - } if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -2233,18 +2242,16 @@ int dev_ioctl(unsigned int cmd, void *arg) *colon = 0; /* - * See which interface the caller is talking about. + * See which interface the caller is talking about. */ - - switch(cmd) - { + + switch (cmd) { /* * These ioctl calls: * - can be done by all. * - atomic and do not require locking. * - return a value */ - case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: @@ -2260,8 +2267,9 @@ int dev_ioctl(unsigned int cmd, void *arg) if (!ret) { if (colon) *colon = ':'; - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; } return ret; @@ -2271,7 +2279,6 @@ int dev_ioctl(unsigned int cmd, void *arg) * - require strict serialization. 
* - return a value */ - case SIOCETHTOOL: case SIOCGMIIPHY: case SIOCGMIIREG: @@ -2286,8 +2293,9 @@ int dev_ioctl(unsigned int cmd, void *arg) if (!ret) { if (colon) *colon = ':'; - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; } return ret; @@ -2297,7 +2305,6 @@ int dev_ioctl(unsigned int cmd, void *arg) * - require strict serialization. * - do not return a value */ - case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: @@ -2325,19 +2332,19 @@ int dev_ioctl(unsigned int cmd, void *arg) rtnl_unlock(); dev_probe_unlock(); return ret; - + case SIOCGIFMEM: - /* Get the per device memory space. We can add this but currently - do not support it */ + /* Get the per device memory space. We can add this but + * currently do not support it */ case SIOCSIFMEM: - /* Set the per device memory buffer space. Not applicable in our case */ + /* Set the per device memory buffer space. + * Not applicable in our case */ case SIOCSIFLINK: return -EINVAL; /* * Unknown or private ioctl. 
- */ - + */ default: if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && @@ -2348,8 +2355,9 @@ int dev_ioctl(unsigned int cmd, void *arg) ret = dev_ifsioc(&ifr, cmd); rtnl_unlock(); dev_probe_unlock(); - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; + if (!ret && copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; return ret; } #ifdef WIRELESS_EXT @@ -2358,8 +2366,8 @@ int dev_ioctl(unsigned int cmd, void *arg) /* If command is `set a parameter', or * `get the encoding parameters', check if * the user has the right to do it */ - if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) { - if(!capable(CAP_NET_ADMIN)) + if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) { + if (!capable(CAP_NET_ADMIN)) return -EPERM; } dev_load(ifr.ifr_name); @@ -2368,8 +2376,9 @@ int dev_ioctl(unsigned int cmd, void *arg) ret = wireless_process_ioctl(&ifr, cmd); rtnl_unlock(); if (!ret && IW_IS_GET(cmd) && - copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; + copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; return ret; } #endif /* WIRELESS_EXT */ @@ -2385,14 +2394,13 @@ int dev_ioctl(unsigned int cmd, void *arg) * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ - int dev_new_index(void) { static int ifindex; for (;;) { if (++ifindex <= 0) - ifindex=1; - if (__dev_get_by_index(ifindex) == NULL) + ifindex = 1; + if (!__dev_get_by_index(ifindex)) return ifindex; } } @@ -2402,7 +2410,7 @@ static int dev_boot_phase = 1; /** * register_netdevice - register a network device * @dev: device to register - * + * * Take a completed network device structure and add it to the kernel * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier * chain. 0 is returned on success. 
A negative errno code is returned @@ -2422,15 +2430,13 @@ int net_dev_init(void); int register_netdevice(struct net_device *dev) { struct net_device *d, **dp; -#ifdef CONFIG_NET_DIVERT int ret; -#endif spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->xmit_lock); dev->xmit_lock_owner = -1; #ifdef CONFIG_NET_FASTROUTE - dev->fastpath_lock=RW_LOCK_UNLOCKED; + dev->fastpath_lock = RW_LOCK_UNLOCKED; #endif if (dev_boot_phase) @@ -2439,38 +2445,32 @@ int register_netdevice(struct net_device *dev) #ifdef CONFIG_NET_DIVERT ret = alloc_divert_blk(dev); if (ret) - return ret; + goto out; #endif /* CONFIG_NET_DIVERT */ - + dev->iflink = -1; /* Init, if this function is available */ - if (dev->init && dev->init(dev) != 0) { -#ifdef CONFIG_NET_DIVERT - free_divert_blk(dev); -#endif - return -EIO; - } + ret = -EIO; + if (dev->init && dev->init(dev)) + goto out_err; dev->ifindex = dev_new_index(); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence, and append to tail of chain */ - for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { - if (d == dev || strcmp(d->name, dev->name) == 0) { -#ifdef CONFIG_NET_DIVERT - free_divert_blk(dev); -#endif - return -EEXIST; - } + ret = -EEXIST; + for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { + if (d == dev || !strcmp(d->name, dev->name)) + goto out_err; } /* * nil rebuild_header routine, * that should be never called and used as just bug trap. */ - if (dev->rebuild_header == NULL) + if (!dev->rebuild_header) dev->rebuild_header = default_rebuild_header; /* @@ -2492,8 +2492,15 @@ int register_netdevice(struct net_device *dev) notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); net_run_sbin_hotplug(dev, "register"); + ret = 0; - return 0; +out: + return ret; +out_err: +#ifdef CONFIG_NET_DIVERT + free_divert_blk(dev); +#endif + goto out; } /** @@ -2503,15 +2510,15 @@ int register_netdevice(struct net_device *dev) * Destroy and free a dead device. A value of zero is returned on * success. 
*/ - int netdev_finish_unregister(struct net_device *dev) { - BUG_TRAP(dev->ip_ptr==NULL); - BUG_TRAP(dev->ip6_ptr==NULL); - BUG_TRAP(dev->dn_ptr==NULL); + BUG_TRAP(!dev->ip_ptr); + BUG_TRAP(!dev->ip6_ptr); + BUG_TRAP(!dev->dn_ptr); if (!dev->deadbeaf) { - printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name); + printk(KERN_ERR "Freeing alive device %p, %s\n", + dev, dev->name); return 0; } #ifdef NET_REFCNT_DEBUG @@ -2547,11 +2554,11 @@ int unregister_netdevice(struct net_device *dev) if (dev->flags & IFF_UP) dev_close(dev); - BUG_TRAP(dev->deadbeaf==0); + BUG_TRAP(!dev->deadbeaf); dev->deadbeaf = 1; /* And unlink it from device chain. */ - for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { + for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { if (d == dev) { write_lock_bh(&dev_base_lock); *dp = d->next; @@ -2559,8 +2566,9 @@ int unregister_netdevice(struct net_device *dev) break; } } - if (d == NULL) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev); + if (!d) { + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " + "was registered\n", dev->name, dev); return -ENODEV; } @@ -2568,7 +2576,7 @@ int unregister_netdevice(struct net_device *dev) br_write_lock_bh(BR_NETPROTO_LOCK); br_write_unlock_bh(BR_NETPROTO_LOCK); - if (dev_boot_phase == 0) { + if (!dev_boot_phase) { #ifdef CONFIG_NET_FASTROUTE dev_clear_fastroute(dev); #endif @@ -2593,7 +2601,7 @@ int unregister_netdevice(struct net_device *dev) dev->uninit(dev); /* Notifier chain MUST detach us from master device. 
*/ - BUG_TRAP(dev->master==NULL); + BUG_TRAP(!dev->master); #ifdef CONFIG_NET_DIVERT free_divert_blk(dev); @@ -2602,20 +2610,20 @@ int unregister_netdevice(struct net_device *dev) if (dev->features & NETIF_F_DYNALLOC) { #ifdef NET_REFCNT_DEBUG if (atomic_read(&dev->refcnt) != 1) - printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1); + printk(KERN_DEBUG "unregister_netdevice: holding %s " + "refcnt=%d\n", + dev->name, atomic_read(&dev->refcnt) - 1); #endif - dev_put(dev); - return 0; + goto out; } /* Last reference is our one */ - if (atomic_read(&dev->refcnt) == 1) { - dev_put(dev); - return 0; - } + if (atomic_read(&dev->refcnt) == 1) + goto out; #ifdef NET_REFCNT_DEBUG - printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)); + printk(KERN_DEBUG "unregister_netdevice: waiting %s refcnt=%d\n", + dev->name, atomic_read(&dev->refcnt)); #endif /* EXPLANATION. If dev->refcnt is not now 1 (our own reference) @@ -2623,14 +2631,15 @@ int unregister_netdevice(struct net_device *dev) to this device and we cannot release it. "New style" devices have destructors, hence we can return from this - function and destructor will do all the work later. As of kernel 2.4.0 - there are very few "New Style" devices. + function and destructor will do all the work later. As of kernel + 2.4.0 there are very few "New Style" devices. "Old style" devices expect that the device is free of any references upon exit from this function. We cannot return from this function until all such references have - fallen away. This is because the caller of this function will probably - immediately kfree(*dev) and then be unloaded via sys_delete_module. + fallen away. This is because the caller of this function will + probably immediately kfree(*dev) and then be unloaded via + sys_delete_module. So, we linger until all references fall away. The duration of the linger is basically unbounded! 
It is driven by, for example, the @@ -2643,20 +2652,22 @@ int unregister_netdevice(struct net_device *dev) now = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 1) { - if ((jiffies - now) > 1*HZ) { + if ((jiffies - now) > 1 * HZ) { /* Rebroadcast unregister notification */ - notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + notifier_call_chain(&netdev_chain, + NETDEV_UNREGISTER, dev); } current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/4); + schedule_timeout(HZ / 4); current->state = TASK_RUNNING; - if ((jiffies - warning_time) > 10*HZ) { - printk(KERN_EMERG "unregister_netdevice: waiting for %s to " - "become free. Usage count = %d\n", - dev->name, atomic_read(&dev->refcnt)); + if ((jiffies - warning_time) > 10 * HZ) { + printk(KERN_EMERG "unregister_netdevice: waiting for " + "%s to become free. Usage count = %d\n", + dev->name, atomic_read(&dev->refcnt)); warning_time = jiffies; } } +out: dev_put(dev); return 0; } @@ -2664,7 +2675,7 @@ int unregister_netdevice(struct net_device *dev) /* * Initialize the DEV module. At boot time this walks the device list and - * unhooks any devices that fail to initialise (normally hardware not + * unhooks any devices that fail to initialise (normally hardware not * present) and leaves us with a valid list of present and active devices. * */ @@ -2692,7 +2703,7 @@ int __init net_dev_init(void) #ifdef CONFIG_NET_DIVERT dv_init(); #endif /* CONFIG_NET_DIVERT */ - + /* * Initialise the packet receive queues. */ @@ -2752,7 +2763,7 @@ int __init net_dev_init(void) if (strchr(dev->name, '%')) dev_alloc_name(dev, dev->name); - /* + /* * Check boot time settings for the device. 
*/ netdev_boot_setup_check(dev); @@ -2770,7 +2781,7 @@ int __init net_dev_init(void) dev->ifindex = dev_new_index(); if (dev->iflink == -1) dev->iflink = dev->ifindex; - if (dev->rebuild_header == NULL) + if (!dev->rebuild_header) dev->rebuild_header = default_rebuild_header; dev_init_scheduler(dev); set_bit(__LINK_STATE_PRESENT, &dev->state); @@ -2815,7 +2826,7 @@ int __init net_dev_init(void) /* * Initialise network devices */ - + net_device_init(); return 0; @@ -2848,7 +2859,7 @@ static int net_run_sbin_hotplug(struct net_device *dev, char *action) envp [i++] = ifname; envp [i++] = action_str; envp [i] = 0; - + return call_usermodehelper(argv [0], argv, envp); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d9166fba89a6..57e7f5e83b67 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -21,30 +21,34 @@ * so sockets that fail to connect * don't return -EINPROGRESS. * Alan Cox : Asynchronous I/O support - * Alan Cox : Keep correct socket pointer on sock structures + * Alan Cox : Keep correct socket pointer on sock + * structures * when accept() ed - * Alan Cox : Semantics of SO_LINGER aren't state moved - * to close when you look carefully. With - * this fixed and the accept bug fixed + * Alan Cox : Semantics of SO_LINGER aren't state + * moved to close when you look carefully. + * With this fixed and the accept bug fixed * some RPC stuff seems happier. * Niibe Yutaka : 4.4BSD style write async I/O - * Alan Cox, + * Alan Cox, * Tony Gale : Fixed reuse semantics. * Alan Cox : bind() shouldn't abort existing but dead * sockets. Stops FTP netin:.. I hope. - * Alan Cox : bind() works correctly for RAW sockets. Note - * that FreeBSD at least was broken in this respect - * so be careful with compatibility tests... + * Alan Cox : bind() works correctly for RAW sockets. + * Note that FreeBSD at least was broken + * in this respect so be careful with + * compatibility tests... 
* Alan Cox : routing cache support - * Alan Cox : memzero the socket structure for compactness. + * Alan Cox : memzero the socket structure for + * compactness. * Matt Day : nonblock connect error handler * Alan Cox : Allow large numbers of pending sockets * (eg for big web sites), but only if * specifically application requested. - * Alan Cox : New buffering throughout IP. Used dumbly. + * Alan Cox : New buffering throughout IP. Used + * dumbly. * Alan Cox : New buffering now used smartly. - * Alan Cox : BSD rather than common sense interpretation of - * listen. + * Alan Cox : BSD rather than common sense + * interpretation of listen. * Germano Caronni : Assorted small races. * Alan Cox : sendmsg/recvmsg basic support. * Alan Cox : Only sendmsg/recvmsg now supported. @@ -117,7 +121,7 @@ #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ -struct linux_mib net_statistics[NR_CPUS*2]; +struct linux_mib net_statistics[NR_CPUS * 2]; #ifdef INET_REFCNT_DEBUG atomic_t inet_sock_nr; @@ -132,7 +136,7 @@ extern int udp_get_info(char *, char **, off_t, int); extern void ip_mc_drop_socket(struct sock *sk); #ifdef CONFIG_DLCI -extern int dlci_ioctl(unsigned int, void*); +extern int dlci_ioctl(unsigned int, void *); #endif #ifdef CONFIG_DLCI_MODULE @@ -177,17 +181,18 @@ void inet_sock_destruct(struct sock *sk) return; } - BUG_TRAP(atomic_read(&sk->rmem_alloc) == 0); - BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0); - BUG_TRAP(sk->wmem_queued == 0); - BUG_TRAP(sk->forward_alloc == 0); + BUG_TRAP(!atomic_read(&sk->rmem_alloc)); + BUG_TRAP(!atomic_read(&sk->wmem_alloc)); + BUG_TRAP(!sk->wmem_queued); + BUG_TRAP(!sk->forward_alloc); if (inet->opt) kfree(inet->opt); dst_release(sk->dst_cache); #ifdef INET_REFCNT_DEBUG atomic_dec(&inet_sock_nr); - printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", sk, atomic_read(&inet_sock_nr)); + printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", + 
sk, atomic_read(&inet_sock_nr)); #endif } @@ -221,9 +226,9 @@ void inet_sock_release(struct sock *sk) sock_orphan(sk); #ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->refcnt) != 1) { - printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt)); - } + if (atomic_read(&sk->refcnt) != 1) + printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", + sk, atomic_read(&sk->refcnt)); #endif sock_put(sk); } @@ -234,18 +239,16 @@ void inet_sock_release(struct sock *sk) * socket object. Mostly it punts to the subprotocols of IP to do * the work. */ - /* * Set socket options on an inet socket. */ - int inet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; - return sk->prot->setsockopt(sk,level,optname,optval,optlen); + return sk->prot->setsockopt(sk, level, optname, optval, optlen); } /* @@ -259,9 +262,9 @@ int inet_setsockopt(struct socket *sock, int level, int optname, int inet_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; - return sk->prot->getsockopt(sk,level,optname,optval,optlen); + return sk->prot->getsockopt(sk, level, optname, optval, optlen); } /* @@ -270,11 +273,12 @@ int inet_getsockopt(struct socket *sock, int level, int optname, static int inet_autobind(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_opt *inet; /* We may need to bind the socket. */ lock_sock(sk); + inet = inet_sk(sk); if (!inet->num) { - if (sk->prot->get_port(sk, 0) != 0) { + if (sk->prot->get_port(sk, 0)) { release_sock(sk); return -EAGAIN; } @@ -287,7 +291,6 @@ static int inet_autobind(struct sock *sk) /* * Move a socket into listening state. 
*/ - int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; @@ -301,7 +304,7 @@ int inet_listen(struct socket *sock, int backlog) goto out; old_state = sk->state; - if (!((1<<old_state)&(TCPF_CLOSE|TCPF_LISTEN))) + if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) goto out; /* Really, if the socket is already in listen state @@ -349,16 +352,17 @@ static __inline__ int inet_sk_size(int protocol) static int inet_create(struct socket *sock, int protocol) { struct sock *sk; - struct list_head *p; - struct inet_protosw *answer; + struct list_head *p; + struct inet_protosw *answer; struct inet_opt *inet; + int err = -ENOBUFS; sock->state = SS_UNCONNECTED; sk = sk_alloc(PF_INET, GFP_KERNEL, inet_sk_size(protocol), inet_sk_slab(protocol)); - if (sk == NULL) - goto do_oom; - + if (!sk) + goto out; + /* Look for the requested type/protocol pair. */ answer = NULL; br_read_lock_bh(BR_NETPROTO_LOCK); @@ -382,13 +386,16 @@ static int inet_create(struct socket *sock, int protocol) } br_read_unlock_bh(BR_NETPROTO_LOCK); + err = -ESOCKTNOSUPPORT; if (!answer) - goto free_and_badtype; + goto out_sk_free; + err = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) - goto free_and_badperm; + goto out_sk_free; + err = -EPROTONOSUPPORT; if (!protocol) - goto free_and_noproto; - + goto out_sk_free; + err = 0; sock->ops = answer->ops; sk->prot = answer->prot; sk->no_check = answer->no_check; @@ -410,18 +417,15 @@ static int inet_create(struct socket *sock, int protocol) inet->id = 0; - sock_init_data(sock,sk); + sock_init_data(sock, sk); sk->destruct = inet_sock_destruct; - sk->zapped = 0; sk->family = PF_INET; sk->protocol = protocol; - sk->backlog_rcv = sk->prot->backlog_rcv; inet->ttl = sysctl_ip_default_ttl; - inet->mc_loop = 1; inet->mc_ttl = 1; inet->mc_index = 0; @@ -438,34 +442,20 @@ static int inet_create(struct socket *sock, int protocol) * shares. */ inet->sport = htons(inet->num); - /* Add to protocol hash chains. 
*/ sk->prot->hash(sk); } if (sk->prot->init) { - int err = sk->prot->init(sk); - if (err != 0) { + err = sk->prot->init(sk); + if (err) inet_sock_release(sk); - return err; - } } - return 0; - -free_and_badtype: - sk_free(sk); - return -ESOCKTNOSUPPORT; - -free_and_badperm: - sk_free(sk); - return -EPERM; - -free_and_noproto: +out: + return err; +out_sk_free: sk_free(sk); - return -EPROTONOSUPPORT; - -do_oom: - return -ENOBUFS; + goto out; } @@ -474,7 +464,6 @@ do_oom: * function we are destroying the object and from then on nobody * should refer to it. */ - int inet_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -498,7 +487,7 @@ int inet_release(struct socket *sock) sock->sk = NULL; sk->prot->close(sk, timeout); } - return(0); + return 0; } /* It is off by default, see below. */ @@ -506,19 +495,21 @@ int sysctl_ip_nonlocal_bind; static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; - struct sock *sk=sock->sk; + struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; + struct sock *sk = sock->sk; struct inet_opt *inet = inet_sk(sk); unsigned short snum; int chk_addr_ret; int err; /* If the socket has its own bind function then use it. (RAW) */ - if(sk->prot->bind) - return sk->prot->bind(sk, uaddr, addr_len); - + if (sk->prot->bind) { + err = sk->prot->bind(sk, uaddr, addr_len); + goto out; + } + err = -EINVAL; if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; + goto out; chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); @@ -529,17 +520,19 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * (ie. 
your servers still start up even if your ISDN link * is temporarily down) */ - if (sysctl_ip_nonlocal_bind == 0 && - inet->freebind == 0 && + err = -EADDRNOTAVAIL; + if (!sysctl_ip_nonlocal_bind && + !inet->freebind && addr->sin_addr.s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) - return -EADDRNOTAVAIL; + goto out; snum = ntohs(addr->sin_port); + err = -EACCES; if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) - return -EACCES; + goto out; /* We keep a pair of addresses. rcv_saddr is the one * used by hash lookups, and saddr is used for transmit. @@ -553,17 +546,17 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check these errors (active socket, double bind). */ err = -EINVAL; if (sk->state != TCP_CLOSE || inet->num) - goto out; + goto out_release_sock; inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ - if (sk->prot->get_port(sk, snum) != 0) { + if (sk->prot->get_port(sk, snum)) { inet->saddr = inet->rcv_saddr = 0; err = -EADDRINUSE; - goto out; + goto out_release_sock; } if (inet->rcv_saddr) @@ -575,15 +568,16 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->dport = 0; sk_dst_reset(sk); err = 0; -out: +out_release_sock: release_sock(sk); +out: return err; } int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, int addr_len, int flags) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; if (uaddr->sa_family == AF_UNSPEC) return sk->prot->disconnect(sk, flags); @@ -605,7 +599,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) * Connect() does not allow to get error notifications * without closing the socket. 
*/ - while ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { + while ((1 << sk->state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); @@ -622,11 +616,10 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) * Connect to a remote host. There is regrettably still a little * TCP 'magic' in here. */ - -int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, +int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; int err; long timeo; @@ -651,7 +644,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, break; case SS_UNCONNECTED: err = -EISCONN; - if (sk->state != TCP_CLOSE) + if (sk->state != TCP_CLOSE) goto out; err = sk->prot->connect(sk, uaddr, addr_len); @@ -668,9 +661,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, break; } - timeo = sock_sndtimeo(sk, flags&O_NONBLOCK); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { + if ((1 << sk->state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo)) goto out; @@ -712,22 +705,22 @@ sock_error: int inet_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk1 = sock->sk; - struct sock *sk2; int err = -EINVAL; + struct sock *sk2 = sk1->prot->accept(sk1, flags, &err); - if((sk2 = sk1->prot->accept(sk1,flags,&err)) == NULL) + if (!sk2) goto do_err; lock_sock(sk2); - BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE)); + BUG_TRAP((1 << sk2->state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); sock_graft(sk2, newsock); newsock->state = SS_CONNECTED; + err = 0; release_sock(sk2); - return 0; - do_err: return err; } @@ -736,19 +729,18 @@ do_err: /* * This does both peername and sockname. 
*/ - static int inet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int *uaddr_len, int peer) { struct sock *sk = sock->sk; struct inet_opt *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; - + sin->sin_family = AF_INET; if (peer) { - if (!inet->dport) - return -ENOTCONN; - if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1) + if (!inet->dport || + (((1 << sk->state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && + peer == 1)) return -ENOTCONN; sin->sin_port = inet->dport; sin->sin_addr.s_addr = inet->daddr; @@ -760,7 +752,7 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_addr.s_addr = addr; } *uaddr_len = sizeof(*sin); - return(0); + return 0; } @@ -770,10 +762,8 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size, { struct sock *sk = sock->sk; int addr_len = 0; - int err; - - err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT, - flags&~MSG_DONTWAIT, &addr_len); + int err = sk->prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; @@ -803,12 +793,13 @@ int inet_shutdown(struct socket *sock, int how) how++; /* maps 0->1 has the advantage of making bit 1 rcvs and 1->2 bit 2 snds. 
2->3 */ - if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */ + if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ return -EINVAL; lock_sock(sk); if (sock->state == SS_CONNECTING) { - if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE)) + if ((1 << sk->state) & + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) sock->state = SS_DISCONNECTING; else sock->state = SS_CONNECTED; @@ -858,38 +849,42 @@ int inet_shutdown(struct socket *sock, int how) static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - int err; + int err = 0; int pid; - switch(cmd) { + switch (cmd) { case FIOSETOWN: case SIOCSPGRP: - err = get_user(pid, (int *) arg); - if (err) - return err; - if (current->pid != pid && current->pgrp != -pid && - !capable(CAP_NET_ADMIN)) - return -EPERM; - sk->proc = pid; - return(0); + if (get_user(pid, (int *)arg)) + err = -EFAULT; + else if (current->pid != pid && + current->pgrp != -pid && + !capable(CAP_NET_ADMIN)) + err = -EPERM; + else + sk->proc = pid; + break; case FIOGETOWN: case SIOCGPGRP: - return put_user(sk->proc, (int *)arg); + err = put_user(sk->proc, (int *)arg); + break; case SIOCGSTAMP: - if(sk->stamp.tv_sec==0) - return -ENOENT; - err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval)); - if (err) + if (!sk->stamp.tv_sec) + err = -ENOENT; + else if (copy_to_user((void *)arg, &sk->stamp, + sizeof(struct timeval))) err = -EFAULT; - return err; + break; case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - return(ip_rt_ioctl(cmd,(void *) arg)); + err = ip_rt_ioctl(cmd, (void *)arg); + break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - return(arp_ioctl(cmd,(void *) arg)); + err = arp_ioctl(cmd, (void *)arg); + break; case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFBRDADDR: @@ -898,83 +893,82 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFNETMASK: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: - case SIOCSIFPFLAGS: - case SIOCGIFPFLAGS: + 
case SIOCSIFPFLAGS: + case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - return(devinet_ioctl(cmd,(void *) arg)); + err = devinet_ioctl(cmd, (void *)arg); + break; case SIOCGIFBR: case SIOCSIFBR: #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) #ifdef CONFIG_KMOD - if (br_ioctl_hook == NULL) + if (!br_ioctl_hook) request_module("bridge"); #endif - if (br_ioctl_hook != NULL) - return br_ioctl_hook(arg); + if (br_ioctl_hook) + err = br_ioctl_hook(arg); + else #endif - return -ENOPKG; - + err = -ENOPKG; + break; case SIOCGIFVLAN: case SIOCSIFVLAN: #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) #ifdef CONFIG_KMOD - if (vlan_ioctl_hook == NULL) + if (!vlan_ioctl_hook) request_module("8021q"); #endif - if (vlan_ioctl_hook != NULL) - return vlan_ioctl_hook(arg); + if (vlan_ioctl_hook) + err = vlan_ioctl_hook(arg); + else #endif - return -ENOPKG; - + err = -ENOPKG; + break; case SIOCGIFDIVERT: case SIOCSIFDIVERT: #ifdef CONFIG_NET_DIVERT - return divert_ioctl(cmd, (struct divert_cf *) arg); + err = divert_ioctl(cmd, (struct divert_cf *)arg); #else - return -ENOPKG; + err = -ENOPKG; #endif /* CONFIG_NET_DIVERT */ - + break; case SIOCADDDLCI: case SIOCDELDLCI: #ifdef CONFIG_DLCI lock_kernel(); - err = dlci_ioctl(cmd, (void *) arg); + err = dlci_ioctl(cmd, (void *)arg); unlock_kernel(); - return err; -#endif - -#ifdef CONFIG_DLCI_MODULE - + break; +#elif CONFIG_DLCI_MODULE #ifdef CONFIG_KMOD - if (dlci_ioctl_hook == NULL) + if (!dlci_ioctl_hook) request_module("dlci"); #endif - if (dlci_ioctl_hook) { lock_kernel(); - err = (*dlci_ioctl_hook)(cmd, (void *) arg); + err = (*dlci_ioctl_hook)(cmd, (void *)arg); unlock_kernel(); - return err; - } + } else #endif - return -ENOPKG; - + err = -ENOPKG; + break; default: - if ((cmd >= SIOCDEVPRIVATE) && - (cmd <= (SIOCDEVPRIVATE + 15))) - return(dev_ioctl(cmd,(void *) arg)); - + if (cmd >= SIOCDEVPRIVATE && + cmd <= (SIOCDEVPRIVATE + 15)) + err = dev_ioctl(cmd, (void *)arg); + else #ifdef WIRELESS_EXT - if((cmd >= 
SIOCIWFIRST) && (cmd <= SIOCIWLAST)) - return(dev_ioctl(cmd,(void *) arg)); + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) + err = dev_ioctl(cmd, (void *)arg); + else #endif /* WIRELESS_EXT */ - - if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD) - return(dev_ioctl(cmd,(void *) arg)); - return err; + if (!sk->prot->ioctl || + (err = sk->prot->ioctl(sk, cmd, arg)) == + -ENOIOCTLCMD) + err = dev_ioctl(cmd, (void *)arg); + break; } - /*NOTREACHED*/ - return(0); + return err; } struct proto_ops inet_stream_ops = { @@ -985,7 +979,7 @@ struct proto_ops inet_stream_ops = { connect: inet_stream_connect, socketpair: sock_no_socketpair, accept: inet_accept, - getname: inet_getname, + getname: inet_getname, poll: tcp_poll, ioctl: inet_ioctl, listen: inet_listen, @@ -1006,7 +1000,7 @@ struct proto_ops inet_dgram_ops = { connect: inet_dgram_connect, socketpair: sock_no_socketpair, accept: sock_no_accept, - getname: inet_getname, + getname: inet_getname, poll: datagram_poll, ioctl: inet_ioctl, listen: sock_no_listen, @@ -1067,8 +1061,7 @@ static struct inet_protosw inetsw_array[] = #define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) -void -inet_register_protosw(struct inet_protosw *p) +void inet_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; @@ -1115,8 +1108,7 @@ out_illegal: goto out; } -void -inet_unregister_protosw(struct inet_protosw *p) +void inet_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { printk(KERN_ERR @@ -1133,7 +1125,7 @@ inet_unregister_protosw(struct inet_protosw *p) /* * Called by socket.c on kernel startup. 
*/ - + static int __init inet_init(void) { struct sk_buff *dummy_skb; @@ -1157,32 +1149,32 @@ static int __init inet_init(void) raw4_sk_cachep = kmem_cache_create("raw4_sock", sizeof(struct raw_sock), 0, SLAB_HWCACHE_ALIGN, 0, 0); - if (!tcp_sk_cachep || !udp_sk_cachep || !raw4_sk_cachep) + if (!tcp_sk_cachep || !udp_sk_cachep || !raw4_sk_cachep) printk(KERN_CRIT "inet_init: Can't create protocol sock SLAB caches!\n"); /* * Tell SOCKET that we are alive... */ - - (void) sock_register(&inet_family_ops); + + (void)sock_register(&inet_family_ops); /* * Add all the protocols. */ printk(KERN_INFO "IP Protocols: "); - for (p = inet_protocol_base; p != NULL;) { - struct inet_protocol *tmp = (struct inet_protocol *) p->next; + for (p = inet_protocol_base; p;) { + struct inet_protocol *tmp = (struct inet_protocol *)p->next; inet_add_protocol(p); - printk("%s%s",p->name,tmp?", ":"\n"); + printk("%s%s", p->name, tmp ? ", " : "\n"); p = tmp; } /* Register the socket-side information for inet_create. */ - for(r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) + for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); - for(q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) + for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) inet_register_protosw(q); /* diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 80d1031f0a72..d831f67bc850 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -18,16 +18,17 @@ * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: - * Alexey Kuznetsov: pa_* fields are replaced with ifaddr lists. + * Alexey Kuznetsov: pa_* fields are replaced with ifaddr + * lists. * Cyrus Durgin: updated for kmod - * Matthias Andree: in devinet_ioctl, compare label and + * Matthias Andree: in devinet_ioctl, compare label and * address (4.4BSD alias style support), * fall back to comparing just the label * if no match found. 
*/ #include <linux/config.h> - + #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> @@ -60,15 +61,29 @@ #include <net/route.h> #include <net/ip_fib.h> -struct ipv4_devconf ipv4_devconf = { 1, 1, 1, 1, 0, }; -static struct ipv4_devconf ipv4_devconf_dflt = { 1, 1, 1, 1, 1, }; +struct ipv4_devconf ipv4_devconf = { + accept_redirects: 1, + send_redirects: 1, + secure_redirects: 1, + shared_media: 1, +}; + +static struct ipv4_devconf ipv4_devconf_dflt = { + accept_redirects: 1, + send_redirects: 1, + secure_redirects: 1, + shared_media: 1, + accept_source_route: 1, +}; static void rtmsg_ifa(int event, struct in_ifaddr *); static struct notifier_block *inetaddr_chain; -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy); #ifdef CONFIG_SYSCTL -static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devconf *p); +static void devinet_sysctl_register(struct in_device *in_dev, + struct ipv4_devconf *p); static void devinet_sysctl_unregister(struct ipv4_devconf *p); #endif @@ -79,12 +94,10 @@ int inet_dev_count; rwlock_t inetdev_lock = RW_LOCK_UNLOCKED; - -static struct in_ifaddr * inet_alloc_ifa(void) +static struct in_ifaddr *inet_alloc_ifa(void) { - struct in_ifaddr *ifa; + struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); - ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); if (ifa) { memset(ifa, 0, sizeof(*ifa)); inet_ifa_count++; @@ -105,18 +118,19 @@ void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(idev->ifa_list==NULL); - BUG_TRAP(idev->mc_list==NULL); + BUG_TRAP(!idev->ifa_list); + BUG_TRAP(!idev->mc_list); #ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? dev->name : "NIL"); + printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", + idev, dev ? 
dev->name : "NIL"); #endif dev_put(dev); - if (!idev->dead) { + if (!idev->dead) printk("Freeing alive in_device %p\n", idev); - return; + else { + inet_dev_count--; + kfree(idev); } - inet_dev_count--; - kfree(idev); } struct in_device *inetdev_init(struct net_device *dev) @@ -127,21 +141,20 @@ struct in_device *inetdev_init(struct net_device *dev) in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) - return NULL; + goto out; memset(in_dev, 0, sizeof(*in_dev)); in_dev->lock = RW_LOCK_UNLOCKED; memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; - if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) { - kfree(in_dev); - return NULL; - } + if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) + goto out_kfree; inet_dev_count++; /* Reference in_dev->dev */ dev_hold(dev); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4"); + neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, + NET_IPV4_NEIGH, "ipv4"); #endif write_lock_bh(&inetdev_lock); dev->ip_ptr = in_dev; @@ -151,9 +164,14 @@ struct in_device *inetdev_init(struct net_device *dev) #ifdef CONFIG_SYSCTL devinet_sysctl_register(in_dev, &in_dev->cnf); #endif - if (dev->flags&IFF_UP) + if (dev->flags & IFF_UP) ip_mc_up(in_dev); +out: return in_dev; +out_kfree: + kfree(in_dev); + in_dev = NULL; + goto out; } static void inetdev_destroy(struct in_device *in_dev) @@ -197,10 +215,10 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) } endfor_ifa(in_dev); read_unlock(&in_dev->lock); return 0; -} +} -static void -inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) { struct in_ifaddr *ifa1 = *ifap; @@ -208,12 +226,12 @@ inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) /* 1. 
Deleting primary ifaddr forces deletion all secondaries */ - if (!(ifa1->ifa_flags&IFA_F_SECONDARY)) { + if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { struct in_ifaddr *ifa; struct in_ifaddr **ifap1 = &ifa1->ifa_next; - while ((ifa=*ifap1) != NULL) { - if (!(ifa->ifa_flags&IFA_F_SECONDARY) || + while ((ifa = *ifap1) != NULL) { + if (!(ifa->ifa_flags & IFA_F_SECONDARY) || ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) { ifap1 = &ifa->ifa_next; @@ -250,20 +268,19 @@ inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) if (destroy) { inet_free_ifa(ifa1); - if (in_dev->ifa_list == NULL) + if (!in_dev->ifa_list) inetdev_destroy(in_dev); } } -static int -inet_insert_ifa(struct in_ifaddr *ifa) +static int inet_insert_ifa(struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; ASSERT_RTNL(); - if (ifa->ifa_local == 0) { + if (!ifa->ifa_local) { inet_free_ifa(ifa); return 0; } @@ -271,10 +288,13 @@ inet_insert_ifa(struct in_ifaddr *ifa) ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; - for (ifap=&in_dev->ifa_list; (ifa1=*ifap)!=NULL; ifap=&ifa1->ifa_next) { - if (!(ifa1->ifa_flags&IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && + ifa->ifa_scope <= ifa1->ifa_scope) last_primary = &ifa1->ifa_next; - if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa)) { if (ifa1->ifa_local == ifa->ifa_local) { inet_free_ifa(ifa); return -EEXIST; @@ -287,7 +307,7 @@ inet_insert_ifa(struct in_ifaddr *ifa) } } - if (!(ifa->ifa_flags&IFA_F_SECONDARY)) { + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { net_srandom(ifa->ifa_local); ifap = last_primary; } @@ -306,24 +326,23 @@ inet_insert_ifa(struct in_ifaddr *ifa) return 0; } 
-static int -inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) +static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get(dev); ASSERT_RTNL(); - if (in_dev == NULL) { + if (!in_dev) { in_dev = inetdev_init(dev); - if (in_dev == NULL) { + if (!in_dev) { inet_free_ifa(ifa); return -ENOBUFS; } } if (ifa->ifa_dev != in_dev) { - BUG_TRAP(ifa->ifa_dev==NULL); + BUG_TRAP(!ifa->ifa_dev); in_dev_hold(in_dev); - ifa->ifa_dev=in_dev; + ifa->ifa_dev = in_dev; } if (LOOPBACK(ifa->ifa_local)) ifa->ifa_scope = RT_SCOPE_HOST; @@ -344,7 +363,8 @@ struct in_device *inetdev_by_index(int ifindex) /* Called only from RTNL semaphored context. No locks. */ -struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask) +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, + u32 mask) { ASSERT_RTNL(); @@ -355,10 +375,9 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 ma return NULL; } -int -inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; + struct rtattr **rta = arg; struct in_device *in_dev; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in_ifaddr *ifa, **ifap; @@ -366,93 +385,103 @@ inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ASSERT_RTNL(); if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) - return -EADDRNOTAVAIL; + goto out; __in_dev_put(in_dev); - for (ifap=&in_dev->ifa_list; (ifa=*ifap)!=NULL; ifap=&ifa->ifa_next) { - if ((rta[IFA_LOCAL-1] && memcmp(RTA_DATA(rta[IFA_LOCAL-1]), &ifa->ifa_local, 4)) || - (rta[IFA_LABEL-1] && strcmp(RTA_DATA(rta[IFA_LABEL-1]), ifa->ifa_label)) || - (rta[IFA_ADDRESS-1] && + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if ((rta[IFA_LOCAL - 1] && + memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), + &ifa->ifa_local, 4)) || + 
(rta[IFA_LABEL - 1] && + strcmp(RTA_DATA(rta[IFA_LABEL - 1]), ifa->ifa_label)) || + (rta[IFA_ADDRESS - 1] && (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS-1]), ifa)))) + !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), + ifa)))) continue; inet_del_ifa(in_dev, ifap, 1); return 0; } - +out: return -EADDRNOTAVAIL; } -int -inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct rtattr **rta = arg; struct net_device *dev; struct in_device *in_dev; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in_ifaddr *ifa; + int rc = -EINVAL; ASSERT_RTNL(); - if (ifm->ifa_prefixlen > 32 || rta[IFA_LOCAL-1] == NULL) - return -EINVAL; + if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) + goto out; + rc = -ENODEV; if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) - return -ENODEV; + goto out; + rc = -ENOBUFS; if ((in_dev = __in_dev_get(dev)) == NULL) { in_dev = inetdev_init(dev); if (!in_dev) - return -ENOBUFS; + goto out; } if ((ifa = inet_alloc_ifa()) == NULL) - return -ENOBUFS; + goto out; - if (rta[IFA_ADDRESS-1] == NULL) - rta[IFA_ADDRESS-1] = rta[IFA_LOCAL-1]; - memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL-1]), 4); - memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS-1]), 4); + if (!rta[IFA_ADDRESS - 1]) + rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; + memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); + memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - if (rta[IFA_BROADCAST-1]) - memcpy(&ifa->ifa_broadcast, RTA_DATA(rta[IFA_BROADCAST-1]), 4); - if (rta[IFA_ANYCAST-1]) - memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST-1]), 4); + if (rta[IFA_BROADCAST - 1]) + memcpy(&ifa->ifa_broadcast, + RTA_DATA(rta[IFA_BROADCAST - 1]), 4); + if (rta[IFA_ANYCAST - 1]) + memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST 
- 1]), 4); ifa->ifa_flags = ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; in_dev_hold(in_dev); - ifa->ifa_dev = in_dev; - if (rta[IFA_LABEL-1]) - memcpy(ifa->ifa_label, RTA_DATA(rta[IFA_LABEL-1]), IFNAMSIZ); + ifa->ifa_dev = in_dev; + if (rta[IFA_LABEL - 1]) + memcpy(ifa->ifa_label, RTA_DATA(rta[IFA_LABEL - 1]), IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - return inet_insert_ifa(ifa); + rc = inet_insert_ifa(ifa); +out: + return rc; } -/* - * Determine a default network mask, based on the IP address. +/* + * Determine a default network mask, based on the IP address. */ static __inline__ int inet_abc_len(u32 addr) { - if (ZERONET(addr)) - return 0; + int rc = -1; /* Something else, probably a multicast. */ - addr = ntohl(addr); - if (IN_CLASSA(addr)) - return 8; - if (IN_CLASSB(addr)) - return 16; - if (IN_CLASSC(addr)) - return 24; + if (ZERONET(addr)) + rc = 0; + else { + addr = ntohl(addr); + + if (IN_CLASSA(addr)) + rc = 8; + else if (IN_CLASSB(addr)) + rc = 16; + else if (IN_CLASSC(addr)) + rc = 24; + } - /* - * Something else, probably a multicast. 
- */ - - return -1; + return rc; } @@ -466,7 +495,7 @@ int devinet_ioctl(unsigned int cmd, void *arg) struct in_ifaddr *ifa = NULL; struct net_device *dev; char *colon; - int ret = 0; + int ret = -EFAULT; int tryaddrmatch = 0; /* @@ -474,8 +503,8 @@ int devinet_ioctl(unsigned int cmd, void *arg) */ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - ifr.ifr_name[IFNAMSIZ-1] = 0; + goto out; + ifr.ifr_name[IFNAMSIZ - 1] = 0; /* save original address for comparison */ memcpy(&sin_orig, sin, sizeof(*sin)); @@ -503,215 +532,222 @@ int devinet_ioctl(unsigned int cmd, void *arg) break; case SIOCSIFFLAGS: + ret = -EACCES; if (!capable(CAP_NET_ADMIN)) - return -EACCES; + goto out; break; case SIOCSIFADDR: /* Set interface address (and family) */ case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ + ret = -EACCES; if (!capable(CAP_NET_ADMIN)) - return -EACCES; + goto out; + ret = -EINVAL; if (sin->sin_family != AF_INET) - return -EINVAL; + goto out; break; default: - return -EINVAL; + ret = -EINVAL; + goto out; } dev_probe_lock(); rtnl_lock(); - if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) { - ret = -ENODEV; + ret = -ENODEV; + if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) goto done; - } if (colon) *colon = ':'; - if ((in_dev=__in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get(dev)) != NULL) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ /* note: we only do this for a limited set of ioctls and only if the original address family was AF_INET. This is checked above. 
*/ - for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) { - if ((strcmp(ifr.ifr_name, ifa->ifa_label) == 0) - && (sin_orig.sin_addr.s_addr == ifa->ifa_address)) { + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + sin_orig.sin_addr.s_addr == + ifa->ifa_address) { break; /* found */ } } } /* we didn't get a match, maybe the application is - 4.3BSD-style and passed in junk so we fall back to + 4.3BSD-style and passed in junk so we fall back to comparing just the label */ - if (ifa == NULL) { - for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) - if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) + if (!ifa) { + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) + if (!strcmp(ifr.ifr_name, ifa->ifa_label)) break; } } - if (ifa == NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) { - ret = -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; - } switch(cmd) { - case SIOCGIFADDR: /* Get interface address */ - sin->sin_addr.s_addr = ifa->ifa_local; - goto rarok; - - case SIOCGIFBRDADDR: /* Get the broadcast address */ - sin->sin_addr.s_addr = ifa->ifa_broadcast; - goto rarok; - - case SIOCGIFDSTADDR: /* Get the destination address */ - sin->sin_addr.s_addr = ifa->ifa_address; - goto rarok; - - case SIOCGIFNETMASK: /* Get the netmask for the interface */ - sin->sin_addr.s_addr = ifa->ifa_mask; - goto rarok; - - case SIOCSIFFLAGS: - if (colon) { - if (ifa == NULL) { - ret = -EADDRNOTAVAIL; - break; - } - if (!(ifr.ifr_flags&IFF_UP)) - inet_del_ifa(in_dev, ifap, 1); + case SIOCGIFADDR: /* Get interface address */ + sin->sin_addr.s_addr = ifa->ifa_local; + goto rarok; + + case SIOCGIFBRDADDR: /* Get the broadcast address */ + sin->sin_addr.s_addr = ifa->ifa_broadcast; + goto rarok; + + case SIOCGIFDSTADDR: /* Get the destination address */ + sin->sin_addr.s_addr = 
ifa->ifa_address; + goto rarok; + + case SIOCGIFNETMASK: /* Get the netmask for the interface */ + sin->sin_addr.s_addr = ifa->ifa_mask; + goto rarok; + + case SIOCSIFFLAGS: + if (colon) { + ret = -EADDRNOTAVAIL; + if (!ifa) break; - } - ret = dev_change_flags(dev, ifr.ifr_flags); + ret = 0; + if (!(ifr.ifr_flags & IFF_UP)) + inet_del_ifa(in_dev, ifap, 1); + break; + } + ret = dev_change_flags(dev, ifr.ifr_flags); + break; + + case SIOCSIFADDR: /* Set interface address (and family) */ + ret = -EINVAL; + if (inet_abc_len(sin->sin_addr.s_addr) < 0) break; - - case SIOCSIFADDR: /* Set interface address (and family) */ - if (inet_abc_len(sin->sin_addr.s_addr) < 0) { - ret = -EINVAL; + + if (!ifa) { + ret = -ENOBUFS; + if ((ifa = inet_alloc_ifa()) == NULL) break; - } + if (colon) + memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); + else + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + } else { + ret = 0; + if (ifa->ifa_local == sin->sin_addr.s_addr) + break; + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_broadcast = 0; + ifa->ifa_anycast = 0; + } - if (!ifa) { - if ((ifa = inet_alloc_ifa()) == NULL) { - ret = -ENOBUFS; - break; - } - if (colon) - memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); - else - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - } else { - ret = 0; - if (ifa->ifa_local == sin->sin_addr.s_addr) - break; - inet_del_ifa(in_dev, ifap, 0); - ifa->ifa_broadcast = 0; - ifa->ifa_anycast = 0; - } + ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; + + if (!(dev->flags & IFF_POINTOPOINT)) { + ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); + ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); + if ((dev->flags & IFF_BROADCAST) && + ifa->ifa_prefixlen < 31) + ifa->ifa_broadcast = ifa->ifa_address | + ~ifa->ifa_mask; + } else { + ifa->ifa_prefixlen = 32; + ifa->ifa_mask = inet_make_mask(32); + } + ret = inet_set_ifa(dev, ifa); + break; - ifa->ifa_address = - ifa->ifa_local = sin->sin_addr.s_addr; - - if (!(dev->flags&IFF_POINTOPOINT)) { - 
ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); - ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); - if ((dev->flags&IFF_BROADCAST) && ifa->ifa_prefixlen < 31) - ifa->ifa_broadcast = ifa->ifa_address|~ifa->ifa_mask; - } else { - ifa->ifa_prefixlen = 32; - ifa->ifa_mask = inet_make_mask(32); - } - ret = inet_set_ifa(dev, ifa); - break; + case SIOCSIFBRDADDR: /* Set the broadcast address */ + ret = 0; + if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_broadcast = sin->sin_addr.s_addr; + inet_insert_ifa(ifa); + } + break; - case SIOCSIFBRDADDR: /* Set the broadcast address */ - if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { - inet_del_ifa(in_dev, ifap, 0); - ifa->ifa_broadcast = sin->sin_addr.s_addr; - inet_insert_ifa(ifa); - } + case SIOCSIFDSTADDR: /* Set the destination address */ + ret = 0; + if (ifa->ifa_address == sin->sin_addr.s_addr) break; - - case SIOCSIFDSTADDR: /* Set the destination address */ - if (ifa->ifa_address != sin->sin_addr.s_addr) { - if (inet_abc_len(sin->sin_addr.s_addr) < 0) { - ret = -EINVAL; - break; - } - inet_del_ifa(in_dev, ifap, 0); - ifa->ifa_address = sin->sin_addr.s_addr; - inet_insert_ifa(ifa); - } + ret = -EINVAL; + if (inet_abc_len(sin->sin_addr.s_addr) < 0) break; + ret = 0; + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_address = sin->sin_addr.s_addr; + inet_insert_ifa(ifa); + break; - case SIOCSIFNETMASK: /* Set the netmask for the interface */ - - /* - * The mask we set must be legal. - */ - if (bad_mask(sin->sin_addr.s_addr, 0)) { - ret = -EINVAL; - break; - } + case SIOCSIFNETMASK: /* Set the netmask for the interface */ - if (ifa->ifa_mask != sin->sin_addr.s_addr) { - inet_del_ifa(in_dev, ifap, 0); - ifa->ifa_mask = sin->sin_addr.s_addr; - ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); - inet_insert_ifa(ifa); - } + /* + * The mask we set must be legal. 
+ */ + ret = -EINVAL; + if (bad_mask(sin->sin_addr.s_addr, 0)) break; + ret = 0; + if (ifa->ifa_mask != sin->sin_addr.s_addr) { + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_mask = sin->sin_addr.s_addr; + ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); + inet_insert_ifa(ifa); + } + break; } done: rtnl_unlock(); dev_probe_unlock(); +out: return ret; - rarok: rtnl_unlock(); dev_probe_unlock(); - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; + ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; + goto out; } -static int -inet_gifconf(struct net_device *dev, char *buf, int len) +static int inet_gifconf(struct net_device *dev, char *buf, int len) { struct in_device *in_dev = __in_dev_get(dev); struct in_ifaddr *ifa; struct ifreq ifr; - int done=0; + int done = 0; - if (in_dev==NULL || (ifa=in_dev->ifa_list)==NULL) - return 0; + if (!in_dev || (ifa = in_dev->ifa_list) == NULL) + goto out; - for ( ; ifa; ifa = ifa->ifa_next) { + for (; ifa; ifa = ifa->ifa_next) { if (!buf) { done += sizeof(ifr); continue; } if (len < (int) sizeof(ifr)) - return done; + break; memset(&ifr, 0, sizeof(struct ifreq)); if (ifa->ifa_label) strcpy(ifr.ifr_name, ifa->ifa_label); else strcpy(ifr.ifr_name, dev->name); - (*(struct sockaddr_in *) &ifr.ifr_addr).sin_family = AF_INET; - (*(struct sockaddr_in *) &ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; + (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; + (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = + ifa->ifa_local; - if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) - return -EFAULT; - buf += sizeof(struct ifreq); - len -= sizeof(struct ifreq); + if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { + done = -EFAULT; + break; + } + buf += sizeof(struct ifreq); + len -= sizeof(struct ifreq); done += sizeof(struct ifreq); } +out: return done; } @@ -722,10 +758,8 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) read_lock(&inetdev_lock); in_dev = 
__in_dev_get(dev); - if (in_dev == NULL) { - read_unlock(&inetdev_lock); - return 0; - } + if (!in_dev) + goto out_unlock_inetdev; read_lock(&in_dev->lock); for_primary_ifa(in_dev) { @@ -742,7 +776,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) read_unlock(&inetdev_lock); if (addr) - return addr; + goto out; /* Not loopback addresses on loopback should be preferred in this case. It is importnat that lo is the first interface @@ -750,8 +784,8 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) */ read_lock(&dev_base_lock); read_lock(&inetdev_lock); - for (dev=dev_base; dev; dev=dev->next) { - if ((in_dev=__in_dev_get(dev)) == NULL) + for (dev = dev_base; dev; dev = dev->next) { + if ((in_dev = __in_dev_get(dev)) == NULL) continue; read_lock(&in_dev->lock); @@ -759,17 +793,20 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) { read_unlock(&in_dev->lock); - read_unlock(&inetdev_lock); - read_unlock(&dev_base_lock); - return ifa->ifa_local; + addr = ifa->ifa_local; + goto out_unlock_both; } } endfor_ifa(in_dev); read_unlock(&in_dev->lock); } +out_unlock_both: read_unlock(&inetdev_lock); read_unlock(&dev_base_lock); - - return 0; +out: + return addr; +out_unlock_inetdev: + read_unlock(&inetdev_lock); + goto out; } /* @@ -783,20 +820,21 @@ int register_inetaddr_notifier(struct notifier_block *nb) int unregister_inetaddr_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&inetaddr_chain,nb); + return notifier_chain_unregister(&inetaddr_chain, nb); } /* Called only under RTNL semaphore */ -static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) +static int inetdev_event(struct notifier_block *this, unsigned long event, + void *ptr) { struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get(dev); ASSERT_RTNL(); - if (in_dev == NULL) - return NOTIFY_DONE; + if (!in_dev) + 
goto out; switch (event) { case NETDEV_REGISTER: @@ -810,7 +848,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { ifa->ifa_local = - ifa->ifa_address = htonl(INADDR_LOOPBACK); + ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; ifa->ifa_mask = inet_make_mask(8); in_dev_hold(in_dev); @@ -843,7 +881,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void } break; } - +out: return NOTIFY_DONE; } @@ -887,15 +925,14 @@ rtattr_failure: static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { int idx, ip_idx; - int s_idx, s_ip_idx; struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; + int s_ip_idx, s_idx = cb->args[0]; - s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; if (idx > s_idx) @@ -911,7 +948,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0) { + cb->nlh->nlmsg_seq, + RTM_NEWADDR) <= 0) { read_unlock(&in_dev->lock); read_unlock(&inetdev_lock); goto done; @@ -929,65 +967,39 @@ done: return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr * ifa) +static void rtmsg_ifa(int event, struct in_ifaddr* ifa) { - struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) { + if (!skb) netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); - return; - } - if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { + else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 
kfree_skb(skb); netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); - return; + } else { + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); } - -static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX-RTM_BASE+1] = -{ - { NULL, NULL, }, - { NULL, NULL, }, - { NULL, NULL, }, - { NULL, NULL, }, - - { inet_rtm_newaddr, NULL, }, - { inet_rtm_deladdr, NULL, }, - { NULL, inet_dump_ifaddr, }, - { NULL, NULL, }, - - { inet_rtm_newroute, NULL, }, - { inet_rtm_delroute, NULL, }, - { inet_rtm_getroute, inet_dump_fib, }, - { NULL, NULL, }, - - { NULL, NULL, }, - { NULL, NULL, }, - { NULL, NULL, }, - { NULL, NULL, }, - +static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = { + [4] = { doit: inet_rtm_newaddr, }, + [5] = { doit: inet_rtm_deladdr, }, + [6] = { dumpit: inet_dump_ifaddr, }, + [8] = { doit: inet_rtm_newroute, }, + [9] = { doit: inet_rtm_delroute, }, + [10] = { doit: inet_rtm_getroute, dumpit: inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - { inet_rtm_newrule, NULL, }, - { inet_rtm_delrule, NULL, }, - { NULL, inet_dump_rules, }, - { NULL, NULL, }, -#else - { NULL, NULL, }, - { NULL, NULL, }, - { NULL, NULL, }, - { NULL, NULL, }, + [16] = { doit: inet_rtm_newrule, }, + [17] = { doit: inet_rtm_delrule, }, + [18] = { dumpit: inet_dump_rules, }, #endif }; - #ifdef CONFIG_SYSCTL -void inet_forward_change() +void inet_forward_change(void) { struct net_device *dev; int on = ipv4_devconf.forwarding; @@ -1009,15 +1021,13 @@ void inet_forward_change() rt_cache_flush(0); } -static -int devinet_sysctl_forward(ctl_table *ctl, int write, struct file * filp, - void *buffer, size_t *lenp) +static int devinet_sysctl_forward(ctl_table *ctl, int write, + struct file* filp, void *buffer, + size_t *lenp) { int *valp = ctl->data; int val = *valp; - int ret; - - ret = 
proc_dointvec(ctl, write, filp, buffer, lenp); + int ret = proc_dointvec(ctl, write, filp, buffer, lenp); if (write && *valp != val) { if (valp == &ipv4_devconf.forwarding) @@ -1026,81 +1036,179 @@ int devinet_sysctl_forward(ctl_table *ctl, int write, struct file * filp, rt_cache_flush(0); } - return ret; + return ret; } -static struct devinet_sysctl_table -{ +static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table devinet_vars[15]; - ctl_table devinet_dev[2]; - ctl_table devinet_conf_dir[2]; - ctl_table devinet_proto_dir[2]; - ctl_table devinet_root_dir[2]; + ctl_table devinet_vars[15]; + ctl_table devinet_dev[2]; + ctl_table devinet_conf_dir[2]; + ctl_table devinet_proto_dir[2]; + ctl_table devinet_root_dir[2]; } devinet_sysctl = { - NULL, - {{NET_IPV4_CONF_FORWARDING, "forwarding", - &ipv4_devconf.forwarding, sizeof(int), 0644, NULL, - &devinet_sysctl_forward}, - {NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding", - &ipv4_devconf.mc_forwarding, sizeof(int), 0444, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects", - &ipv4_devconf.accept_redirects, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects", - &ipv4_devconf.secure_redirects, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_SHARED_MEDIA, "shared_media", - &ipv4_devconf.shared_media, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_RP_FILTER, "rp_filter", - &ipv4_devconf.rp_filter, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects", - &ipv4_devconf.send_redirects, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route", - &ipv4_devconf.accept_source_route, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_PROXY_ARP, "proxy_arp", - &ipv4_devconf.proxy_arp, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_MEDIUM_ID, "medium_id", - &ipv4_devconf.medium_id, sizeof(int), 
0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay", - &ipv4_devconf.bootp_relay, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_LOG_MARTIANS, "log_martians", - &ipv4_devconf.log_martians, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_TAG, "tag", - &ipv4_devconf.tag, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_CONF_ARPFILTER, "arp_filter", - &ipv4_devconf.arp_filter, sizeof(int), 0644, NULL, - &proc_dointvec}, - {0}}, - - {{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, devinet_sysctl.devinet_vars},{0}}, - {{NET_IPV4_CONF, "conf", NULL, 0, 0555, devinet_sysctl.devinet_dev},{0}}, - {{NET_IPV4, "ipv4", NULL, 0, 0555, devinet_sysctl.devinet_conf_dir},{0}}, - {{CTL_NET, "net", NULL, 0, 0555, devinet_sysctl.devinet_proto_dir},{0}} + devinet_vars: { + { + ctl_name: NET_IPV4_CONF_FORWARDING, + procname: "forwarding", + data: &ipv4_devconf.forwarding, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &devinet_sysctl_forward, + }, + { + ctl_name: NET_IPV4_CONF_MC_FORWARDING, + procname: "mc_forwarding", + data: &ipv4_devconf.mc_forwarding, + maxlen: sizeof(int), + mode: 0444, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_ACCEPT_REDIRECTS, + procname: "accept_redirects", + data: &ipv4_devconf.accept_redirects, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_SECURE_REDIRECTS, + procname: "secure_redirects", + data: &ipv4_devconf.secure_redirects, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_SHARED_MEDIA, + procname: "shared_media", + data: &ipv4_devconf.shared_media, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_RP_FILTER, + procname: "rp_filter", + data: &ipv4_devconf.rp_filter, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_SEND_REDIRECTS, + procname: 
"send_redirects", + data: &ipv4_devconf.send_redirects, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, + procname: "accept_source_route", + data: &ipv4_devconf.accept_source_route, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_PROXY_ARP, + procname: "proxy_arp", + data: &ipv4_devconf.proxy_arp, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_MEDIUM_ID, + procname: "medium_id", + data: &ipv4_devconf.medium_id, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_BOOTP_RELAY, + procname: "bootp_relay", + data: &ipv4_devconf.bootp_relay, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_LOG_MARTIANS, + procname: "log_martians", + data: &ipv4_devconf.log_martians, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_TAG, + procname: "tag", + data: &ipv4_devconf.tag, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + { + ctl_name: NET_IPV4_CONF_ARPFILTER, + procname: "arp_filter", + data: &ipv4_devconf.arp_filter, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec, + }, + }, + devinet_dev: { + { + ctl_name: NET_PROTO_CONF_ALL, + procname: "all", + mode: 0555, + child: devinet_sysctl.devinet_vars, + }, + }, + devinet_conf_dir: { + { + ctl_name: NET_IPV4_CONF, + procname: "conf", + mode: 0555, + child: devinet_sysctl.devinet_dev, + }, + }, + devinet_proto_dir: { + { + ctl_name: NET_IPV4, + procname: "ipv4", + mode: 0555, + child: devinet_sysctl.devinet_conf_dir, + }, + }, + devinet_root_dir: { + { + ctl_name: CTL_NET, + procname: "net", + mode: 0555, + child: devinet_sysctl.devinet_proto_dir, + }, + }, }; -static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devconf 
*p) +static void devinet_sysctl_register(struct in_device *in_dev, + struct ipv4_devconf *p) { int i; struct net_device *dev = in_dev ? in_dev->dev : NULL; - struct devinet_sysctl_table *t; + struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); - t = kmalloc(sizeof(*t), GFP_KERNEL); - if (t == NULL) + if (!t) return; memcpy(t, &devinet_sysctl, sizeof(*t)); - for (i=0; i<sizeof(t->devinet_vars)/sizeof(t->devinet_vars[0])-1; i++) { - t->devinet_vars[i].data += (char*)p - (char*)&ipv4_devconf; + for (i = 0; + i < sizeof(t->devinet_vars) / sizeof(t->devinet_vars[0]) - 1; + i++) { + t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; t->devinet_vars[i].de = NULL; } if (dev) { @@ -1110,17 +1218,17 @@ static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devcon t->devinet_dev[0].procname = "default"; t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; } - t->devinet_dev[0].child = t->devinet_vars; - t->devinet_dev[0].de = NULL; - t->devinet_conf_dir[0].child = t->devinet_dev; - t->devinet_conf_dir[0].de = NULL; + t->devinet_dev[0].child = t->devinet_vars; + t->devinet_dev[0].de = NULL; + t->devinet_conf_dir[0].child = t->devinet_dev; + t->devinet_conf_dir[0].de = NULL; t->devinet_proto_dir[0].child = t->devinet_conf_dir; - t->devinet_proto_dir[0].de = NULL; - t->devinet_root_dir[0].child = t->devinet_proto_dir; - t->devinet_root_dir[0].de = NULL; + t->devinet_proto_dir[0].de = NULL; + t->devinet_root_dir[0].child = t->devinet_proto_dir; + t->devinet_root_dir[0].de = NULL; t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0); - if (t->sysctl_header == NULL) + if (!t->sysctl_header) kfree(t); else p->sysctl = t; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b9966cb78cae..a1c6c5de3188 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1,6 +1,6 @@ /* - * NET3: Implementation of the ICMP protocol layer. - * + * NET3: Implementation of the ICMP protocol layer. 
+ * * Alan Cox, <alan@redhat.com> * * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $ @@ -21,25 +21,25 @@ * of broken per type icmp timeouts. * Mike Shaver : RFC1122 checks. * Alan Cox : Multicast ping reply as self. - * Alan Cox : Fix atomicity lockup in ip_build_xmit + * Alan Cox : Fix atomicity lockup in ip_build_xmit * call. - * Alan Cox : Added 216,128 byte paths to the MTU + * Alan Cox : Added 216,128 byte paths to the MTU * code. * Martin Mares : RFC1812 checks. - * Martin Mares : Can be configured to follow redirects + * Martin Mares : Can be configured to follow redirects * if acting as a router _without_ a * routing protocol (RFC 1812). - * Martin Mares : Echo requests may be configured to + * Martin Mares : Echo requests may be configured to * be ignored (RFC 1812). - * Martin Mares : Limitation of ICMP error message + * Martin Mares : Limitation of ICMP error message * transmit rate (RFC 1812). - * Martin Mares : TOS and Precedence set correctly + * Martin Mares : TOS and Precedence set correctly * (RFC 1812). - * Martin Mares : Now copying as much data from the + * Martin Mares : Now copying as much data from the * original packet as we can without * exceeding 576 bytes (RFC 1812). * Willy Konynenberg : Transparent proxying support. - * Keith Owens : RFC1191 correction for 4.2BSD based + * Keith Owens : RFC1191 correction for 4.2BSD based * path MTU bug. * Thomas Quinot : ICMP Dest Unreach codes up to 15 are * valid (RFC 1812). @@ -52,9 +52,10 @@ * the rates sysctl configurable. * Yu Tianli : Fixed two ugly bugs in icmp_send * - IP option length was accounted wrongly - * - ICMP header length was not accounted at all. - * Tristan Greaves : Added sysctl option to ignore bogus broadcast - * responses from broken routers. + * - ICMP header length was not accounted + * at all. + * Tristan Greaves : Added sysctl option to ignore bogus + * broadcast responses from broken routers. 
* * To Fix: * @@ -95,8 +96,7 @@ * Build xmit assembly blocks */ -struct icmp_bxm -{ +struct icmp_bxm { struct sk_buff *skb; int offset; int data_len; @@ -114,29 +114,76 @@ struct icmp_bxm /* * Statistics */ - -struct icmp_mib icmp_statistics[NR_CPUS*2]; +struct icmp_mib icmp_statistics[NR_CPUS * 2]; /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOS_UNREACH and SR_FAIELD MUST be considered 'transient errs'. */ struct icmp_err icmp_err_convert[] = { - { ENETUNREACH, 0 }, /* ICMP_NET_UNREACH */ - { EHOSTUNREACH, 0 }, /* ICMP_HOST_UNREACH */ - { ENOPROTOOPT, 1 }, /* ICMP_PROT_UNREACH */ - { ECONNREFUSED, 1 }, /* ICMP_PORT_UNREACH */ - { EMSGSIZE, 0 }, /* ICMP_FRAG_NEEDED */ - { EOPNOTSUPP, 0 }, /* ICMP_SR_FAILED */ - { ENETUNREACH, 1 }, /* ICMP_NET_UNKNOWN */ - { EHOSTDOWN, 1 }, /* ICMP_HOST_UNKNOWN */ - { ENONET, 1 }, /* ICMP_HOST_ISOLATED */ - { ENETUNREACH, 1 }, /* ICMP_NET_ANO */ - { EHOSTUNREACH, 1 }, /* ICMP_HOST_ANO */ - { ENETUNREACH, 0 }, /* ICMP_NET_UNR_TOS */ - { EHOSTUNREACH, 0 }, /* ICMP_HOST_UNR_TOS */ - { EHOSTUNREACH, 1 }, /* ICMP_PKT_FILTERED */ - { EHOSTUNREACH, 1 }, /* ICMP_PREC_VIOLATION */ - { EHOSTUNREACH, 1 } /* ICMP_PREC_CUTOFF */ + { + errno: ENETUNREACH, /* ICMP_NET_UNREACH */ + fatal: 0, + }, + { + errno: EHOSTUNREACH, /* ICMP_HOST_UNREACH */ + fatal: 0, + }, + { + errno: ENOPROTOOPT /* ICMP_PROT_UNREACH */, + fatal: 1, + }, + { + errno: ECONNREFUSED, /* ICMP_PORT_UNREACH */ + fatal: 1, + }, + { + errno: EMSGSIZE, /* ICMP_FRAG_NEEDED */ + fatal: 0, + }, + { + errno: EOPNOTSUPP, /* ICMP_SR_FAILED */ + fatal: 0, + }, + { + errno: ENETUNREACH, /* ICMP_NET_UNKNOWN */ + fatal: 1, + }, + { + errno: EHOSTDOWN, /* ICMP_HOST_UNKNOWN */ + fatal: 1, + }, + { + errno: ENONET, /* ICMP_HOST_ISOLATED */ + fatal: 1, + }, + { + errno: ENETUNREACH, /* ICMP_NET_ANO */ + fatal: 1, + }, + { + errno: EHOSTUNREACH, /* ICMP_HOST_ANO */ + fatal: 1, + }, + { + errno: ENETUNREACH, /* ICMP_NET_UNR_TOS */ + 
fatal: 0, + }, + { + errno: EHOSTUNREACH, /* ICMP_HOST_UNR_TOS */ + fatal: 0, + }, + { + errno: EHOSTUNREACH, /* ICMP_PKT_FILTERED */ + fatal: 1, + }, + { + errno: EHOSTUNREACH, /* ICMP_PREC_VIOLATION */ + fatal: 1, + }, + { + errno: EHOSTUNREACH, /* ICMP_PREC_CUTOFF */ + fatal: 1, + }, }; extern int sysctl_ip_default_ttl; @@ -148,19 +195,19 @@ int sysctl_icmp_echo_ignore_broadcasts; /* Control parameter - ignore bogus broadcast responses? */ int sysctl_icmp_ignore_bogus_error_responses; -/* +/* * Configurable global rate limit. * * ratelimit defines tokens/packet consumed for dst->rate_token bucket * ratemask defines which icmp types are ratelimited by setting * it's bit position. * - * default: + * default: * dest unreachable (3), source quench (4), * time exceeded (11), parameter problem (12) */ -int sysctl_icmp_ratelimit = 1*HZ; +int sysctl_icmp_ratelimit = 1 * HZ; int sysctl_icmp_ratemask = 0x1818; /* @@ -182,7 +229,6 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; * our ICMP output as well as maintain a clean interface throughout * all layers. All Socketless IP sends will soon be gone. */ - struct socket *icmp_socket; /* ICMPv4 socket is only a bit non-reenterable (unlike ICMPv6, @@ -194,13 +240,17 @@ static int icmp_xmit_holder = -1; static int icmp_xmit_lock_bh(void) { + int rc; if (!spin_trylock(&icmp_socket->sk->lock.slock)) { + rc = -EAGAIN; if (icmp_xmit_holder == smp_processor_id()) - return -EAGAIN; + goto out; spin_lock(&icmp_socket->sk->lock.slock); } + rc = 0; icmp_xmit_holder = smp_processor_id(); - return 0; +out: + return rc; } static __inline__ int icmp_xmit_lock(void) @@ -236,14 +286,14 @@ static __inline__ void icmp_xmit_unlock(void) * This function is generic and could be used for other purposes * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. 
* - * Note that the same dst_entry fields are modified by functions in + * Note that the same dst_entry fields are modified by functions in * route.c too, but these work for packet destinations while xrlim_allow * works for icmp destinations. This means the rate limiting information * for one "ip object" is shared - and these ICMPs are twice limited: * by source and by destination. * * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate - * SHOULD allow setting of rate limits + * SHOULD allow setting of rate limits * * Shared between ICMPv4 and ICMPv6. */ @@ -251,68 +301,75 @@ static __inline__ void icmp_xmit_unlock(void) int xrlim_allow(struct dst_entry *dst, int timeout) { unsigned long now; + int rc = 0; now = jiffies; dst->rate_tokens += now - dst->rate_last; dst->rate_last = now; - if (dst->rate_tokens > XRLIM_BURST_FACTOR*timeout) - dst->rate_tokens = XRLIM_BURST_FACTOR*timeout; + if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout) + dst->rate_tokens = XRLIM_BURST_FACTOR * timeout; if (dst->rate_tokens >= timeout) { dst->rate_tokens -= timeout; - return 1; + rc = 1; } - return 0; + return rc; } static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) { - struct dst_entry *dst = &rt->u.dst; + struct dst_entry *dst = &rt->u.dst; + int rc = 1; if (type > NR_ICMP_TYPES) - return 1; + goto out; /* Don't limit PMTU discovery. */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) - return 1; + goto out; /* No rate limit on loopback */ if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) - return 1; + goto out; /* Limit if icmp type is enabled in ratemask. 
*/ - if((1 << type) & sysctl_icmp_ratemask) - return xrlim_allow(dst, sysctl_icmp_ratelimit); - else - return 1; + if ((1 << type) & sysctl_icmp_ratemask) + rc = xrlim_allow(dst, sysctl_icmp_ratelimit); +out: + return rc; } /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ - static void icmp_out_count(int type) { - if (type>NR_ICMP_TYPES) - return; - (icmp_pointers[type].output)[(smp_processor_id()*2+!in_softirq())*sizeof(struct icmp_mib)/sizeof(unsigned long)]++; - ICMP_INC_STATS(IcmpOutMsgs); + if (type <= NR_ICMP_TYPES) { + (icmp_pointers[type].output)[(smp_processor_id() * 2 + + !in_softirq()) * + sizeof(struct icmp_mib) / + sizeof(unsigned long)]++; + ICMP_INC_STATS(IcmpOutMsgs); + } } - + /* - * Checksum each fragment, and on the first include the headers and final checksum. + * Checksum each fragment, and on the first include the headers and final + * checksum. */ - -static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned int fraglen) +static int icmp_glue_bits(const void *p, char *to, unsigned int offset, + unsigned int fraglen) { struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; struct icmphdr *icmph; unsigned int csum; if (offset) { - icmp_param->csum=skb_copy_and_csum_bits(icmp_param->skb, - icmp_param->offset+(offset-icmp_param->head_len), - to, fraglen,icmp_param->csum); - return 0; + icmp_param->csum = + skb_copy_and_csum_bits(icmp_param->skb, + icmp_param->offset + + (offset - icmp_param->head_len), + to, fraglen, icmp_param->csum); + goto out; } /* @@ -321,15 +378,14 @@ static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned * for the whole packet here. 
*/ csum = csum_partial_copy_nocheck((void *)&icmp_param->data, - to, icmp_param->head_len, - icmp_param->csum); - csum=skb_copy_and_csum_bits(icmp_param->skb, - icmp_param->offset, - to+icmp_param->head_len, - fraglen-icmp_param->head_len, - csum); - icmph=(struct icmphdr *)to; + to, icmp_param->head_len, + icmp_param->csum); + csum = skb_copy_and_csum_bits(icmp_param->skb, icmp_param->offset, + to + icmp_param->head_len, + fraglen - icmp_param->head_len, csum); + icmph = (struct icmphdr *)to; icmph->checksum = csum_fold(csum); +out: return 0; } @@ -339,20 +395,18 @@ static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct sock *sk=icmp_socket->sk; + struct sock *sk = icmp_socket->sk; struct inet_opt *inet = inet_sk(sk); struct ipcm_cookie ipc; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = (struct rtable *)skb->dst; u32 daddr; - if (ip_options_echo(&icmp_param->replyopts, skb)) - return; - - if (icmp_xmit_lock_bh()) - return; + if (ip_options_echo(&icmp_param->replyopts, skb) || + icmp_xmit_lock_bh()) + goto out; - icmp_param->data.icmph.checksum=0; - icmp_param->csum=0; + icmp_param->data.icmph.checksum = 0; + icmp_param->csum = 0; icmp_out_count(icmp_param->data.icmph.type); inet->tos = skb->nh.iph->tos; @@ -364,24 +418,27 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (ipc.opt->srr) daddr = icmp_param->replyopts.faddr; } - if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) - goto out; - if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, - icmp_param->data.icmph.code)) { - ip_build_xmit(sk, icmp_glue_bits, icmp_param, + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, + RT_TOS(skb->nh.iph->tos), 0)) + goto out_unlock; + if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, + icmp_param->data.icmph.code)) { + ip_build_xmit(sk, icmp_glue_bits, icmp_param, 
icmp_param->data_len+icmp_param->head_len, &ipc, rt, MSG_DONTWAIT); } ip_rt_put(rt); -out: +out_unlock: icmp_xmit_unlock_bh(); +out:; } /* * Send an ICMP message in response to a situation * - * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). + * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. + * MAY send more (we do). * MUST NOT change this header information. * MUST NOT reply to a multicast/broadcast IP address. * MUST NOT reply to a multicast/broadcast MAC address. @@ -393,13 +450,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) struct iphdr *iph; int room; struct icmp_bxm icmp_param; - struct rtable *rt = (struct rtable*)skb_in->dst; + struct rtable *rt = (struct rtable *)skb_in->dst; struct ipcm_cookie ipc; u32 saddr; u8 tos; if (!rt) - return; + goto out; /* * Find the original header. It is expected to be valid, of course. @@ -408,66 +465,67 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) */ iph = skb_in->nh.iph; - if ((u8*)iph < skb_in->head || (u8*)(iph+1) > skb_in->tail) - return; + if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail) + goto out; /* * No replies to physical multicast/broadcast */ - if (skb_in->pkt_type!=PACKET_HOST) - return; + if (skb_in->pkt_type != PACKET_HOST) + goto out; /* * Now check at the protocol level */ - if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) - return; + if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + goto out; /* * Only reply to fragment 0. We byte re-order the constant * mask for efficiency. */ - if (iph->frag_off&htons(IP_OFFSET)) - return; + if (iph->frag_off & htons(IP_OFFSET)) + goto out; - /* + /* * If we send an ICMP error to an ICMP error a mess would result.. 
*/ if (icmp_pointers[type].error) { /* - * We are an error, check if we are replying to an ICMP error + * We are an error, check if we are replying to an + * ICMP error */ - if (iph->protocol==IPPROTO_ICMP) { + if (iph->protocol == IPPROTO_ICMP) { u8 inner_type; if (skb_copy_bits(skb_in, - skb_in->nh.raw + (iph->ihl<<2) - + offsetof(struct icmphdr, type) - - skb_in->data, - &inner_type, 1)) - return; + skb_in->nh.raw + (iph->ihl << 2) + + offsetof(struct icmphdr, type) - + skb_in->data, &inner_type, 1)) + goto out; /* - * Assume any unknown ICMP type is an error. This isn't - * specified by the RFC, but think about it.. + * Assume any unknown ICMP type is an error. This + * isn't specified by the RFC, but think about it.. */ - if (inner_type>NR_ICMP_TYPES || icmp_pointers[inner_type].error) - return; + if (inner_type > NR_ICMP_TYPES || + icmp_pointers[inner_type].error) + goto out; } } if (icmp_xmit_lock()) - return; + goto out; /* * Construct source address and options. */ -#ifdef CONFIG_IP_ROUTE_NAT +#ifdef CONFIG_IP_ROUTE_NAT /* * Restore original addresses if packet has been translated. */ - if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) { + if (rt->rt_flags & RTCF_NAT && IPCB(skb_in)->flags & IPSKB_TRANSLATED) { iph->daddr = rt->key.dst; iph->saddr = rt->key.src; } @@ -477,14 +535,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) if (!(rt->rt_flags & RTCF_LOCAL)) saddr = 0; - tos = icmp_pointers[type].error ? - ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : - iph->tos; + tos = icmp_pointers[type].error ? 
((iph->tos & IPTOS_TOS_MASK) | + IPTOS_PREC_INTERNETCONTROL) : + iph->tos; if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) - goto out; + goto out_unlock; - if (ip_options_echo(&icmp_param.replyopts, skb_in)) + if (ip_options_echo(&icmp_param.replyopts, skb_in)) goto ende; @@ -492,13 +550,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) * Prepare data for ICMP header. */ - icmp_param.data.icmph.type=type; - icmp_param.data.icmph.code=code; + icmp_param.data.icmph.type = type; + icmp_param.data.icmph.code = code; icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum=0; - icmp_param.csum=0; - icmp_param.skb=skb_in; - icmp_param.offset=skb_in->nh.raw - skb_in->data; + icmp_param.data.icmph.checksum = 0; + icmp_param.csum = 0; + icmp_param.skb = skb_in; + icmp_param.offset = skb_in->nh.raw - skb_in->data; icmp_out_count(icmp_param.data.icmph.type); inet_sk(icmp_socket->sk)->tos = tos; inet_sk(icmp_socket->sk)->ttl = sysctl_ip_default_ttl; @@ -506,8 +564,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) ipc.opt = &icmp_param.replyopts; if (icmp_param.replyopts.srr) { ip_rt_put(rt); - if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) - goto out; + if (ip_route_output(&rt, icmp_param.replyopts.faddr, + saddr, RT_TOS(tos), 0)) + goto out_unlock; } if (!icmpv4_xrlim_allow(rt, type, code)) @@ -521,24 +580,24 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len=skb_in->len-icmp_param.offset; + icmp_param.data_len = skb_in->len - icmp_param.offset; if (icmp_param.data_len > room) icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); - ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, - icmp_param.data_len+sizeof(struct icmphdr), - &ipc, rt, MSG_DONTWAIT); - + ip_build_xmit(icmp_socket->sk, icmp_glue_bits, 
&icmp_param, + icmp_param.data_len + sizeof(struct icmphdr), + &ipc, rt, MSG_DONTWAIT); ende: ip_rt_put(rt); -out: +out_unlock: icmp_xmit_unlock(); +out:; } -/* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. +/* + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. */ static void icmp_unreach(struct sk_buff *skb) @@ -556,60 +615,59 @@ static void icmp_unreach(struct sk_buff *skb) * additional check for longer headers in upper levels. */ - if (!pskb_may_pull(skb, sizeof(struct iphdr))) { - ICMP_INC_STATS_BH(IcmpInErrors); - return; - } + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out_err; icmph = skb->h.icmph; - iph = (struct iphdr *) skb->data; + iph = (struct iphdr *)skb->data; - if (iph->ihl<5) { - /* Mangled header, drop. */ - ICMP_INC_STATS_BH(IcmpInErrors); - return; - } + if (iph->ihl < 5) /* Mangled header, drop. */ + goto out_err; - if(icmph->type==ICMP_DEST_UNREACH) { - switch(icmph->code & 15) { - case ICMP_NET_UNREACH: - break; - case ICMP_HOST_UNREACH: - break; - case ICMP_PROT_UNREACH: - break; - case ICMP_PORT_UNREACH: - break; - case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) { - if (net_ratelimit()) - printk(KERN_INFO "ICMP: %u.%u.%u.%u: fragmentation needed and DF set.\n", - NIPQUAD(iph->daddr)); - } else { - info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); - if (!info) - goto out; - } - break; - case ICMP_SR_FAILED: + if (icmph->type == ICMP_DEST_UNREACH) { + switch (icmph->code & 15) { + case ICMP_NET_UNREACH: + case ICMP_HOST_UNREACH: + case ICMP_PROT_UNREACH: + case ICMP_PORT_UNREACH: + break; + case ICMP_FRAG_NEEDED: + if (ipv4_config.no_pmtu_disc) { if (net_ratelimit()) - printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source Route Failed.\n", NIPQUAD(iph->daddr)); - break; - default: - break; + printk(KERN_INFO "ICMP: %u.%u.%u.%u: " + "fragmentation needed " + "and DF set.\n", + NIPQUAD(iph->daddr)); + } else { + info = ip_rt_frag_needed(iph, + ntohs(icmph->un.frag.mtu)); + if (!info) + goto out; 
+ } + break; + case ICMP_SR_FAILED: + if (net_ratelimit()) + printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source " + "Route Failed.\n", + NIPQUAD(iph->daddr)); + break; + default: + break; } - if (icmph->code>NR_ICMP_UNREACH) + if (icmph->code > NR_ICMP_UNREACH) goto out; - } else if (icmph->type == ICMP_PARAMETERPROB) { - info = ntohl(icmph->un.gateway)>>24; - } + } else if (icmph->type == ICMP_PARAMETERPROB) + info = ntohl(icmph->un.gateway) >> 24; /* * Throw it at our lower layers * - * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed header. - * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the transport layer. - * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to transport layer. + * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed + * header. + * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the + * transport layer. + * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to + * transport layer. */ /* @@ -619,25 +677,22 @@ static void icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!sysctl_icmp_ignore_bogus_error_responses) - { - - if (inet_addr_type(iph->daddr) == RTN_BROADCAST) - { - if (net_ratelimit()) - printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP error to a broadcast.\n", - NIPQUAD(skb->nh.iph->saddr)); - goto out; - } + if (!sysctl_icmp_ignore_bogus_error_responses && + inet_addr_type(iph->daddr) == RTN_BROADCAST) { + if (net_ratelimit()) + printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " + "error to a broadcast.\n", + NIPQUAD(skb->nh.iph->saddr)); + goto out; } /* Checkin full IP header plus 8 bytes of protocol to * avoid additional coding at protocol handlers. 
*/ - if (!pskb_may_pull(skb, iph->ihl*4+8)) + if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) goto out; - iph = (struct iphdr *) skb->data; + iph = (struct iphdr *)skb->data; protocol = iph->protocol; /* @@ -647,10 +702,10 @@ static void icmp_unreach(struct sk_buff *skb) /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ hash = protocol & (MAX_INET_PROTOS - 1); read_lock(&raw_v4_lock); - if ((raw_sk = raw_v4_htable[hash]) != NULL) - { + if ((raw_sk = raw_v4_htable[hash]) != NULL) { while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, - iph->saddr, skb->dev->ifindex)) != NULL) { + iph->saddr, + skb->dev->ifindex)) != NULL) { raw_err(raw_sk, skb, info); raw_sk = raw_sk->next; iph = (struct iphdr *)skb->data; @@ -659,19 +714,18 @@ static void icmp_unreach(struct sk_buff *skb) read_unlock(&raw_v4_lock); /* - * This can't change while we are doing it. + * This can't change while we are doing it. * Callers have obtained BR_NETPROTO_LOCK so * we are OK. */ - ipprot = (struct inet_protocol *) inet_protos[hash]; + ipprot = (struct inet_protocol *)inet_protos[hash]; while (ipprot) { struct inet_protocol *nextip; - nextip = (struct inet_protocol *) ipprot->next; - - /* - * Pass it off to everyone who wants it. + nextip = (struct inet_protocol *)ipprot->next; + /* + * Pass it off to everyone who wants it. */ /* RFC1122: OK. Passes appropriate ICMP errors to the */ @@ -682,12 +736,16 @@ static void icmp_unreach(struct sk_buff *skb) ipprot = nextip; } -out:; +out: + return; +out_err: + ICMP_INC_STATS_BH(IcmpInErrors); + goto out; } /* - * Handle ICMP_REDIRECT. + * Handle ICMP_REDIRECT. 
*/ static void icmp_redirect(struct sk_buff *skb) @@ -695,18 +753,16 @@ static void icmp_redirect(struct sk_buff *skb) struct iphdr *iph; unsigned long ip; - if (skb->len < sizeof(struct iphdr)) { - ICMP_INC_STATS_BH(IcmpInErrors); - return; - } + if (skb->len < sizeof(struct iphdr)) + goto out_err; /* * Get the copied header of the packet that caused the redirect */ if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return; + goto out; - iph = (struct iphdr *) skb->data; + iph = (struct iphdr *)skb->data; ip = iph->daddr; switch (skb->h.icmph->code & 7) { @@ -716,22 +772,31 @@ static void icmp_redirect(struct sk_buff *skb) * As per RFC recommendations now handle it as * a host redirect. */ - case ICMP_REDIR_HOST: case ICMP_REDIR_HOSTTOS: - ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, iph->saddr, iph->tos, skb->dev); + ip_rt_redirect(skb->nh.iph->saddr, + ip, skb->h.icmph->un.gateway, + iph->saddr, iph->tos, skb->dev); break; default: break; } +out: + return; +out_err: + ICMP_INC_STATS_BH(IcmpInErrors); + goto out; } /* - * Handle ICMP_ECHO ("ping") requests. + * Handle ICMP_ECHO ("ping") requests. * - * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo requests. - * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be included in the reply. - * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring echo requests, MUST have default=NOT. + * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo + * requests. + * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be + * included in the reply. + * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring + * echo requests, MUST have default=NOT. * See also WRT handling of options once they are done and working. 
*/ @@ -740,65 +805,66 @@ static void icmp_echo(struct sk_buff *skb) if (!sysctl_icmp_echo_ignore_all) { struct icmp_bxm icmp_param; - icmp_param.data.icmph=*skb->h.icmph; - icmp_param.data.icmph.type=ICMP_ECHOREPLY; - icmp_param.skb=skb; - icmp_param.offset=0; - icmp_param.data_len=skb->len; - icmp_param.head_len=sizeof(struct icmphdr); + icmp_param.data.icmph = *skb->h.icmph; + icmp_param.data.icmph.type = ICMP_ECHOREPLY; + icmp_param.skb = skb; + icmp_param.offset = 0; + icmp_param.data_len = skb->len; + icmp_param.head_len = sizeof(struct icmphdr); icmp_reply(&icmp_param, skb); } } /* - * Handle ICMP Timestamp requests. + * Handle ICMP Timestamp requests. * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests. * SHOULD be in the kernel for minimum random latency. * MUST be accurate to a few minutes. * MUST be updated at least at 15Hz. */ - static void icmp_timestamp(struct sk_buff *skb) { struct timeval tv; struct icmp_bxm icmp_param; - /* * Too short. */ - - if (skb->len < 4) { - ICMP_INC_STATS_BH(IcmpInErrors); - return; - } + if (skb->len < 4) + goto out_err; /* - * Fill in the current time as ms since midnight UT: + * Fill in the current time as ms since midnight UT: */ do_gettimeofday(&tv); - icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); + icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 + + tv.tv_usec / 1000); icmp_param.data.times[2] = icmp_param.data.times[1]; if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) BUG(); - icmp_param.data.icmph=*skb->h.icmph; - icmp_param.data.icmph.type=ICMP_TIMESTAMPREPLY; - icmp_param.data.icmph.code=0; - icmp_param.skb=skb; - icmp_param.offset=0; - icmp_param.data_len=0; - icmp_param.head_len=sizeof(struct icmphdr)+12; + icmp_param.data.icmph = *skb->h.icmph; + icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; + icmp_param.data.icmph.code = 0; + icmp_param.skb = skb; + icmp_param.offset = 0; + icmp_param.data_len = 0; + icmp_param.head_len = sizeof(struct icmphdr) + 
12; icmp_reply(&icmp_param, skb); +out: + return; +out_err: + ICMP_INC_STATS_BH(IcmpInErrors); + goto out; } -/* +/* * Handle ICMP_ADDRESS_MASK requests. (RFC950) * - * RFC1122 (3.2.2.9). A host MUST only send replies to - * ADDRESS_MASK requests if it's been configured as an address mask - * agent. Receiving a request doesn't constitute implicit permission to - * act as one. Of course, implementing this correctly requires (SHOULD) - * a way to turn the functionality on and off. Another one for sysctl(), + * RFC1122 (3.2.2.9). A host MUST only send replies to + * ADDRESS_MASK requests if it's been configured as an address mask + * agent. Receiving a request doesn't constitute implicit permission to + * act as one. Of course, implementing this correctly requires (SHOULD) + * a way to turn the functionality on and off. Another one for sysctl(), * I guess. -- MS * * RFC1812 (4.3.3.9). A router MUST implement it. @@ -829,7 +895,7 @@ static void icmp_address(struct sk_buff *skb) #if 0 if (net_ratelimit()) printk(KERN_DEBUG "a guy asks for address mask. 
Who is it?\n"); -#endif +#endif } /* @@ -839,57 +905,60 @@ static void icmp_address(struct sk_buff *skb) static void icmp_address_reply(struct sk_buff *skb) { - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; u32 mask; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) - return; + goto out; in_dev = in_dev_get(dev); if (!in_dev) - return; + goto out; read_lock(&in_dev->lock); if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { if (skb_copy_bits(skb, 0, &mask, 4)) BUG(); - for (ifa=in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { - if (mask == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + if (mask == ifa->ifa_mask && + inet_ifa_match(rt->rt_src, ifa)) break; } if (!ifa && net_ratelimit()) { - printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from %s/%u.%u.%u.%u\n", + printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from " + "%s/%u.%u.%u.%u\n", NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src)); } } read_unlock(&in_dev->lock); in_dev_put(in_dev); +out:; } static void icmp_discard(struct sk_buff *skb) { } -/* +/* * Deal with incoming ICMP packets. */ - int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = (struct rtable *)skb->dst; ICMP_INC_STATS_BH(IcmpInMsgs); switch (skb->ip_summed) { case CHECKSUM_HW: - if ((u16)csum_fold(skb->csum) == 0) + if (!(u16)csum_fold(skb->csum)) break; - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "icmp v4 hw csum failure\n")); + NETDEBUG(if (net_ratelimit()) + printk(KERN_DEBUG "icmp v4 hw csum failure\n")); case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) goto error; @@ -904,17 +973,18 @@ int icmp_rcv(struct sk_buff *skb) /* * 18 is the highest 'known' ICMP type. 
Anything else is a mystery * - * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded. + * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently + * discarded. */ if (icmph->type > NR_ICMP_TYPES) goto error; /* - * Parse the ICMP message + * Parse the ICMP message */ - if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { + if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { /* * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be * silently ignored (we let user decide with a sysctl). @@ -933,7 +1003,9 @@ int icmp_rcv(struct sk_buff *skb) } } - icmp_pointers[icmph->type].input[smp_processor_id()*2*sizeof(struct icmp_mib)/sizeof(unsigned long)]++; + icmp_pointers[icmph->type].input[smp_processor_id() * 2 * + sizeof(struct icmp_mib) / + sizeof(unsigned long)]++; (icmp_pointers[icmph->type].handler)(skb); drop: @@ -947,40 +1019,127 @@ error: /* * This table is the definition of how we handle ICMP. */ - -static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = { -/* ECHO REPLY (0) */ - { &icmp_statistics[0].IcmpOutEchoReps, &icmp_statistics[0].IcmpInEchoReps, icmp_discard, 0 }, - { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, - { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, -/* DEST UNREACH (3) */ - { &icmp_statistics[0].IcmpOutDestUnreachs, &icmp_statistics[0].IcmpInDestUnreachs, icmp_unreach, 1 }, -/* SOURCE QUENCH (4) */ - { &icmp_statistics[0].IcmpOutSrcQuenchs, &icmp_statistics[0].IcmpInSrcQuenchs, icmp_unreach, 1 }, -/* REDIRECT (5) */ - { &icmp_statistics[0].IcmpOutRedirects, &icmp_statistics[0].IcmpInRedirects, icmp_redirect, 1 }, - { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, - { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, -/* ECHO (8) */ - { &icmp_statistics[0].IcmpOutEchos, &icmp_statistics[0].IcmpInEchos, icmp_echo, 0 }, - { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, 
icmp_discard, 1 }, - { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, -/* TIME EXCEEDED (11) */ - { &icmp_statistics[0].IcmpOutTimeExcds, &icmp_statistics[0].IcmpInTimeExcds, icmp_unreach, 1 }, -/* PARAMETER PROBLEM (12) */ - { &icmp_statistics[0].IcmpOutParmProbs, &icmp_statistics[0].IcmpInParmProbs, icmp_unreach, 1 }, -/* TIMESTAMP (13) */ - { &icmp_statistics[0].IcmpOutTimestamps, &icmp_statistics[0].IcmpInTimestamps, icmp_timestamp, 0 }, -/* TIMESTAMP REPLY (14) */ - { &icmp_statistics[0].IcmpOutTimestampReps, &icmp_statistics[0].IcmpInTimestampReps, icmp_discard, 0 }, -/* INFO (15) */ - { &icmp_statistics[0].dummy, &icmp_statistics[0].dummy, icmp_discard, 0 }, -/* INFO REPLY (16) */ - { &icmp_statistics[0].dummy, &icmp_statistics[0].dummy, icmp_discard, 0 }, -/* ADDR MASK (17) */ - { &icmp_statistics[0].IcmpOutAddrMasks, &icmp_statistics[0].IcmpInAddrMasks, icmp_address, 0 }, -/* ADDR MASK REPLY (18) */ - { &icmp_statistics[0].IcmpOutAddrMaskReps, &icmp_statistics[0].IcmpInAddrMaskReps, icmp_address_reply, 0 } +static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { + /* ECHO REPLY (0) */ + [0] = { + output: &icmp_statistics[0].IcmpOutEchoReps, + input: &icmp_statistics[0].IcmpInEchoReps, + handler: icmp_discard, + }, + [1] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].IcmpInErrors, + handler: icmp_discard, + error: 1, + }, + [2] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].IcmpInErrors, + handler: icmp_discard, + error: 1, + }, + /* DEST UNREACH (3) */ + [3] = { + output: &icmp_statistics[0].IcmpOutDestUnreachs, + input: &icmp_statistics[0].IcmpInDestUnreachs, + handler: icmp_unreach, + error: 1, + }, + /* SOURCE QUENCH (4) */ + [4] = { + output: &icmp_statistics[0].IcmpOutSrcQuenchs, + input: &icmp_statistics[0].IcmpInSrcQuenchs, + icmp_unreach, + error: 1, + }, + /* REDIRECT (5) */ + [5] = { + output: &icmp_statistics[0].IcmpOutRedirects, + input: 
&icmp_statistics[0].IcmpInRedirects, + handler: icmp_redirect, + error: 1, + }, + [6] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].IcmpInErrors, + handler: icmp_discard, + error: 1, + }, + [7] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].IcmpInErrors, + handler: icmp_discard, + error: 1, + }, + /* ECHO (8) */ + [8] = { + output: &icmp_statistics[0].IcmpOutEchos, + input: &icmp_statistics[0].IcmpInEchos, + handler: icmp_echo, + error: 0, + }, + [9] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].IcmpInErrors, + handler: icmp_discard, + error: 1, + }, + [10] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].IcmpInErrors, + handler: icmp_discard, + error: 1, + }, + /* TIME EXCEEDED (11) */ + [11] = { + output: &icmp_statistics[0].IcmpOutTimeExcds, + input: &icmp_statistics[0].IcmpInTimeExcds, + handler: icmp_unreach, + error: 1, + }, + /* PARAMETER PROBLEM (12) */ + [12] = { + output: &icmp_statistics[0].IcmpOutParmProbs, + input: &icmp_statistics[0].IcmpInParmProbs, + handler: icmp_unreach, + error: 1, + }, + /* TIMESTAMP (13) */ + [13] = { + output: &icmp_statistics[0].IcmpOutTimestamps, + input: &icmp_statistics[0].IcmpInTimestamps, + handler: icmp_timestamp, + }, + /* TIMESTAMP REPLY (14) */ + [14] = { + output: &icmp_statistics[0].IcmpOutTimestampReps, + input: &icmp_statistics[0].IcmpInTimestampReps, + handler: icmp_discard, + }, + /* INFO (15) */ + [15] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].dummy, + handler: icmp_discard, + }, + /* INFO REPLY (16) */ + [16] = { + output: &icmp_statistics[0].dummy, + input: &icmp_statistics[0].dummy, + handler: icmp_discard, + }, + /* ADDR MASK (17) */ + [17] = { + output: &icmp_statistics[0].IcmpOutAddrMasks, + input: &icmp_statistics[0].IcmpInAddrMasks, + handler: icmp_address, + }, + /* ADDR MASK REPLY (18) */ + [18] = { + output: &icmp_statistics[0].IcmpOutAddrMaskReps, + input: 
&icmp_statistics[0].IcmpInAddrMaskReps, + handler: icmp_address_reply, + } }; void __init icmp_init(struct net_proto_family *ops) @@ -990,8 +1149,8 @@ void __init icmp_init(struct net_proto_family *ops) if (err < 0) panic("Failed to create the ICMP control socket.\n"); - icmp_socket->sk->allocation=GFP_ATOMIC; - icmp_socket->sk->sndbuf = SK_WMEM_MAX*2; + icmp_socket->sk->allocation = GFP_ATOMIC; + icmp_socket->sk->sndbuf = SK_WMEM_MAX * 2; inet = inet_sk(icmp_socket->sk); inet->ttl = MAXTTL; inet->pmtudisc = IP_PMTUDISC_DONT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0dd3bb28b4d1..53c06caf141f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -32,7 +32,8 @@ * and the rest go in the other half. * Andi Kleen : Add support for syncookies and fixed * some bugs: ip options weren't passed to - * the TCP layer, missed a check for an ACK bit. + * the TCP layer, missed a check for an + * ACK bit. * Andi Kleen : Implemented fast path mtu discovery. * Fixed many serious bugs in the * open_request handling and moved @@ -42,7 +43,8 @@ * Mike McLagan : Routing by source * Juan Jose Ciarlante: ip_dynaddr bits * Andi Kleen: various fixes. - * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Vitaly E. Lavrov : Transparent proxy revived after year + * coma. * Andi Kleen : Fix new listen. * Andi Kleen : Fix accept error reporting. */ @@ -65,26 +67,18 @@ extern int sysctl_ip_dynaddr; extern int sysctl_ip_default_ttl; -int sysctl_tcp_tw_reuse = 0; +int sysctl_tcp_tw_reuse; /* Check TCP sequence numbers in ICMP packets. 
*/ #define ICMP_MIN_LENGTH 8 -/* Socket used for sending RSTs */ +/* Socket used for sending RSTs */ static struct socket *tcp_socket; -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); -/* - * ALL members must be initialised to prevent gcc-2.7.2.3 miscompilation - */ struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { - __tcp_ehash: NULL, - __tcp_bhash: NULL, - __tcp_bhash_size: 0, - __tcp_ehash_size: 0, - __tcp_listening_hash: { NULL, }, __tcp_lhash_lock: RW_LOCK_UNLOCKED, __tcp_lhash_users: ATOMIC_INIT(0), __tcp_lhash_wait: @@ -98,14 +92,14 @@ struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { * 32768-61000 */ int sysctl_local_port_range[2] = { 1024, 4999 }; -int tcp_port_rover = (1024 - 1); +int tcp_port_rover = 1024 - 1; static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, __u32 faddr, __u16 fport) { - int h = ((laddr ^ lport) ^ (faddr ^ fport)); - h ^= h>>16; - h ^= h>>8; + int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; return h & (tcp_ehash_size - 1); } @@ -126,14 +120,13 @@ static __inline__ int tcp_sk_hashfn(struct sock *sk) struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, unsigned short snum) { - struct tcp_bind_bucket *tb; - - tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC); - if(tb != NULL) { + struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, + SLAB_ATOMIC); + if (tb) { tb->port = snum; tb->fastreuse = 0; tb->owners = NULL; - if((tb->next = head->chain) != NULL) + if ((tb->next = head->chain) != NULL) tb->next->pprev = &tb->next; head->chain = tb; tb->pprev = &head->chain; @@ -152,9 +145,9 @@ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) tb = (struct tcp_bind_bucket *)sk->prev; if ((child->bind_next = tb->owners) != NULL) tb->owners->bind_pprev = &child->bind_next; - tb->owners = child; + tb->owners = child; 
child->bind_pprev = &tb->owners; - child->prev = (struct sock *) tb; + child->prev = (struct sock *)tb; spin_unlock(&head->lock); } @@ -165,14 +158,15 @@ __inline__ void tcp_inherit_port(struct sock *sk, struct sock *child) local_bh_enable(); } -static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum) +static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, + unsigned short snum) { inet_sk(sk)->num = snum; if ((sk->bind_next = tb->owners) != NULL) tb->owners->bind_pprev = &sk->bind_next; - tb->owners = sk; + tb->owners = sk; sk->bind_pprev = &tb->owners; - sk->prev = (struct sock *) tb; + sk->prev = (struct sock *)tb; } static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) @@ -180,17 +174,14 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) struct inet_opt *inet = inet_sk(sk); struct sock *sk2 = tb->owners; int sk_reuse = sk->reuse; - - for( ; sk2 != NULL; sk2 = sk2->bind_next) { - if (sk != sk2 && - sk->bound_dev_if == sk2->bound_dev_if) { - if (!sk_reuse || - !sk2->reuse || + + for ( ; sk2; sk2 = sk2->bind_next) { + if (sk != sk2 && sk->bound_dev_if == sk2->bound_dev_if) { + if (!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) { struct inet_opt *inet2 = inet_sk(sk2); - if (!inet2->rcv_saddr || - !inet->rcv_saddr || - (inet2->rcv_saddr == inet->rcv_saddr)) + if (!inet2->rcv_saddr || !inet->rcv_saddr || + inet2->rcv_saddr == inet->rcv_saddr) break; } } @@ -208,7 +199,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int ret; local_bh_disable(); - if (snum == 0) { + if (!snum) { int low = sysctl_local_port_range[0]; int high = sysctl_local_port_range[1]; int remaining = (high - low) + 1; @@ -216,8 +207,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) spin_lock(&tcp_portalloc_lock); rover = tcp_port_rover; - do { rover++; - if ((rover < low) || (rover > high)) + do { + rover++; + if (rover < low || 
rover > high) rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; spin_lock(&head->lock); @@ -244,14 +236,14 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) } else { head = &tcp_bhash[tcp_bhashfn(snum)]; spin_lock(&head->lock); - for (tb = head->chain; tb != NULL; tb = tb->next) + for (tb = head->chain; tb; tb = tb->next) if (tb->port == snum) break; } - if (tb != NULL && tb->owners != NULL) { + if (tb && tb->owners) { if (sk->reuse > 1) goto success; - if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) { + if (tb->fastreuse > 0 && sk->reuse && sk->state != TCP_LISTEN) { goto success; } else { ret = 1; @@ -260,21 +252,19 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) } } ret = 1; - if (tb == NULL && - (tb = tcp_bucket_create(head, snum)) == NULL) - goto fail_unlock; - if (tb->owners == NULL) { + if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + goto fail_unlock; + if (!tb->owners) { if (sk->reuse && sk->state != TCP_LISTEN) tb->fastreuse = 1; else tb->fastreuse = 0; - } else if (tb->fastreuse && - ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) + } else if (tb->fastreuse && (!sk->reuse || sk->state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (sk->prev == NULL) + if (!sk->prev) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(sk->prev == (struct sock *) tb); + BUG_TRAP(sk->prev == (struct sock *)tb); ret = 0; fail_unlock: @@ -298,9 +288,9 @@ __inline__ void __tcp_put_port(struct sock *sk) if (sk->bind_next) sk->bind_next->bind_pprev = sk->bind_pprev; *(sk->bind_pprev) = sk->bind_next; - sk->prev = NULL; + sk->prev = NULL; inet->num = 0; - if (tb->owners == NULL) { + if (!tb->owners) { if (tb->next) tb->next->pprev = tb->pprev; *(tb->pprev) = tb->next; @@ -333,7 +323,7 @@ void tcp_listen_wlock(void) add_wait_queue_exclusive(&tcp_lhash_wait, &wait); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&tcp_lhash_users) == 0) + if (!atomic_read(&tcp_lhash_users)) break; 
write_unlock_bh(&tcp_lhash_lock); schedule(); @@ -350,8 +340,8 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) struct sock **skp; rwlock_t *lock; - BUG_TRAP(sk->pprev==NULL); - if(listen_possible && sk->state == TCP_LISTEN) { + BUG_TRAP(!sk->pprev); + if (listen_possible && sk->state == TCP_LISTEN) { skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); @@ -360,7 +350,7 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) lock = &tcp_ehash[sk->hashent].lock; write_lock(lock); } - if((sk->next = *skp) != NULL) + if ((sk->next = *skp) != NULL) (*skp)->pprev = &sk->next; *skp = sk; sk->pprev = skp; @@ -396,8 +386,8 @@ void tcp_unhash(struct sock *sk) write_lock_bh(&head->lock); } - if(sk->pprev) { - if(sk->next) + if (sk->pprev) { + if (sk->next) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; @@ -416,20 +406,21 @@ void tcp_unhash(struct sock *sk) * connection. So always assume those are both wildcarded * during the search since they can never be otherwise. */ -static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigned short hnum, int dif) +static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, + unsigned short hnum, int dif) { struct sock *result = NULL; int score, hiscore; hiscore=0; - for(; sk; sk = sk->next) { + for (; sk; sk = sk->next) { struct inet_opt *inet = inet_sk(sk); - if(inet->num == hnum) { + if (inet->num == hnum) { __u32 rcv_saddr = inet->rcv_saddr; score = 1; - if(rcv_saddr) { + if (rcv_saddr) { if (rcv_saddr != daddr) continue; score++; @@ -451,7 +442,8 @@ static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigne } /* Optimize the common listener case. 
*/ -__inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif) +__inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, + int dif) { struct sock *sk; @@ -460,8 +452,7 @@ __inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, i if (sk) { struct inet_opt *inet = inet_sk(sk); - if (inet->num == hnum && - sk->next == NULL && + if (inet->num == hnum && !sk->next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && !sk->bound_dev_if) goto sherry_cache; @@ -482,53 +473,47 @@ sherry_cache: */ static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, - u32 daddr, u16 hnum, int dif) + u32 daddr, u16 hnum, + int dif) { struct tcp_ehash_bucket *head; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(sport, hnum); struct sock *sk; - int hash; - /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = tcp_hashfn(daddr, hnum, saddr, sport); + int hash = tcp_hashfn(daddr, hnum, saddr, sport); head = &tcp_ehash[hash]; read_lock(&head->lock); - for(sk = head->chain; sk; sk = sk->next) { - if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + for (sk = head->chain; sk; sk = sk->next) { + if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ - for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) - if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + for (sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) + if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; +out: read_unlock(&head->lock); - - return NULL; - + return sk; hit: sock_hold(sk); - read_unlock(&head->lock); - return sk; + goto out; } static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 hnum, int dif) { - struct sock *sk; + struct sock *sk = __tcp_v4_lookup_established(saddr, sport, + daddr, hnum, dif); - sk = __tcp_v4_lookup_established(saddr, sport, daddr, hnum, dif); - - if (sk) - return sk; - - return tcp_v4_lookup_listener(daddr, hnum, dif); + return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif); } -__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, + u16 dport, int dif) { struct sock *sk; @@ -565,11 +550,11 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, write_lock(&head->lock); /* Check TIME-WAIT sockets first. 
*/ - for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp) != NULL; - skp = &sk2->next) { - tw = (struct tcp_tw_bucket*)sk2; + for (skp = &(head + tcp_ehash_size)->chain; (sk2 = *skp) != NULL; + skp = &sk2->next) { + tw = (struct tcp_tw_bucket *)sk2; - if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { struct tcp_opt *tp = tcp_sk(sk); /* With PAWS, it is safe from the viewpoint @@ -588,7 +573,8 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, */ if (tw->ts_recent_stamp && (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - tw->ts_recent_stamp > 1))) { + xtime.tv_sec - + tw->ts_recent_stamp > 1))) { if ((tp->write_seq = tw->snd_nxt + 65535 + 2) == 0) tp->write_seq = 1; @@ -604,8 +590,8 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, tw = NULL; /* And established part... */ - for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) { - if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + for (skp = &head->chain; (sk2 = *skp) != NULL; skp = &sk2->next) { + if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -614,7 +600,7 @@ unique: * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - BUG_TRAP(sk->pprev==NULL); + BUG_TRAP(!sk->pprev); if ((sk->next = *skp) != NULL) (*skp)->pprev = &sk->next; @@ -651,16 +637,17 @@ static int tcp_v4_hash_connect(struct sock *sk) unsigned short snum = inet_sk(sk)->num; struct tcp_bind_hashbucket *head; struct tcp_bind_bucket *tb; - - if (snum == 0) { + int ret; + + if (!snum) { int rover; int low = sysctl_local_port_range[0]; int high = sysctl_local_port_range[1]; int remaining = (high - low) + 1; struct tcp_tw_bucket *tw = NULL; - + local_bh_disable(); - + /* TODO. Actually it is not so bad idea to remove * tcp_portalloc_lock before next submission to Linus. * As soon as we touch this place at all it is time to think. 
@@ -676,29 +663,31 @@ static int tcp_v4_hash_connect(struct sock *sk) */ spin_lock(&tcp_portalloc_lock); rover = tcp_port_rover; - + do { rover++; if ((rover < low) || (rover > high)) rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; - spin_lock(&head->lock); - + spin_lock(&head->lock); + /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. */ for (tb = head->chain; tb; tb = tb->next) { if (tb->port == rover) { - BUG_TRAP(tb->owners != NULL); + BUG_TRAP(tb->owners); if (tb->fastreuse >= 0) goto next_port; - if (!__tcp_v4_check_established(sk, rover, &tw)) + if (!__tcp_v4_check_established(sk, + rover, + &tw)) goto ok; goto next_port; } } - + tb = tcp_bucket_create(head, rover); if (!tb) { spin_unlock(&head->lock); @@ -706,22 +695,22 @@ static int tcp_v4_hash_connect(struct sock *sk) } tb->fastreuse = -1; goto ok; - + next_port: spin_unlock(&head->lock); } while (--remaining > 0); tcp_port_rover = rover; spin_unlock(&tcp_portalloc_lock); - + local_bh_enable(); - + return -EADDRNOTAVAIL; - - ok: + +ok: /* All locks still held and bhs disabled */ tcp_port_rover = rover; spin_unlock(&tcp_portalloc_lock); - + tcp_bind_hash(sk, tb, rover); if (!sk->pprev) { inet_sk(sk)->sport = htons(rover); @@ -734,23 +723,23 @@ static int tcp_v4_hash_connect(struct sock *sk) tcp_timewait_kill(tw); tcp_tw_put(tw); } - - local_bh_enable(); - return 0; + + ret = 0; + goto out; } - + head = &tcp_bhash[tcp_bhashfn(snum)]; tb = (struct tcp_bind_bucket *)sk->prev; spin_lock_bh(&head->lock); - if (tb->owners == sk && sk->bind_next == NULL) { + if (tb->owners == sk && !sk->bind_next) { __tcp_v4_hash(sk, 0); spin_unlock_bh(&head->lock); return 0; } else { - int ret; spin_unlock(&head->lock); /* No definite answer... 
Walk to established hash table */ ret = __tcp_v4_check_established(sk, snum, NULL); +out: local_bh_enable(); return ret; } @@ -761,21 +750,21 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); - struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; + struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; u32 daddr, nexthop; int tmp; int err; if (addr_len < sizeof(struct sockaddr_in)) - return(-EINVAL); + return -EINVAL; if (usin->sin_family != AF_INET) - return(-EAFNOSUPPORT); + return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt && inet->opt->srr) { - if (daddr == 0) + if (!daddr) return -EINVAL; nexthop = inet->opt->faddr; } @@ -785,7 +774,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (tmp < 0) return tmp; - if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) { + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } @@ -808,8 +797,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (sysctl_tcp_tw_recycle && - !tp->ts_recent_stamp && - rt->rt_dst == daddr) { + !tp->ts_recent_stamp && rt->rt_dst == daddr) { struct inet_peer *peer = rt_get_peer(rt); /* VJ's idea. 
We save last timestamp seen from @@ -866,24 +854,24 @@ failure: static __inline__ int tcp_v4_iif(struct sk_buff *skb) { - return ((struct rtable*)skb->dst)->rt_iif; + return ((struct rtable *)skb->dst)->rt_iif; } static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport) { unsigned h = raddr ^ rport; - h ^= h>>16; - h ^= h>>8; - return h&(TCP_SYNQ_HSIZE-1); + h ^= h >> 16; + h ^= h >> 8; + return h & (TCP_SYNQ_HSIZE - 1); } -static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, +static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, struct open_request ***prevp, __u16 rport, __u32 raddr, __u32 laddr) { struct tcp_listen_opt *lopt = tp->listen_opt; - struct open_request *req, **prev; + struct open_request *req, **prev; for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)]; (req = *prev) != NULL; @@ -892,13 +880,13 @@ static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, req->af.v4_req.rmt_addr == raddr && req->af.v4_req.loc_addr == laddr && TCP_INET_FAMILY(req->class->family)) { - BUG_TRAP(req->sk == NULL); + BUG_TRAP(!req->sk); *prevp = prev; - return req; + break; } } - return NULL; + return req; } static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) @@ -920,7 +908,7 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) } -/* +/* * This routine does path mtu discovery as defined in RFC1191. */ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, @@ -935,7 +923,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, * unfragmented). */ if (sk->state == TCP_LISTEN) - return; + return; /* We don't check in the destentry if pmtu discovery is forbidden * on this route. 
We just assume that no packet_to_big packets @@ -958,7 +946,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, tp->pmtu_cookie > dst->pmtu) { tcp_sync_mss(sk, dst->pmtu); - /* Resend the TCP packet because it's + /* Resend the TCP packet because it's * clear that the old packet has been * dropped. This is the new "fast" path mtu * discovery. @@ -985,8 +973,8 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, void tcp_v4_err(struct sk_buff *skb, u32 info) { - struct iphdr *iph = (struct iphdr*)skb->data; - struct tcphdr *th = (struct tcphdr*)(skb->data+(iph->ihl<<2)); + struct iphdr *iph = (struct iphdr *)skb->data; + struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); struct tcp_opt *tp; struct inet_opt *inet; int type = skb->h.icmph->type; @@ -996,17 +984,18 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) int err; if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(IcmpInErrors); + ICMP_INC_STATS_BH(IcmpInErrors); return; } - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, tcp_v4_iif(skb)); - if (sk == NULL) { + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, + th->source, tcp_v4_iif(skb)); + if (!sk) { ICMP_INC_STATS_BH(IcmpInErrors); return; } if (sk->state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket*)sk); + tcp_tw_put((struct tcp_tw_bucket *)sk); return; } @@ -1014,7 +1003,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. */ - if (sk->lock.users != 0) + if (sk->lock.users) NET_INC_STATS_BH(LockDroppedIcmps); if (sk->state == TCP_CLOSE) @@ -1033,18 +1022,18 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) /* This is deprecated, but if someone generated it, * we have no reasons to ignore it. 
*/ - if (sk->lock.users == 0) + if (!sk->lock.users) tcp_enter_cwr(tp); goto out; case ICMP_PARAMETERPROB: err = EPROTO; - break; + break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ - if (sk->lock.users == 0) + if (!sk->lock.users) do_pmtu_discovery(sk, iph, info); goto out; } @@ -1061,39 +1050,38 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) switch (sk->state) { struct open_request *req, **prev; case TCP_LISTEN: - if (sk->lock.users != 0) + if (sk->lock.users) goto out; - req = tcp_v4_search_req(tp, &prev, - th->dest, - iph->daddr, iph->saddr); + req = tcp_v4_search_req(tp, &prev, th->dest, + iph->daddr, iph->saddr); if (!req) goto out; /* ICMPs are not backlogged, hence we cannot get an established socket here. */ - BUG_TRAP(req->sk == NULL); + BUG_TRAP(!req->sk); if (seq != req->snt_isn) { NET_INC_STATS_BH(OutOfWindowIcmps); goto out; } - /* + /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly * created socket, and POSIX does not want network - * errors returned from accept(). - */ + * errors returned from accept(). + */ tcp_synq_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: /* Cannot happen. It can f.e. if SYNs crossed. - */ - if (sk->lock.users == 0) { + */ + if (!sk->lock.users) { TCP_INC_STATS_BH(TcpAttemptFails); sk->err = err; @@ -1123,7 +1111,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) */ inet = inet_sk(sk); - if (sk->lock.users == 0 && inet->recverr) { + if (!sk->lock.users && inet->recverr) { sk->err = err; sk->error_report(sk); } else { /* Only an error on timeout */ @@ -1136,7 +1124,7 @@ out: } /* This routine computes an IPv4 TCP checksum. 
*/ -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb) { struct inet_opt *inet = inet_sk(sk); @@ -1146,7 +1134,9 @@ void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, skb->csum = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, - csum_partial((char *)th, th->doff<<2, skb->csum)); + csum_partial((char *)th, + th->doff << 2, + skb->csum)); } } @@ -1173,34 +1163,32 @@ static void tcp_v4_send_reset(struct sk_buff *skb) if (th->rst) return; - if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL) + if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) return; /* Swap the send and the receive. */ - memset(&rth, 0, sizeof(struct tcphdr)); - rth.dest = th->source; - rth.source = th->dest; - rth.doff = sizeof(struct tcphdr)/4; - rth.rst = 1; + memset(&rth, 0, sizeof(struct tcphdr)); + rth.dest = th->source; + rth.source = th->dest; + rth.doff = sizeof(struct tcphdr) / 4; + rth.rst = 1; if (th->ack) { rth.seq = th->ack_seq; } else { rth.ack = 1; - rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin - + skb->len - (th->doff<<2)); + rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + + skb->len - (th->doff << 2)); } - memset(&arg, 0, sizeof arg); - arg.iov[0].iov_base = (unsigned char *)&rth; + memset(&arg, 0, sizeof arg); + arg.iov[0].iov_base = (unsigned char *)&rth; arg.iov[0].iov_len = sizeof rth; - arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, skb->nh.iph->saddr, /*XXX*/ - sizeof(struct tcphdr), - IPPROTO_TCP, - 0); + sizeof(struct tcphdr), IPPROTO_TCP, 0); arg.n_iov = 1; - arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.csumoffset = offsetof(struct tcphdr, check) / 2; inet_sk(tcp_socket->sk)->ttl = sysctl_ip_default_ttl; ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); @@ -1213,7 +1201,8 @@ static void 
tcp_v4_send_reset(struct sk_buff *skb) outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 ts) { struct tcphdr *th = skb->h.th; struct { @@ -1225,34 +1214,31 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof arg); - arg.iov[0].iov_base = (unsigned char *)&rep; + arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); arg.n_iov = 1; if (ts) { - rep.tsopt[0] = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); + rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); rep.tsopt[1] = htonl(tcp_time_stamp); rep.tsopt[2] = htonl(ts); arg.iov[0].iov_len = sizeof(rep); } /* Swap the send and the receive. 
*/ - rep.th.dest = th->source; - rep.th.source = th->dest; - rep.th.doff = arg.iov[0].iov_len/4; - rep.th.seq = htonl(seq); + rep.th.dest = th->source; + rep.th.source = th->dest; + rep.th.doff = arg.iov[0].iov_len / 4; + rep.th.seq = htonl(seq); rep.th.ack_seq = htonl(ack); - rep.th.ack = 1; - rep.th.window = htons(win); + rep.th.ack = 1; + rep.th.window = htons(win); - arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, skb->nh.iph->saddr, /*XXX*/ - arg.iov[0].iov_len, - IPPROTO_TCP, - 0); - arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.iov[0].iov_len, IPPROTO_TCP, 0); + arg.csumoffset = offsetof(struct tcphdr, check) / 2; ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); @@ -1264,26 +1250,25 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; tcp_v4_send_ack(skb, tw->snd_nxt, tw->rcv_nxt, - tw->rcv_wnd>>tw->rcv_wscale, tw->ts_recent); + tw->rcv_wnd >> tw->rcv_wscale, tw->ts_recent); tcp_tw_put(tw); } static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) { - tcp_v4_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, + tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd, req->ts_recent); } -static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request *req) +static struct dst_entry* tcp_v4_route_req(struct sock *sk, + struct open_request *req) { struct rtable *rt; - struct ip_options *opt; + struct ip_options *opt = req->af.v4_req.opt; - opt = req->af.v4_req.opt; - if(ip_route_output(&rt, ((opt && opt->srr) ? - opt->faddr : - req->af.v4_req.rmt_addr), + if (ip_route_output(&rt, ((opt && opt->srr) ? 
opt->faddr : + req->af.v4_req.rmt_addr), req->af.v4_req.loc_addr, RT_CONN_FLAGS(sk), sk->bound_dev_if)) { IP_INC_STATS_BH(IpOutNoRoutes); @@ -1298,10 +1283,10 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request * } /* - * Send a SYN-ACK after having received an ACK. + * Send a SYN-ACK after having received an ACK. * This still operates on a open_request only, not on a big * socket. - */ + */ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, struct dst_entry *dst) { @@ -1309,8 +1294,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, struct sk_buff * skb; /* First, grab a route. */ - if (dst == NULL && - (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) goto out; skb = tcp_make_synack(sk, dst, req); @@ -1319,11 +1303,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, struct tcphdr *th = skb->h.th; th->check = tcp_v4_check(th, skb->len, - req->af.v4_req.loc_addr, req->af.v4_req.rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + req->af.v4_req.loc_addr, + req->af.v4_req.rmt_addr, + csum_partial((char *)th, skb->len, + skb->csum)); err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, - req->af.v4_req.rmt_addr, req->af.v4_req.opt); + req->af.v4_req.rmt_addr, + req->af.v4_req.opt); if (err == NET_XMIT_CN) err = 0; } @@ -1335,7 +1322,7 @@ out: /* * IPv4 open_request destructor. - */ + */ static void tcp_v4_or_free(struct open_request *req) { if (req->af.v4_req.opt) @@ -1345,26 +1332,26 @@ static void tcp_v4_or_free(struct open_request *req) static inline void syn_flood_warning(struct sk_buff *skb) { static unsigned long warntime; - - if (jiffies - warntime > HZ*60) { + + if (jiffies - warntime > HZ * 60) { warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", + printk(KERN_INFO + "possible SYN flooding on port %d. 
Sending cookies.\n", ntohs(skb->h.th->dest)); } } -/* - * Save and compile IPv4 options into the open_request if needed. +/* + * Save and compile IPv4 options into the open_request if needed. */ -static inline struct ip_options * -tcp_v4_save_options(struct sock *sk, struct sk_buff *skb) +static inline struct ip_options *tcp_v4_save_options(struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - struct ip_options *dopt = NULL; + struct ip_options *dopt = NULL; if (opt && opt->optlen) { - int opt_size = optlength(opt); + int opt_size = optlength(opt); dopt = kmalloc(opt_size, GFP_ATOMIC); if (dopt) { if (ip_options_echo(dopt, skb)) { @@ -1376,7 +1363,7 @@ tcp_v4_save_options(struct sock *sk, struct sk_buff *skb) return dopt; } -/* +/* * Maximum number of SYN_RECV sockets in queue per LISTEN socket. * One SYN_RECV socket costs about 80bytes on a 32bit machine. * It would be better to replace it with a global counter for all sockets @@ -1389,14 +1376,14 @@ tcp_v4_save_options(struct sock *sk, struct sk_buff *skb) * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). * Further increasing requires to change hash table size. 
*/ -int sysctl_max_syn_backlog = 256; +int sysctl_max_syn_backlog = 256; struct or_calltable or_ipv4 = { - PF_INET, - tcp_v4_send_synack, - tcp_v4_or_send_ack, - tcp_v4_or_free, - tcp_v4_send_reset + family: PF_INET, + rtx_syn_ack: tcp_v4_send_synack, + send_ack: tcp_v4_or_send_ack, + destructor: tcp_v4_or_free, + send_reset: tcp_v4_send_reset, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1414,9 +1401,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) #endif /* Never answer to SYNs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST|RTCF_MULTICAST)) - goto drop; + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -1425,7 +1412,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (tcp_synq_is_full(sk) && !isn) { #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { - want_cookie = 1; + want_cookie = 1; } else #endif goto drop; @@ -1440,12 +1427,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; req = tcp_openreq_alloc(); - if (req == NULL) + if (!req) goto drop; tcp_clear_options(&tp); tp.mss_clamp = 536; - tp.user_mss = tcp_sk(sk)->user_mss; + tp.user_mss = tcp_sk(sk)->user_mss; tcp_parse_options(skb, &tp, 0); @@ -1454,14 +1441,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tp.saw_tstamp = 0; } - if (tp.saw_tstamp && tp.rcv_tsval == 0) { + if (tp.saw_tstamp && !tp.rcv_tsval) { /* Some OSes (unknown ones, but I see them on web server, which * contains information interesting only for windows' * users) do not send their stamp in SYN. It is easy case. * We simply do not advertise TS support. 
*/ tp.saw_tstamp = 0; - tp.tstamp_ok = 0; + tp.tstamp_ok = 0; } tp.tstamp_ok = tp.saw_tstamp; @@ -1479,7 +1466,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) syn_flood_warning(skb); #endif isn = cookie_v4_init_sequence(sk, skb, &req->mss); - } else if (isn == 0) { + } else if (!isn) { struct inet_peer *peer = NULL; /* VJ's idea. We save last timestamp seen @@ -1494,10 +1481,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (tp.saw_tstamp && sysctl_tcp_tw_recycle && (dst = tcp_v4_route_req(sk, req)) != NULL && - (peer = rt_get_peer((struct rtable*)dst)) != NULL && + (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { + (s32)(peer->tcp_ts - req->ts_recent) > + TCP_PAWS_WINDOW) { NET_INC_STATS_BH(PAWSPassiveRejected); dst_release(dst); goto drop_and_free; @@ -1505,19 +1493,23 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } /* Kill the following clause, if you dislike this way. */ else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - tcp_synq_len(sk) - < (sysctl_max_syn_backlog>>2)) && + (sysctl_max_syn_backlog - tcp_synq_len(sk) < + (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && (!dst || !dst->rtt)) { /* Without syncookies last quarter of - * backlog is filled with destinations, proven to be alive. + * backlog is filled with destinations, + * proven to be alive. * It means that we continue to communicate * to destinations, already remembered * to the moment of synflood. */ NETDEBUG(if (net_ratelimit()) \ - printk(KERN_DEBUG "TCP: drop open request from %u.%u.%u.%u/%u\n", \ - NIPQUAD(saddr), ntohs(skb->h.th->source))); + printk(KERN_DEBUG "TCP: drop open " + "request from %u.%u." 
+ "%u.%u/%u\n", \ + NIPQUAD(saddr), + ntohs(skb->h.th->source))); dst_release(dst); goto drop_and_free; } @@ -1530,27 +1522,27 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; if (want_cookie) { - tcp_openreq_free(req); + tcp_openreq_free(req); } else { tcp_v4_synq_add(sk, req); } return 0; drop_and_free: - tcp_openreq_free(req); + tcp_openreq_free(req); drop: TCP_INC_STATS_BH(TcpAttemptFails); return 0; } -/* - * The three way handshake has completed - we got a valid synack - - * now create the new socket. +/* + * The three way handshake has completed - we got a valid synack - + * now create the new socket. */ -struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct open_request *req, - struct dst_entry *dst) +struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct open_request *req, + struct dst_entry *dst) { struct inet_opt *newinet; struct tcp_opt *newtp; @@ -1559,8 +1551,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (tcp_acceptq_is_full(sk)) goto exit_overflow; - if (dst == NULL && - (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) goto exit; newsk = tcp_create_openreq_child(sk, req, skb); @@ -1570,15 +1561,15 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->dst_cache = dst; newsk->route_caps = dst->dev->features; - newtp = tcp_sk(newsk); - newinet = inet_sk(newsk); - newinet->daddr = req->af.v4_req.rmt_addr; - newinet->rcv_saddr = req->af.v4_req.loc_addr; - newinet->saddr = req->af.v4_req.loc_addr; - newinet->opt = req->af.v4_req.opt; - req->af.v4_req.opt = NULL; - newinet->mc_index = tcp_v4_iif(skb); - newinet->mc_ttl = skb->nh.iph->ttl; + newtp = tcp_sk(newsk); + newinet = inet_sk(newsk); + newinet->daddr = req->af.v4_req.rmt_addr; + newinet->rcv_saddr = req->af.v4_req.loc_addr; + newinet->saddr = req->af.v4_req.loc_addr; + newinet->opt = 
req->af.v4_req.opt; + req->af.v4_req.opt = NULL; + newinet->mc_index = tcp_v4_iif(skb); + newinet->mc_ttl = skb->nh.iph->ttl; newtp->ext_header_len = 0; if (newinet->opt) newtp->ext_header_len = newinet->opt->optlen; @@ -1601,18 +1592,16 @@ exit: return NULL; } -static struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb) +static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct open_request *req, **prev; struct tcphdr *th = skb->h.th; struct iphdr *iph = skb->nh.iph; struct tcp_opt *tp = tcp_sk(sk); struct sock *nsk; - + struct open_request **prev; /* Find possible connection requests. */ - req = tcp_v4_search_req(tp, &prev, - th->source, - iph->saddr, iph->daddr); + struct open_request *req = tcp_v4_search_req(tp, &prev, th->source, + iph->saddr, iph->daddr); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1627,7 +1616,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket*)nsk); + tcp_tw_put((struct tcp_tw_bucket *)nsk); return NULL; } @@ -1642,22 +1631,24 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_HW) { skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, - skb->nh.iph->daddr,skb->csum)) + if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, + skb->nh.iph->daddr, skb->csum)) return 0; - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v4 csum failed\n")); + NETDEBUG(if (net_ratelimit()) + printk(KERN_DEBUG "hw tcp v4 csum failed\n")); skb->ip_summed = CHECKSUM_NONE; } if (skb->len <= 76) { - if (tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, + if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb_checksum(skb, 0, skb->len, 0))) return -1; skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->csum = ~tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, - skb->nh.iph->daddr,0); + skb->csum = 
~tcp_v4_check(skb->h.th, skb->len, + skb->nh.iph->saddr, + skb->nh.iph->daddr, 0); } return 0; } @@ -1686,13 +1677,13 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) goto reset; TCP_CHECK_TIMER(sk); - return 0; + return 0; } - if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb)) + if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb)) goto csum_err; - if (sk->state == TCP_LISTEN) { + if (sk->state == TCP_LISTEN) { struct sock *nsk = tcp_v4_hnd_req(sk, skb); if (!nsk) goto discard; @@ -1715,7 +1706,7 @@ reset: discard: kfree_skb(skb); /* Be careful here. If this function gets more complicated and - * gcc suffers from register pressure on the x86, sk (in %ebx) + * gcc suffers from register pressure on the x86, sk (in %ebx) * might be destroyed here. This current version compiles correctly, * but you have been warned. */ @@ -1736,7 +1727,7 @@ int tcp_v4_rcv(struct sk_buff *skb) struct sock *sk; int ret; - if (skb->pkt_type!=PACKET_HOST) + if (skb->pkt_type != PACKET_HOST) goto discard_it; /* Count it even if it's bad */ @@ -1747,9 +1738,9 @@ int tcp_v4_rcv(struct sk_buff *skb) th = skb->h.th; - if (th->doff < sizeof(struct tcphdr)/4) + if (th->doff < sizeof(struct tcphdr) / 4) goto bad_packet; - if (!pskb_may_pull(skb, th->doff*4)) + if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; /* An explanation is required here, I think. 
@@ -1763,20 +1754,21 @@ int tcp_v4_rcv(struct sk_buff *skb) th = skb->h.th; TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff*4); + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; - TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->when = 0; + TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; + TCP_SKB_CB(skb)->sacked = 0; sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); + skb->nh.iph->daddr, ntohs(th->dest), + tcp_v4_iif(skb)); if (!sk) goto no_tcp_socket; process: - if(!ipsec_sk_policy(sk,skb)) + if (!ipsec_sk_policy(sk, skb)) goto discard_and_relse; if (sk->state == TCP_TIME_WAIT) @@ -1798,7 +1790,7 @@ process: return ret; no_tcp_socket: - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { + if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { bad_packet: TCP_INC_STATS_BH(TcpInErrs); } else { @@ -1815,18 +1807,17 @@ discard_and_relse: goto discard_it; do_time_wait: - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { + if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TcpInErrs); goto discard_and_relse; } - switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { - case TCP_TW_SYN: - { - struct sock *sk2; - - sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); - if (sk2 != NULL) { + switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_SYN: { + struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, + ntohs(th->dest), + tcp_v4_iif(skb)); + if (sk2) { tcp_tw_deschedule((struct tcp_tw_bucket *)sk); tcp_timewait_kill((struct tcp_tw_bucket *)sk); tcp_tw_put((struct tcp_tw_bucket *)sk); @@ -1884,7 +1875,7 @@ static int tcp_v4_reselect_saddr(struct sock *sk) if 
(sysctl_ip_dynaddr > 1) { printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - NIPQUAD(old_saddr), + NIPQUAD(old_saddr), NIPQUAD(new_saddr)); } @@ -1910,7 +1901,7 @@ int tcp_v4_rebuild_header(struct sock *sk) int err; /* Route is OK, nothing to do. */ - if (rt != NULL) + if (rt) return 0; /* Reroute. */ @@ -1958,15 +1949,15 @@ int tcp_v4_remember_stamp(struct sock *sk) { struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); - struct rtable *rt = (struct rtable*)__sk_dst_get(sk); + struct rtable *rt = (struct rtable *)__sk_dst_get(sk); struct inet_peer *peer = NULL; int release_it = 0; - if (rt == NULL || rt->rt_dst != inet->daddr) { + if (!rt || rt->rt_dst != inet->daddr) { peer = inet_getpeer(inet->daddr, 1); release_it = 1; } else { - if (rt->peer == NULL) + if (!rt->peer) rt_bind_peer(rt, 1); peer = rt->peer; } @@ -2007,18 +1998,17 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) } struct tcp_func ipv4_specific = { - ip_queue_xmit, - tcp_v4_send_check, - tcp_v4_rebuild_header, - tcp_v4_conn_request, - tcp_v4_syn_recv_sock, - tcp_v4_remember_stamp, - sizeof(struct iphdr), - - ip_setsockopt, - ip_getsockopt, - v4_addr2sockaddr, - sizeof(struct sockaddr_in) + queue_xmit: ip_queue_xmit, + send_check: tcp_v4_send_check, + rebuild_header: tcp_v4_rebuild_header, + conn_request: tcp_v4_conn_request, + syn_recv_sock: tcp_v4_syn_recv_sock, + remember_stamp: tcp_v4_remember_stamp, + net_header_len: sizeof(struct iphdr), + setsockopt: ip_setsockopt, + getsockopt: ip_getsockopt, + addr2sockaddr: v4_addr2sockaddr, + sockaddr_len: sizeof(struct sockaddr_in), }; /* NOTE: A lot of things set to zero explicitly by call to @@ -2034,7 +2024,7 @@ static int tcp_v4_init_sock(struct sock *sk) tp->rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; - + /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control * algorithms that we must have 
the following bandaid to talk @@ -2082,7 +2072,7 @@ static int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. */ - if(sk->prev != NULL) + if (sk->prev) tcp_put_port(sk); /* If sendmsg cached page exists, toss it. */ @@ -2095,7 +2085,8 @@ static int tcp_v4_destroy_sock(struct sock *sk) } /* Proc filesystem TCP sock list dumping. */ -static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid) +static void get_openreq(struct sock *sk, struct open_request *req, + char *tmpbuf, int i, int uid) { int ttd = req->expires - jiffies; @@ -2107,31 +2098,28 @@ static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, req->af.v4_req.rmt_addr, ntohs(req->rmt_port), TCP_SYN_RECV, - 0,0, /* could print option size, but that is af dependent. */ - 1, /* timers active (only the expire timer) */ - ttd, + 0, 0, /* could print option size, but that is af dependent. */ + 1, /* timers active (only the expire timer) */ + ttd, req->retrans, uid, - 0, /* non standard timer */ + 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->refcnt), - req - ); + req); } static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i) { - unsigned int dest, src; - __u16 destp, srcp; int timer_active; unsigned long timer_expires; struct tcp_opt *tp = tcp_sk(sp); struct inet_opt *inet = inet_sk(sp); + unsigned int dest = inet->daddr; + unsigned int src = inet->rcv_saddr; + __u16 destp = ntohs(inet->dport); + __u16 srcp = ntohs(inet->sport); - dest = inet->daddr; - src = inet->rcv_saddr; - destp = ntohs(inet->dport); - srcp = ntohs(inet->sport); if (tp->pending == TCP_TIME_RETRANS) { timer_active = 1; timer_expires = tp->timeout; @@ -2146,19 +2134,19 @@ static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i) timer_expires = jiffies; } - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u 
%d", - i, src, srcp, dest, destp, sp->state, - tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, - timer_active, timer_expires-jiffies, + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " + "%08X %5d %8d %lu %d %p %u %u %u %u %d", + i, src, srcp, dest, destp, sp->state, + tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, + timer_active, timer_expires - jiffies, tp->retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, - tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh - ); + tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, + tp->snd_cwnd, + tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); } static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) @@ -2188,18 +2176,19 @@ int tcp_get_info(char *buffer, char **start, off_t offset, int length) { int len = 0, num = 0, i; off_t begin, pos = 0; - char tmpbuf[TMPSZ+1]; + char tmpbuf[TMPSZ + 1]; if (offset < TMPSZ) - len += sprintf(buffer, "%-*s\n", TMPSZ-1, + len += sprintf(buffer, "%-*s\n", TMPSZ - 1, " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout inode"); + "rx_queue tr tm->when retrnsmt uid timeout " + "inode"); pos = TMPSZ; /* First, walk listening socket table. 
*/ tcp_listen_lock(); - for(i = 0; i < TCP_LHTABLE_SIZE; i++) { + for (i = 0; i < TCP_LHTABLE_SIZE; i++) { struct sock *sk; struct tcp_listen_opt *lopt; int k; @@ -2215,7 +2204,8 @@ int tcp_get_info(char *buffer, char **start, off_t offset, int length) pos += TMPSZ; if (pos >= offset) { get_tcp_sock(sk, tmpbuf, num); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + len += sprintf(buffer + len, "%-*s\n", + TMPSZ - 1, tmpbuf); if (pos >= offset + length) { tcp_listen_unlock(); goto out_no_bh; @@ -2226,17 +2216,22 @@ skip_listen: uid = sock_i_uid(sk); read_lock_bh(&tp->syn_wait_lock); lopt = tp->listen_opt; - if (lopt && lopt->qlen != 0) { - for (k=0; k<TCP_SYNQ_HSIZE; k++) { - for (req = lopt->syn_table[k]; req; req = req->dl_next, num++) { + if (lopt && lopt->qlen) { + for (k = 0; k < TCP_SYNQ_HSIZE; k++) { + for (req = lopt->syn_table[k]; + req; req = req->dl_next, num++) { if (!TCP_INET_FAMILY(req->class->family)) continue; pos += TMPSZ; if (pos <= offset) continue; - get_openreq(sk, req, tmpbuf, num, uid); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + get_openreq(sk, req, tmpbuf, + num, uid); + len += sprintf(buffer + len, + "%-*s\n", + TMPSZ - 1, + tmpbuf); if (pos >= offset + length) { read_unlock_bh(&tp->syn_wait_lock); tcp_listen_unlock(); @@ -2261,21 +2256,23 @@ skip_listen: struct tcp_tw_bucket *tw; read_lock(&head->lock); - for(sk = head->chain; sk; sk = sk->next, num++) { + for (sk = head->chain; sk; sk = sk->next, num++) { if (!TCP_INET_FAMILY(sk->family)) continue; pos += TMPSZ; if (pos <= offset) continue; get_tcp_sock(sk, tmpbuf, num); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + len += sprintf(buffer + len, "%-*s\n", + TMPSZ - 1, tmpbuf); if (pos >= offset + length) { read_unlock(&head->lock); goto out; } } - for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain; - tw != NULL; + for (tw = (struct tcp_tw_bucket *)tcp_ehash[i + + tcp_ehash_size].chain; + tw; tw = (struct tcp_tw_bucket *)tw->next, 
num++) { if (!TCP_INET_FAMILY(tw->family)) continue; @@ -2283,7 +2280,8 @@ skip_listen: if (pos <= offset) continue; get_timewait_sock(tw, tmpbuf, num); - len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf); + len += sprintf(buffer + len, "%-*s\n", + TMPSZ - 1, tmpbuf); if (pos >= offset + length) { read_unlock(&head->lock); goto out; @@ -2302,7 +2300,7 @@ out_no_bh: if (len > length) len = length; if (len < 0) - len = 0; + len = 0; return len; } @@ -2333,7 +2331,7 @@ void __init tcp_v4_init(struct net_proto_family *ops) int err = sock_create(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket); if (err < 0) panic("Failed to create the TCP control socket.\n"); - tcp_socket->sk->allocation=GFP_ATOMIC; + tcp_socket->sk->allocation = GFP_ATOMIC; inet_sk(tcp_socket->sk)->ttl = MAXTTL; /* Unhash it so that IP input processing does not even diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f3c90cfcf500..22db31eea3f5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -11,6 +11,7 @@ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support + * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -487,11 +488,18 @@ static int rawv6_frag_cksum(const void *data, struct in6_addr *addr, hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len, hdr->proto, hdr->cksum); - if (opt->offset < len) { + if (opt->offset + 1 < len) { __u16 *csum; csum = (__u16 *) (buff + opt->offset); - *csum = hdr->cksum; + if (*csum) { + /* in case cksum was not initialized */ + __u32 sum = hdr->cksum; + sum += *csum; + *csum = hdr->cksum = (sum + (sum>>16)); + } else { + *csum = hdr->cksum; + } } else { if (net_ratelimit()) printk(KERN_DEBUG "icmp: cksum offset too big\n"); @@ -720,6 +728,10 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_CHECKSUM: + /* You may get strange result with a positive odd offset; + RFC2292bis 
agrees with me. */ + if (val > 0 && (val&1)) + return(-EINVAL); if (val < 0) { opt->checksum = 0; } else { @@ -817,6 +829,11 @@ static void rawv6_close(struct sock *sk, long timeout) static int rawv6_init_sk(struct sock *sk) { + if (inet_sk(sk)->num == IPPROTO_ICMPV6) { + struct raw6_opt *opt = raw6_sk(sk); + opt->checksum = 1; + opt->offset = 2; + } return(0); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8df0b9e2d29a..4b2917f4084a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1117,7 +1117,7 @@ static void psched_tick(unsigned long dummy) psched_timer.expires = jiffies + 1*HZ; #else unsigned long now = jiffies; - psched_time_base = ((u64)now)<<PSCHED_JSCALE; + psched_time_base += ((u64)(now-psched_time_mark))<<PSCHED_JSCALE; psched_time_mark = now; psched_timer.expires = now + 60*60*HZ; #endif diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index ad9d45174d4b..1533e0076eca 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -176,7 +176,7 @@ void xdr_shift_iovec(struct iovec *iov, int nr, size_t len) /* * Map a struct xdr_buf into an iovec array. 
*/ -int xdr_kmap(struct iovec *iov_base, struct xdr_buf *xdr, unsigned int base) +int xdr_kmap(struct iovec *iov_base, struct xdr_buf *xdr, size_t base) { struct iovec *iov = iov_base; struct page **ppage = xdr->pages; @@ -226,7 +226,7 @@ map_tail: return (iov - iov_base); } -void xdr_kunmap(struct xdr_buf *xdr, unsigned int base) +void xdr_kunmap(struct xdr_buf *xdr, size_t base) { struct page **ppage = xdr->pages; unsigned int pglen = xdr->page_len; diff --git a/scripts/Makefile b/scripts/Makefile index 0c1ef38ca7ec..83f72fb047b4 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -2,14 +2,22 @@ # always needed # --------------------------------------------------------------------------- -all: mkdep split-include +# The following temporary rule will make sure that people's +# trees get updated to the right permissions, since patch(1) +# can't do it +CHMOD_FILES := docgen gen-all-syms kernel-doc mkcompile_h mkversion_h makelst -mkdep: mkdep.c +all: fixdep split-include $(CHMOD_FILES) + +fixdep: fixdep.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< split-include: split-include.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< +$(CHMOD_FILES): FORCE + @chmod a+x $@ + # xconfig # --------------------------------------------------------------------------- @@ -44,15 +52,11 @@ tkparse.o tkcond.o tkgen.o: %.o: %.c # DocBook stuff # --------------------------------------------------------------------------- -doc-progs: docproc docgen gen-all-syms kernel-doc +doc-progs: docproc docproc: docproc.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< -docgen gen-all-syms kernel-doc: FORCE - chmod 755 $@ - - include $(TOPDIR)/Rules.make # we don't have automatic deps for host programs diff --git a/scripts/docgen b/scripts/docgen index a7183053d28f..a7183053d28f 100644..100755 --- a/scripts/docgen +++ b/scripts/docgen diff --git a/scripts/fixdep.c b/scripts/fixdep.c new file mode 100644 index 000000000000..07c927c58f3f --- /dev/null +++ b/scripts/fixdep.c @@ -0,0 +1,389 @@ +/* + * "Optimize" a list of dependencies as 
spit out by gcc -MD + * for the kernel build + * =========================================================================== + * + * Author Kai Germaschewski + * Copyright 2002 by Kai Germaschewski <kai.germaschewski@gmx.de> + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + * + * Introduction: + * + * gcc produces a very nice and correct list of dependencies which + * tells make when to remake a file. + * + * To use this list as-is however has the drawback that virtually + * every file in the kernel includes <linux/config.h> which then again + * includes <linux/autoconf.h> + * + * If the user re-runs make *config, linux/autoconf.h will be + * regenerated. make notices that and will rebuild every file which + * includes autconf.h, i.e. basically all files. This is extremely + * annoying if the user just changed CONFIG_HIS_DRIVER from n to m. + * + * So we play the same trick that "mkdep" played before. We replace + * the dependency on linux/autoconf.h by a dependency on every config + * option which is mentioned in any of the listed prequisites. + * + * To be exact, split-include populates a tree in include/config/, + * e.g. include/config/his/driver.h, which contains the #define/#undef + * for the CONFIG_HIS_DRIVER option. + * + * So if the user changes his CONFIG_HIS_DRIVER option, only the objects + * which depend on "include/linux/config/his/driver.h" will be rebuilt, + * so most likely only his driver ;-) + * + * The idea above dates, by the way, back to Michael E Chastain, AFAIK. + * + * So to get dependencies right, there two issues: + * o if any of the files the compiler read changed, we need to rebuild + * o if the command line given to the compile the file changed, we + * better rebuild as well. 
+ * + * The former is handled by using the -MD output, the later by saving + * the command line used to compile the old object and comparing it + * to the one we would now use. + * + * Again, also this idea is pretty old and has been discussed on + * kbuild-devel a long time ago. I don't have a sensibly working + * internet connection right now, so I rather don't mention names + * without double checking. + * + * This code here has been based partially based on mkdep.c, which + * says the following about its history: + * + * Copyright abandoned, Michael Chastain, <mailto:mec@shout.net>. + * This is a C version of syncdep.pl by Werner Almesberger. + * + * + * It is invoked as + * + * fixdep <target> <topdir> <cmdline> + * + * and will read the dependency file ".<target>.d". + * + * The transformed dependency snipped is written to stdout. + * + * It first generates a line + * + * cmd_<target> = <cmdline> + * + * and then basically copies the .<target>.d file to stdout, in the + * process filtering out the dependency on linux/autconf.h and adding + * dependencies on include/config/my/option.h for every + * CONFIG_MY_OPTION encountered in any of the prequisites. + * + * It will also filter out all the dependencies on *.ver. We need + * to make sure that the generated version checksum are globally up + * to date before even starting the recursive build, so it's too late + * at this point anyway. + * + * The algorithm to grep for "CONFIG_..." is bit unusual, but should + * be fast ;-) We don't even try to really parse the header files, but + * merely grep, i.e. if CONFIG_FOO is mentioned in a comment, it will + * be picked up as well. It's not a problem with respect to + * correctness, since that can only give too many dependencies, thus + * we cannot miss a rebuild. Since people tend to not mention totally + * unrelated CONFIG_ options all over the place, it's not an + * efficiency problem either. 
+ * + * (Note: it'd be easy to port over the complete mkdep state machine, + * but I don't think the added complexity is worth it) + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include <ctype.h> +#include <netinet/in.h> + +#define INT_CONF ntohl(0x434f4e46) +#define INT_ONFI ntohl(0x4f4e4649) +#define INT_NFIG ntohl(0x4e464947) +#define INT_FIG_ ntohl(0x4649475f) + +char *topdir; + +void usage(void) + +{ + fprintf(stderr, "Usage: fixdep <target> <topdir> <cmdline>\n"); + exit(1); +} + +void print_cmdline(char *target, char *cmdline) +{ + char *s = strdup(target); + char *p = s; + + if (!s) { + fprintf(stderr, "no mem!\n"); + exit(2); + } + while ((p = strchr(p,'/'))) + *p = '_'; + + printf("cmd_%s := %s\n\n", s, cmdline); + + free(s); +} + +char * str_config = NULL; +int size_config = 0; +int len_config = 0; + +/* + * Grow the configuration string to a desired length. + * Usually the first growth is plenty. + */ +void grow_config(int len) +{ + while (len_config + len > size_config) { + if (size_config == 0) + size_config = 2048; + str_config = realloc(str_config, size_config *= 2); + if (str_config == NULL) + { perror("malloc"); exit(1); } + } +} + + + +/* + * Lookup a value in the configuration string. + */ +int is_defined_config(const char * name, int len) +{ + const char * pconfig; + const char * plast = str_config + len_config - len; + for ( pconfig = str_config + 1; pconfig < plast; pconfig++ ) { + if (pconfig[ -1] == '\n' + && pconfig[len] == '\n' + && !memcmp(pconfig, name, len)) + return 1; + } + return 0; +} + +/* + * Add a new value to the configuration string. + */ +void define_config(const char * name, int len) +{ + grow_config(len + 1); + + memcpy(str_config+len_config, name, len); + len_config += len; + str_config[len_config++] = '\n'; +} + +/* + * Clear the set of configuration strings. 
+ */ +void clear_config(void) +{ + len_config = 0; + define_config("", 0); +} + +/* + * Record the use of a CONFIG_* word. + */ +void use_config(char *m, int slen) +{ + char s[PATH_MAX]; + char *p; + + if (is_defined_config(m, slen)) + return; + + define_config(m, slen); + + memcpy(s, m, slen); s[slen] = 0; + + for (p = s; p < s + slen; p++) { + if (*p == '_') + *p = '/'; + else + *p = tolower(*p); + } + printf(" $(wildcard %s/include/config/%s.h) \\\n", topdir, s); +} + +void parse_config_file(char *map, size_t len) +{ + int *end = (int *) (map + len); + // start at +1, so that p can never be < map + int *m = (int *) map + 1; + char *p, *q; + + for (; m < end; m++) { + if (*m == INT_CONF) { p = (char *) m ; goto conf; } + if (*m == INT_ONFI) { p = (char *) m-1; goto conf; } + if (*m == INT_NFIG) { p = (char *) m-2; goto conf; } + if (*m == INT_FIG_) { p = (char *) m-3; goto conf; } + continue; + conf: + if (p > map + len - 7) + continue; + if (memcmp(p, "CONFIG_", 7)) + continue; + for (q = p + 7; q < map + len; q++) { + if (!(isalnum(*q))) + goto found; + } + continue; + + found: + use_config(p+7, q-p-7); + } +} + +/* test is s ends in sub */ +int strrcmp(char *s, char *sub) +{ + int slen = strlen(s); + int sublen = strlen(sub); + + if (sublen > slen) + return 1; + + return memcmp(s + slen - sublen, sub, sublen); +} + +void do_config_file(char *filename) +{ + struct stat st; + int fd; + void *map; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror(filename); + exit(2); + } + fstat(fd, &st); + if (st.st_size == 0) { + close(fd); + return; + } + map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if ((long) map == -1) { + perror("mmap"); + close(fd); + return; + } + + parse_config_file(map, st.st_size); + + munmap(map, st.st_size); +} + +void parse_dep_file(void *map, size_t len) +{ + char *m = map; + char *end = map + len; + char *p; + char s[PATH_MAX]; + + p = strchr(m, ':'); + if (!p) { + fprintf(stderr, "parse error at %d", __LINE__); + 
exit(1); + } + memcpy(s, m, p-m); s[p-m] = 0; + printf("%s: \\\n", s); + m = p+1; + + clear_config(); + + while (m < end) { + while (*m == ' ' || *m == '\\' || *m == '\n') + m++; + + p = strchr(m, ' '); + if (!p) { + p = end; + while (!isalpha(*p)) p--; + p++; + } + memcpy(s, m, p-m); s[p-m] = 0; + if (strrcmp(s, "include/linux/autoconf.h") && + strrcmp(s, ".ver")) { + printf(" %s \\\n", s); + do_config_file(s); + } + m = p + 1; + } + printf("\n"); +} + +void print_deps(char *target) +{ + char filename[PATH_MAX]; + struct stat st; + int fd; + void *map; + + sprintf(filename, ".%s.d", target); + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror(filename); + exit(2); + } + fstat(fd, &st); + if (st.st_size == 0) { + fprintf(stderr,"%s is empty\n",filename); + close(fd); + return; + } + map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if ((long) map == -1) { + perror("mmap"); + close(fd); + return; + } + + parse_dep_file(map, st.st_size); + + munmap(map, st.st_size); +} + +void traps(void) +{ + char *test = "CONF"; + + if (*(int *)test != INT_CONF) { + fprintf(stderr, "sizeof(int) != 4 or wrong endianess? 
%#x\n", + *(int *)test); + exit(2); + } +} + +int main(int argc, char *argv[]) +{ + char *target, *cmdline; + + traps(); + + if (argc != 4) + usage(); + + target = argv[1]; + topdir = argv[2]; + cmdline = argv[3]; + + print_cmdline(target, cmdline); + print_deps(target); + + return 0; +} diff --git a/scripts/gen-all-syms b/scripts/gen-all-syms index b15b160040e8..b15b160040e8 100644..100755 --- a/scripts/gen-all-syms +++ b/scripts/gen-all-syms diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 028e5a33ac42..028e5a33ac42 100644..100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc diff --git a/scripts/makelst b/scripts/makelst index a76ca78b34ac..3c04512728f2 100644..100755 --- a/scripts/makelst +++ b/scripts/makelst @@ -6,17 +6,26 @@ # William Stearns <wstearns@pobox.com> #%.lst: %.c # $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(CFLAGS_$@) -g -c -o $*.o $< -# $(TOPDIR)/scripts/makelst $* $(TOPDIR) $(OBJDUMP) +# $(TOPDIR)/scripts/makelst $*.o $(TOPDIR)/System.map $(OBJDUMP) # # Copyright (C) 2000 IBM Corporation # Author(s): DJ Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) # -t1=`$3 --syms $2/$1.o | grep .text | grep " F " | head -n 1` -t2=`echo $t1 | gawk '{ print $6 }'` -t3=`grep $t2 $2/System.map` -t4=`echo $t3 | gawk '{ print $1 }'` -t5=`echo $t1 | gawk '{ print $1 }'` -t6=`echo $t4 - $t5 | sed -e s/a/A/g -e s/b/B/g -e s/c/C/g -e s/d/D/g -e s/e/E/g -e s/f/F/g` -t7=`( echo ibase=16 ; echo $t6 ) | bc` -$3 --source --adjust-vma=$t7 $2/$1.o > $2/$1.lst +t1=`$3 --syms $1 | grep .text | grep " F " | head -n 1` +if [ -n "$t1" ]; then + t2=`echo $t1 | gawk '{ print $6 }'` + if [ ! 
-r $2 ]; then + echo "No System.map" >&2 + t7=0 + else + t3=`grep $t2 $2` + t4=`echo $t3 | gawk '{ print $1 }'` + t5=`echo $t1 | gawk '{ print $1 }'` + t6=`echo $t4 - $t5 | tr a-f A-F` + t7=`( echo ibase=16 ; echo $t6 ) | bc` + fi +else + t7=0 +fi +$3 --source --adjust-vma=$t7 $1 diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 7fc14334f04a..e18fc4f263e3 100644..100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -48,9 +48,10 @@ if [ -r $TARGET ] && \ grep -v 'UTS_VERSION\|LINUX_COMPILE_TIME' $TARGET > .tmpver.1 && \ grep -v 'UTS_VERSION\|LINUX_COMPILE_TIME' .tmpcompile > .tmpver.2 && \ cmp -s .tmpver.1 .tmpver.2; then - echo $TARGET is unchanged; + echo $TARGET was not updated; rm -f .tmpcompile else + echo $TARGET was updated mv -f .tmpcompile $TARGET fi rm -f .tmpver.1 .tmpver.2 diff --git a/scripts/mkdep.c b/scripts/mkdep.c deleted file mode 100644 index 01386ea2563c..000000000000 --- a/scripts/mkdep.c +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Originally by Linus Torvalds. - * Smart CONFIG_* processing by Werner Almesberger, Michael Chastain. - * - * Usage: mkdep cflags -- file ... - * - * Read source files and output makefile dependency lines for them. - * I make simple dependency lines for #include <*.h> and #include "*.h". - * I also find instances of CONFIG_FOO and generate dependencies - * like include/config/foo.h. - * - * 1 August 1999, Michael Elizabeth Chastain, <mec@shout.net> - * - Keith Owens reported a bug in smart config processing. There used - * to be an optimization for "#define CONFIG_FOO ... #ifdef CONFIG_FOO", - * so that the file would not depend on CONFIG_FOO because the file defines - * this symbol itself. But this optimization is bogus! Consider this code: - * "#if 0 \n #define CONFIG_FOO \n #endif ... #ifdef CONFIG_FOO". Here - * the definition is inactivated, but I still used it. It turns out this - * actually happens a few times in the kernel source. 
The simple way to - * fix this problem is to remove this particular optimization. - * - * 2.3.99-pre1, Andrew Morton <andrewm@uow.edu.au> - * - Changed so that 'filename.o' depends upon 'filename.[cS]'. This is so that - * missing source files are noticed, rather than silently ignored. - * - * 2.4.2-pre3, Keith Owens <kaos@ocs.com.au> - * - Accept cflags followed by '--' followed by filenames. mkdep extracts -I - * options from cflags and looks in the specified directories as well as the - * defaults. Only -I is supported, no attempt is made to handle -idirafter, - * -isystem, -I- etc. - */ - -#include <ctype.h> -#include <fcntl.h> -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <sys/fcntl.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> - - - -char __depname[512] = "\n\t@touch "; -#define depname (__depname+9) -int hasdep; - -struct path_struct { - int len; - char *buffer; -}; -struct path_struct *path_array; -int paths; - - -/* Current input file */ -static const char *g_filename; - -/* - * This records all the configuration options seen. - * In perl this would be a hash, but here it's a long string - * of values separated by newlines. This is simple and - * extremely fast. - */ -char * str_config = NULL; -int size_config = 0; -int len_config = 0; - -static void -do_depname(void) -{ - if (!hasdep) { - hasdep = 1; - printf("%s:", depname); - if (g_filename) - printf(" %s", g_filename); - } -} - -/* - * Grow the configuration string to a desired length. - * Usually the first growth is plenty. - */ -void grow_config(int len) -{ - while (len_config + len > size_config) { - if (size_config == 0) - size_config = 2048; - str_config = realloc(str_config, size_config *= 2); - if (str_config == NULL) - { perror("malloc config"); exit(1); } - } -} - - - -/* - * Lookup a value in the configuration string. 
- */ -int is_defined_config(const char * name, int len) -{ - const char * pconfig; - const char * plast = str_config + len_config - len; - for ( pconfig = str_config + 1; pconfig < plast; pconfig++ ) { - if (pconfig[ -1] == '\n' - && pconfig[len] == '\n' - && !memcmp(pconfig, name, len)) - return 1; - } - return 0; -} - - - -/* - * Add a new value to the configuration string. - */ -void define_config(const char * name, int len) -{ - grow_config(len + 1); - - memcpy(str_config+len_config, name, len); - len_config += len; - str_config[len_config++] = '\n'; -} - - - -/* - * Clear the set of configuration strings. - */ -void clear_config(void) -{ - len_config = 0; - define_config("", 0); -} - - - -/* - * This records all the precious .h filenames. No need for a hash, - * it's a long string of values enclosed in tab and newline. - */ -char * str_precious = NULL; -int size_precious = 0; -int len_precious = 0; - - - -/* - * Grow the precious string to a desired length. - * Usually the first growth is plenty. - */ -void grow_precious(int len) -{ - while (len_precious + len > size_precious) { - if (size_precious == 0) - size_precious = 2048; - str_precious = realloc(str_precious, size_precious *= 2); - if (str_precious == NULL) - { perror("malloc"); exit(1); } - } -} - - - -/* - * Add a new value to the precious string. - */ -void define_precious(const char * filename) -{ - int len = strlen(filename); - grow_precious(len + 4); - *(str_precious+len_precious++) = '\t'; - memcpy(str_precious+len_precious, filename, len); - len_precious += len; - memcpy(str_precious+len_precious, " \\\n", 3); - len_precious += 3; -} - - - -/* - * Handle an #include line. 
- */ -void handle_include(int start, const char * name, int len) -{ - struct path_struct *path; - int i; - - if (len == 14 && !memcmp(name, "linux/config.h", len)) - return; - - if (len >= 7 && !memcmp(name, "config/", 7)) - define_config(name+7, len-7-2); - - for (i = start, path = path_array+start; i < paths; ++i, ++path) { - memcpy(path->buffer+path->len, name, len); - path->buffer[path->len+len] = '\0'; - if (access(path->buffer, F_OK) == 0) { - do_depname(); - printf(" \\\n %s", path->buffer); - return; - } - } - -} - - - -/* - * Add a path to the list of include paths. - */ -void add_path(const char * name) -{ - struct path_struct *path; - char resolved_path[PATH_MAX+1]; - const char *name2; - - if (strcmp(name, ".")) { - name2 = realpath(name, resolved_path); - if (!name2) { - fprintf(stderr, "realpath(%s) failed, %m\n", name); - exit(1); - } - } - else { - name2 = ""; - } - - path_array = realloc(path_array, (++paths)*sizeof(*path_array)); - if (!path_array) { - fprintf(stderr, "cannot expand path_arry\n"); - exit(1); - } - - path = path_array+paths-1; - path->len = strlen(name2); - path->buffer = malloc(path->len+1+256+1); - if (!path->buffer) { - fprintf(stderr, "cannot allocate path buffer\n"); - exit(1); - } - strcpy(path->buffer, name2); - if (path->len && *(path->buffer+path->len-1) != '/') { - *(path->buffer+path->len) = '/'; - *(path->buffer+(++(path->len))) = '\0'; - } -} - - - -/* - * Record the use of a CONFIG_* word. 
- */ -void use_config(const char * name, int len) -{ - char *pc; - int i; - - pc = path_array[paths-1].buffer + path_array[paths-1].len; - memcpy(pc, "config/", 7); - pc += 7; - - for (i = 0; i < len; i++) { - int c = (unsigned char) name[i]; - if (isupper(c)) c = tolower(c); - if (c == '_') c = '/'; - pc[i] = c; - } - pc[len] = '\0'; - - if (is_defined_config(pc, len)) - return; - - define_config(pc, len); - - do_depname(); - printf(" \\\n $(wildcard %s.h)", path_array[paths-1].buffer); -} - - - -/* - * Macros for stunningly fast map-based character access. - * __buf is a register which holds the current word of the input. - * Thus, there is one memory access per sizeof(unsigned long) characters. - */ - -#if defined(__alpha__) || defined(__i386__) || defined(__ia64__) || defined(__x86_64__) || defined(__MIPSEL__) \ - || defined(__arm__) -#define LE_MACHINE -#endif - -#ifdef LE_MACHINE -#define next_byte(x) (x >>= 8) -#define current ((unsigned char) __buf) -#else -#define next_byte(x) (x <<= 8) -#define current (__buf >> 8*(sizeof(unsigned long)-1)) -#endif - -#define GETNEXT { \ - next_byte(__buf); \ - if ((unsigned long) next % sizeof(unsigned long) == 0) { \ - if (next >= end) \ - break; \ - __buf = * (unsigned long *) next; \ - } \ - next++; \ -} - -/* - * State machine macros. - */ -#define CASE(c,label) if (current == c) goto label -#define NOTCASE(c,label) if (current != c) goto label - -/* - * Yet another state machine speedup. 
- */ -#define MAX2(a,b) ((a)>(b)?(a):(b)) -#define MIN2(a,b) ((a)<(b)?(a):(b)) -#define MAX5(a,b,c,d,e) (MAX2(a,MAX2(b,MAX2(c,MAX2(d,e))))) -#define MIN5(a,b,c,d,e) (MIN2(a,MIN2(b,MIN2(c,MIN2(d,e))))) - - - -/* - * The state machine looks for (approximately) these Perl regular expressions: - * - * m|\/\*.*?\*\/| - * m|\/\/.*| - * m|'.*?'| - * m|".*?"| - * m|#\s*include\s*"(.*?)"| - * m|#\s*include\s*<(.*?>"| - * m|#\s*(?define|undef)\s*CONFIG_(\w*)| - * m|(?!\w)CONFIG_| - * - * About 98% of the CPU time is spent here, and most of that is in - * the 'start' paragraph. Because the current characters are - * in a register, the start loop usually eats 4 or 8 characters - * per memory read. The MAX5 and MIN5 tests dispose of most - * input characters with 1 or 2 comparisons. - */ -void state_machine(const char * map, const char * end) -{ - const char * next = map; - const char * map_dot; - unsigned long __buf = 0; - - for (;;) { -start: - GETNEXT -__start: - if (current > MAX5('/','\'','"','#','C')) goto start; - if (current < MIN5('/','\'','"','#','C')) goto start; - CASE('/', slash); - CASE('\'', squote); - CASE('"', dquote); - CASE('#', pound); - CASE('C', cee); - goto start; - -/* // */ -slash_slash: - GETNEXT - CASE('\n', start); - NOTCASE('\\', slash_slash); - GETNEXT - goto slash_slash; - -/* / */ -slash: - GETNEXT - CASE('/', slash_slash); - NOTCASE('*', __start); -slash_star_dot_star: - GETNEXT -__slash_star_dot_star: - NOTCASE('*', slash_star_dot_star); - GETNEXT - NOTCASE('/', __slash_star_dot_star); - goto start; - -/* '.*?' */ -squote: - GETNEXT - CASE('\'', start); - NOTCASE('\\', squote); - GETNEXT - goto squote; - -/* ".*?" 
*/ -dquote: - GETNEXT - CASE('"', start); - NOTCASE('\\', dquote); - GETNEXT - goto dquote; - -/* #\s* */ -pound: - GETNEXT - CASE(' ', pound); - CASE('\t', pound); - CASE('i', pound_i); - CASE('d', pound_d); - CASE('u', pound_u); - goto __start; - -/* #\s*i */ -pound_i: - GETNEXT NOTCASE('n', __start); - GETNEXT NOTCASE('c', __start); - GETNEXT NOTCASE('l', __start); - GETNEXT NOTCASE('u', __start); - GETNEXT NOTCASE('d', __start); - GETNEXT NOTCASE('e', __start); - goto pound_include; - -/* #\s*include\s* */ -pound_include: - GETNEXT - CASE(' ', pound_include); - CASE('\t', pound_include); - map_dot = next; - CASE('"', pound_include_dquote); - CASE('<', pound_include_langle); - goto __start; - -/* #\s*include\s*"(.*)" */ -pound_include_dquote: - GETNEXT - CASE('\n', start); - NOTCASE('"', pound_include_dquote); - handle_include(0, map_dot, next - map_dot - 1); - goto start; - -/* #\s*include\s*<(.*)> */ -pound_include_langle: - GETNEXT - CASE('\n', start); - NOTCASE('>', pound_include_langle); - handle_include(1, map_dot, next - map_dot - 1); - goto start; - -/* #\s*d */ -pound_d: - GETNEXT NOTCASE('e', __start); - GETNEXT NOTCASE('f', __start); - GETNEXT NOTCASE('i', __start); - GETNEXT NOTCASE('n', __start); - GETNEXT NOTCASE('e', __start); - goto pound_define_undef; - -/* #\s*u */ -pound_u: - GETNEXT NOTCASE('n', __start); - GETNEXT NOTCASE('d', __start); - GETNEXT NOTCASE('e', __start); - GETNEXT NOTCASE('f', __start); - goto pound_define_undef; - -/* - * #\s*(define|undef)\s*CONFIG_(\w*) - * - * this does not define the word, because it could be inside another - * conditional (#if 0). But I do parse the word so that this instance - * does not count as a use. 
-- mec - */ -pound_define_undef: - GETNEXT - CASE(' ', pound_define_undef); - CASE('\t', pound_define_undef); - - NOTCASE('C', __start); - GETNEXT NOTCASE('O', __start); - GETNEXT NOTCASE('N', __start); - GETNEXT NOTCASE('F', __start); - GETNEXT NOTCASE('I', __start); - GETNEXT NOTCASE('G', __start); - GETNEXT NOTCASE('_', __start); - - map_dot = next; -pound_define_undef_CONFIG_word: - GETNEXT - if (isalnum(current) || current == '_') - goto pound_define_undef_CONFIG_word; - goto __start; - -/* \<CONFIG_(\w*) */ -cee: - if (next >= map+2 && (isalnum(next[-2]) || next[-2] == '_')) - goto start; - GETNEXT NOTCASE('O', __start); - GETNEXT NOTCASE('N', __start); - GETNEXT NOTCASE('F', __start); - GETNEXT NOTCASE('I', __start); - GETNEXT NOTCASE('G', __start); - GETNEXT NOTCASE('_', __start); - - map_dot = next; -cee_CONFIG_word: - GETNEXT - if (isalnum(current) || current == '_') - goto cee_CONFIG_word; - use_config(map_dot, next - map_dot - 1); - goto __start; - } -} - - - -/* - * Generate dependencies for one file. - */ -void do_depend(const char * filename, const char * command) -{ - int mapsize; - int pagesizem1 = getpagesize()-1; - int fd; - struct stat st; - char * map; - - fd = open(filename, O_RDONLY); - if (fd < 0) { - perror(filename); - return; - } - - fstat(fd, &st); - if (st.st_size == 0) { - fprintf(stderr,"%s is empty\n",filename); - close(fd); - return; - } - - mapsize = st.st_size; - mapsize = (mapsize+pagesizem1) & ~pagesizem1; - map = mmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); - if ((long) map == -1) { - perror("mkdep: mmap"); - close(fd); - return; - } - if ((unsigned long) map % sizeof(unsigned long) != 0) - { - fprintf(stderr, "do_depend: map not aligned\n"); - exit(1); - } - - hasdep = 0; - clear_config(); - state_machine(map, map+st.st_size); - if (hasdep) { - puts(command); - if (*command) - define_precious(filename); - } - - munmap(map, mapsize); - close(fd); -} - - - -/* - * Generate dependencies for all files. 
- */ -int main(int argc, char **argv) -{ - int len; - const char *hpath; - - hpath = getenv("HPATH"); - if (!hpath) { - fputs("mkdep: HPATH not set in environment. " - "Don't bypass the top level Makefile.\n", stderr); - return 1; - } - - add_path("."); /* for #include "..." */ - - while (++argv, --argc > 0) { - if (strncmp(*argv, "-I", 2) == 0) { - if (*((*argv)+2)) { - add_path((*argv)+2); - } - else { - ++argv; - --argc; - add_path(*argv); - } - } - else if (strcmp(*argv, "--") == 0) { - break; - } - } - - add_path(hpath); /* must be last entry, for config files */ - - while (--argc > 0) { - const char * filename = *++argv; - const char * command = __depname; - g_filename = 0; - len = strlen(filename); - memcpy(depname, filename, len+1); - if (len > 2 && filename[len-2] == '.') { - if (filename[len-1] == 'c' || filename[len-1] == 'S') { - depname[len-1] = 'o'; - g_filename = filename; - command = ""; - } - } - do_depend(filename, command); - } - if (len_precious) { - *(str_precious+len_precious) = '\0'; - printf(".PRECIOUS:%s\n", str_precious); - } - return 0; -} diff --git a/scripts/mkversion_h b/scripts/mkversion_h index c8ceaee2fa8c..dd8c5eb6d7dd 100644..100755 --- a/scripts/mkversion_h +++ b/scripts/mkversion_h @@ -17,8 +17,9 @@ SUBLEVEL=$5 if [ -r $TARGET ] && \ cmp -s $TARGET .tmpversion; then - echo $TARGET is unchanged; + echo $TARGET was not updated; rm -f .tmpversion else + echo $TARGET was updated; mv -f .tmpversion $TARGET fi diff --git a/sound/last.c b/sound/last.c index 5e2c8d5422bf..93a00159c394 100644 --- a/sound/last.c +++ b/sound/last.c @@ -19,6 +19,8 @@ * */ +#include <linux/init.h> + #define SNDRV_MAIN_OBJECT_FILE #include <sound/driver.h> #include <sound/core.h> |
