From 49daee21a02ddd4584ef2492e55b3ee31d1f487d Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Sun, 13 Oct 2002 14:51:42 -0700
Subject: [NETFILTER]: Avoid nesting readlocks in conntrack code.

---
 include/linux/netfilter_ipv4/ip_conntrack_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index 7d8b393f3c44..c46f0e86fe60 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -17,7 +17,7 @@ extern void ip_conntrack_cleanup(void);
 struct ip_conntrack_protocol;
 extern struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol);
 /* Like above, but you already have conntrack read lock. */
-extern struct ip_conntrack_protocol *__find_proto(u_int8_t protocol);
+extern struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol);
 extern struct list_head protocol_list;
 
 /* Returns conntrack if it dealt with ICMP, and filled in skb->nfct */
-- 
cgit v1.2.3


From 0ebd6ce0f49a6a8a7623ede60befa51a575271ce Mon Sep 17 00:00:00 2001
From: Russell King <rmk@flint.arm.linux.org.uk>
Date: Mon, 14 Oct 2002 23:25:35 +0100
Subject: [MTD] Update 2.5 MTD code from MTD CVS and ARM tree This cset updates
 the 2.5 MTD code from the MTD CVS.  David Woodhouse is happy with me sending
 this.

Summary of changes:
- Add MTD device concatenation support module.
- Bootldr MTD partition parsing is obsolete, replaced by command-line
  based partition information.
- Add support for ARM map drivers:
   AUTCPU12, Ceiva, Camelot, Fortunet, edb7312, Impa7, PCI
- Add support for PCMCIA memory cards
- Update help texts for:
   Ocelot, ITE QED-4N-S01B, Flaga

Please note that this does not completely synchronise the 2.5 kernel
tree with MTD CVS.
---
 drivers/mtd/Config.help           |   60 +-
 drivers/mtd/Config.in             |    5 +-
 drivers/mtd/Makefile              |   11 +-
 drivers/mtd/bootldr.c             |  214 -----
 drivers/mtd/cmdline.c             |  343 ++++++++
 drivers/mtd/maps/Config.help      |  109 ++-
 drivers/mtd/maps/Config.in        |   12 +-
 drivers/mtd/maps/Makefile         |   20 +-
 drivers/mtd/maps/autcpu12-nvram.c |  179 +++++
 drivers/mtd/maps/ceiva.c          |  408 ++++++++++
 drivers/mtd/maps/dc21285.c        |   25 +-
 drivers/mtd/maps/edb7312.c        |  202 +++++
 drivers/mtd/maps/epxa10db-flash.c |  233 ++++++
 drivers/mtd/maps/fortunet.c       |  309 ++++++++
 drivers/mtd/maps/impa7.c          |  234 ++++++
 drivers/mtd/maps/iq80310.c        |    5 +-
 drivers/mtd/maps/pci.c            |  385 +++++++++
 drivers/mtd/maps/pcmciamtd.c      |  893 +++++++++++++++++++++
 drivers/mtd/maps/sa1100-flash.c   | 1554 +++++++++++++++++++++++++------------
 drivers/mtd/mtdconcat.c           |  675 ++++++++++++++++
 include/linux/mtd/concat.h        |   23 +
 21 files changed, 5130 insertions(+), 769 deletions(-)
 delete mode 100644 drivers/mtd/bootldr.c
 create mode 100644 drivers/mtd/cmdline.c
 create mode 100644 drivers/mtd/maps/autcpu12-nvram.c
 create mode 100644 drivers/mtd/maps/ceiva.c
 create mode 100644 drivers/mtd/maps/edb7312.c
 create mode 100644 drivers/mtd/maps/epxa10db-flash.c
 create mode 100644 drivers/mtd/maps/fortunet.c
 create mode 100644 drivers/mtd/maps/impa7.c
 create mode 100644 drivers/mtd/maps/pci.c
 create mode 100644 drivers/mtd/maps/pcmciamtd.c
 create mode 100644 drivers/mtd/mtdconcat.c
 create mode 100644 include/linux/mtd/concat.h

(limited to 'include/linux')

diff --git a/drivers/mtd/Config.help b/drivers/mtd/Config.help
index 822dc7424d34..83e9b5c7d44b 100644
--- a/drivers/mtd/Config.help
+++ b/drivers/mtd/Config.help
@@ -21,36 +21,62 @@ CONFIG_MTD_PARTITIONS
   devices. Partitioning on NFTL 'devices' is a different - that's the
   'normal' form of partitioning used on a block device.
 
+CONFIG_MTD_CONCAT
+  Support for concatenating several MTD devices into a single
+  (virtual) one. This allows you to have -for example- a JFFS(2)
+  file system spanning multiple physical flash chips. If unsure,
+  say 'Y'.
+
 CONFIG_MTD_REDBOOT_PARTS
   RedBoot is a ROM monitor and bootloader which deals with multiple
-  'images' in flash devices by putting a table in the last erase block
-  of the device, similar to a partition table, which gives the
-  offsets, lengths and names of all the images stored in the flash.
+  'images' in flash devices by putting a table in the last erase
+  block of the device, similar to a partition table, which gives
+  the offsets, lengths and names of all the images stored in the
+  flash.
 
   If you need code which can detect and parse this table, and register
   MTD 'partitions' corresponding to each image in the table, enable
-  this option.
+  this option. 
 
   You will still need the parsing functions to be called by the driver
-  for your particular device. It won't happen automatically. The
-  SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for
+  for your particular device. It won't happen automatically. The 
+  SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for 
   example.
 
-CONFIG_MTD_BOOTLDR_PARTS
-  The Compaq bootldr deals with multiple 'images' in flash devices
-  by putting a table in one of the first erase blocks of the device,
-  similar to a partition table, which gives the offsets, lengths and
-  names of all the images stored in the flash.
-
-  If you need code which can detect and parse this table, and register
-  MTD 'partitions' corresponding to each image in the table, enable
-  this option.
-
+CONFIG_MTD_CMDLINE_PARTS
+  Allow generic configuration of the MTD partition tables via the kernel
+  command line. Multiple flash resources are supported for hardware where
+  different kinds of flash memory are available. 
+  
   You will still need the parsing functions to be called by the driver
   for your particular device. It won't happen automatically. The 
   SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for 
   example.
 
+  The format for the command line is as follows:
+
+  mtdparts=<mtddef>[;<mtddef>]
+  <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
+  <partdef> := <size>[@offset][<name>][ro]
+  <mtd-id>  := unique id used in mapping driver/device
+  <size>    := standard linux memsize OR "-" to denote all 
+               remaining space
+  <name>    := (NAME)
+
+  Due to the way Linux handles the command line, no spaces are 
+  allowed in the partition definition, including mtd id's and partition 
+  names.
+
+  Examples:
+
+  1 flash resource (mtd-id "sa1100"), with 1 single writable partition:
+  mtdparts=sa1100:-
+
+  Same flash, but 2 named partitions, the first one being read-only:
+  mtdparts=sa1100:256k(ARMboot)ro,-(root)
+
+  If unsure, say 'N'.
+
 CONFIG_MTD_AFS_PARTS
   The ARM Firmware Suite allows the user to divide flash devices into
   multiple 'images'. Each such image has a header containing its name
@@ -61,7 +87,7 @@ CONFIG_MTD_AFS_PARTS
   enable this option.
 
   You will still need the parsing functions to be called by the driver
-  for your particular device.  It won't happen automatically.  The 
+  for your particular device. It won't happen automatically. The
   'armflash' map driver (CONFIG_MTD_ARMFLASH) does this, for example.
 
 CONFIG_MTD_DEBUG_VERBOSE
diff --git a/drivers/mtd/Config.in b/drivers/mtd/Config.in
index 797f79667844..7e3d3ffd2983 100644
--- a/drivers/mtd/Config.in
+++ b/drivers/mtd/Config.in
@@ -1,5 +1,5 @@
 
-# $Id: Config.in,v 1.71 2001/10/03 11:38:38 dwmw2 Exp $
+# $Id: Config.in,v 1.74 2002/04/23 13:52:14 mag Exp $
 
 mainmenu_option next_comment
 comment 'Memory Technology Devices (MTD)'
@@ -12,9 +12,10 @@ if [ "$CONFIG_MTD" = "y" -o "$CONFIG_MTD" = "m" ]; then
       int '  Debugging verbosity (0 = quiet, 3 = noisy)' CONFIG_MTD_DEBUG_VERBOSE 0
    fi
    dep_tristate '  MTD partitioning support' CONFIG_MTD_PARTITIONS $CONFIG_MTD
+   dep_tristate '  MTD concatenating support' CONFIG_MTD_CONCAT $CONFIG_MTD
    dep_tristate '  RedBoot partition table parsing' CONFIG_MTD_REDBOOT_PARTS $CONFIG_MTD_PARTITIONS
+   dep_tristate '  Command line partition table parsing' CONFIG_MTD_CMDLINE_PARTS $CONFIG_MTD_PARTITIONS
    if [ "$CONFIG_ARM" = "y" ]; then
-      dep_tristate '  Compaq bootldr partition table parsing' CONFIG_MTD_BOOTLDR_PARTS $CONFIG_MTD_PARTITIONS
       dep_tristate '  ARM Firmware Suite partition parsing' CONFIG_MTD_AFS_PARTS $CONFIG_MTD_PARTITIONS
    fi
 
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 4b8108198e7e..7ec5dfbb2501 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -1,12 +1,12 @@
 #
 # Makefile for the memory technology device drivers.
 #
-#
-# $Id: Makefile,v 1.63 2001/06/13 09:43:07 dwmw2 Exp $
+# Based on:
+# $Id: Makefile,v 1.66 2002/04/23 13:52:14 mag Exp $
 
-export-objs	:= mtdcore.o mtdpart.o redboot.o bootldr.o afs.o
+export-objs	:= mtdcore.o mtdpart.o redboot.o cmdline.o afs.o mtdconcat.o
 
-obj-y           += chips/ maps/	devices/ nand/
+obj-y           += chips/ maps/ devices/ nand/
 
 #                       *** BIG UGLY NOTE ***
 #
@@ -26,9 +26,10 @@ obj-y           += chips/ maps/	devices/ nand/
 
 # Core functionality.
 obj-$(CONFIG_MTD)		+= mtdcore.o
+obj-$(CONFIG_MTD_CONCAT)	+= mtdconcat.o
 obj-$(CONFIG_MTD_PARTITIONS)	+= mtdpart.o
 obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o
-obj-$(CONFIG_MTD_BOOTLDR_PARTS) += bootldr.o
+obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdline.o
 obj-$(CONFIG_MTD_AFS_PARTS)	+= afs.o
 
 # 'Users' - code which presents functionality to userspace.
diff --git a/drivers/mtd/bootldr.c b/drivers/mtd/bootldr.c
deleted file mode 100644
index 43fcd6bea8b8..000000000000
--- a/drivers/mtd/bootldr.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Read flash partition table from Compaq Bootloader
- *
- * Copyright 2001 Compaq Computer Corporation.
- *
- * $Id: bootldr.c,v 1.6 2001/10/02 15:05:11 dwmw2 Exp $
- *
- * Use consistent with the GNU GPL is permitted,
- * provided that this copyright notice is
- * preserved in its entirety in all copies and derived works.
- *
- * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED,
- * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS
- * FITNESS FOR ANY PARTICULAR PURPOSE.
- *
- */
-
-/*
- * Maintainer: Jamey Hicks (jamey.hicks@compaq.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
-#include <asm/setup.h>
-#include <linux/bootmem.h>
-
-#define FLASH_PARTITION_NAMELEN 32
-enum LFR_FLAGS {
-   LFR_SIZE_PREFIX = 1,		/* prefix data with 4-byte size */
-   LFR_PATCH_BOOTLDR = 2,	/* patch bootloader's 0th instruction */
-   LFR_KERNEL = 4,		/* add BOOTIMG_MAGIC, imgsize and VKERNEL_BASE to head of programmed region (see bootldr.c) */
-   LFR_EXPAND = 8               /* expand partition size to fit rest of flash */
-};
-
-// the tags are parsed too early to malloc or alloc_bootmem so we'll fix it
-// for now
-#define MAX_NUM_PARTITIONS 8
-typedef struct FlashRegion {
-   char name[FLASH_PARTITION_NAMELEN];
-   unsigned long base;
-   unsigned long size;
-   enum LFR_FLAGS flags;
-} FlashRegion;
-
-typedef struct BootldrFlashPartitionTable {
-  int magic; /* should be filled with 0x646c7470 (btlp) BOOTLDR_PARTITION_MAGIC */
-  int npartitions;
-  struct FlashRegion partition[8];
-} BootldrFlashPartitionTable;
-
-#define BOOTLDR_MAGIC      0x646c7462        /* btld: marks a valid bootldr image */
-#define BOOTLDR_PARTITION_MAGIC  0x646c7470  /* btlp: marks a valid bootldr partition table in params sector */
-
-#define BOOTLDR_MAGIC_OFFSET 0x20 /* offset 0x20 into the bootldr */
-#define BOOTCAP_OFFSET 0X30 /* offset 0x30 into the bootldr */
-
-#define BOOTCAP_WAKEUP	(1<<0)
-#define BOOTCAP_PARTITIONS (1<<1) /* partition table stored in params sector */
-#define BOOTCAP_PARAMS_AFTER_BOOTLDR (1<<2) /* params sector right after bootldr sector(s), else in last sector */
-
-static struct BootldrFlashPartitionTable Table;
-static struct BootldrFlashPartitionTable *partition_table = NULL;
-
-
-int parse_bootldr_partitions(struct mtd_info *master, struct mtd_partition **pparts)
-{
-	struct mtd_partition *parts;
-	int ret, retlen, i;
-	int npartitions = 0;
-	long partition_table_offset;
-	long bootmagic = 0;
-	long bootcap = 0;
-	int namelen = 0;
-
-	char *names; 
-
-#if 0
-	/* verify bootldr magic */
-	ret = master->read(master, BOOTLDR_MAGIC_OFFSET, sizeof(long), &retlen, (void *)&bootmagic);
-	if (ret) 
-		goto out;
-        if (bootmagic != BOOTLDR_MAGIC)
-                goto out;
-	/* see if bootldr supports partition tables and where to find the partition table */
-	ret = master->read(master, BOOTCAP_OFFSET, sizeof(long), &retlen, (void *)&bootcap);
-	if (ret) 
-		goto out;
-
-	if (!(bootcap & BOOTCAP_PARTITIONS))
-		goto out;
-	if (bootcap & BOOTCAP_PARAMS_AFTER_BOOTLDR)
-		partition_table_offset = master->erasesize;
-	else
-		partition_table_offset = master->size - master->erasesize;
-
-	printk(__FUNCTION__ ": partition_table_offset=%#lx\n", partition_table_offset);
-	printk(__FUNCTION__ ": ptable_addr=%#lx\n", ptable_addr);
-
-
-	/* Read the partition table */
-	partition_table = (struct BootldrFlashPartitionTable *)kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!partition_table)
-		return -ENOMEM;
-
-	ret = master->read(master, partition_table_offset,
-			   PAGE_SIZE, &retlen, (void *)partition_table);
-	if (ret)
-	    goto out;
-
-#endif
-	if (!partition_table)
-	    return -ENOMEM;
-
-	
-	printk(__FUNCTION__ ": magic=%#x\n", partition_table->magic);
-	printk(__FUNCTION__ ": numPartitions=%#x\n", partition_table->npartitions);
-
-
-	/* check for partition table magic number */
-	if (partition_table->magic != BOOTLDR_PARTITION_MAGIC) 
-		goto out;
-	npartitions = (partition_table->npartitions > MAX_NUM_PARTITIONS)?
-	    MAX_NUM_PARTITIONS:partition_table->npartitions;	
-
-	printk(__FUNCTION__ ": npartitions=%#x\n", npartitions);
-
-	for (i = 0; i < npartitions; i++) {
-		namelen += strlen(partition_table->partition[i].name) + 1;
-	}
-
-	parts = kmalloc(sizeof(*parts)*npartitions + namelen, GFP_KERNEL);
-	if (!parts) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	names = (char *)&parts[npartitions];
-	memset(parts, 0, sizeof(*parts)*npartitions + namelen);
-
-
-
-	// from here we use the partition table
-	for (i = 0; i < npartitions; i++) {
-                struct FlashRegion *partition = &partition_table->partition[i];
-		const char *name = partition->name;
-		parts[i].name = names;
-		names += strlen(name) + 1;
-		strcpy(parts[i].name, name);
-
-                if (partition->flags & LFR_EXPAND)
-                        parts[i].size = MTDPART_SIZ_FULL;
-                else
-                        parts[i].size = partition->size;
-		parts[i].offset = partition->base;
-		parts[i].mask_flags = 0;
-		
-		printk("        partition %s o=%x s=%x\n", 
-		       parts[i].name, parts[i].offset, parts[i].size);
-
-	}
-
-	ret = npartitions;
-	*pparts = parts;
-
- out:
-#if 0
-	if (partition_table)
-		kfree(partition_table);
-#endif
-	
-	return ret;
-}
-
-
-static int __init parse_tag_ptable(const struct tag *tag)
-{
-    char buf[128];
-    int i;
-    int j;
-    
-    partition_table = &Table;
-
-#ifdef CONFIG_DEBUG_LL    
-    sprintf(buf,"ptable: magic = = 0x%lx  npartitions= %d \n",
-	    tag->u.ptable.magic,tag->u.ptable.npartitions);
-    printascii(buf);
-    
-    for (i=0; i<tag->u.ptable.npartitions; i++){
-	sprintf(buf,"ptable: partition name = %s base= 0x%lx  size= 0x%lx flags= 0x%lx\n",
-	    (char *) (&tag->u.ptable.partition[i].name[0]),
-		tag->u.ptable.partition[i].base,
-		tag->u.ptable.partition[i].size,
-		tag->u.ptable.partition[i].flags);
-	printascii(buf);
-    }
-#endif
-
-    memcpy((void *)partition_table,(void *) (&(tag->u.ptable)),sizeof(partition_table) +
-	sizeof(struct FlashRegion)*tag->u.ptable.npartitions);
-
-    
-    return 0;
-}
-
-__tagtable(ATAG_PTABLE, parse_tag_ptable);
-
-EXPORT_SYMBOL(parse_bootldr_partitions);
-
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Compaq Computer Corporation");
-MODULE_DESCRIPTION("Parsing code for Compaq bootldr partitions");
diff --git a/drivers/mtd/cmdline.c b/drivers/mtd/cmdline.c
new file mode 100644
index 000000000000..4d92157f46de
--- /dev/null
+++ b/drivers/mtd/cmdline.c
@@ -0,0 +1,343 @@
+/*
+ * $Id: cmdline.c,v 1.4 2002/09/13 01:18:38 jamey Exp $
+ *
+ * Read flash partition table from command line
+ *
+ * Copyright 2002 SYSGO Real-Time Solutions GmbH
+ *
+ * The format for the command line is as follows:
+ * 
+ * mtdparts=<mtddef>[;<mtddef>]
+ * <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
+ * <partdef> := <size>[@offset][<name>][ro]
+ * <mtd-id>  := unique id used in mapping driver/device
+ * <size>    := standard linux memsize OR "-" to denote all remaining space
+ * <name>    := '(' NAME ')'
+ * 
+ * Examples:
+ * 
+ * 1 NOR Flash, with 1 single writable partition:
+ * edb7312-nor:-
+ * 
+ * 1 NOR Flash with 2 partitions, 1 NAND with one
+ * edb7312-nor:256k(ARMboot)ro,-(root);edb7312-nand:-(home)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <asm/setup.h>
+#include <linux/bootmem.h>
+
+/* error message prefix */
+#define ERRP "mtd: "
+
+/* debug macro */
+#if 0
+#define dbg(x) do { printk("DEBUG-CMDLINE-PART: "); printk x; } while(0)
+#else
+#define dbg(x)
+#endif
+
+
+/* special size referring to all the remaining space in a partition */
+#define SIZE_REMAINING 0xffffffff
+
+struct cmdline_mtd_partition {	/* one parsed <mtddef> from the command line */
+	struct cmdline_mtd_partition *next;	/* next parsed <mtddef>, NULL at the end */
+	char *mtd_id;				/* NUL-terminated copy of <mtd-id> */
+	int num_parts;				/* number of entries in parts[] */
+	struct mtd_partition *parts;		/* table built by newpart() */
+};
+
+/* mtdpart_setup() parses into here */
+static struct cmdline_mtd_partition *partitions;
+
+/* the command line passed to mtdpart_setup() */
+static char *cmdline;
+static int cmdline_parsed = 0;
+
+/*
+ * Parse one <partdef> at s, recursing for each one that follows a ','.
+ * The last definition in the chain kmalloc()s a single zeroed buffer for
+ * all *num_parts mtd_partition structs plus the accumulated extra memory
+ * (caller's extra_mem_size + the names), i.e. after the syntax checked ok.
+ * On success returns the table, sets *retptr behind the parsed text and
+ * *extra_mem_ptr behind the last name; on error prints and returns 0.
+ */
+static struct mtd_partition * newpart(char *s, 
+                                      char **retptr,
+                                      int *num_parts,
+                                      int this_part, 
+                                      unsigned char **extra_mem_ptr, 
+                                      int extra_mem_size)
+{
+	struct mtd_partition *parts;
+	unsigned long size;
+	unsigned long offset = 0;
+	char *name;
+	int name_len;
+	unsigned char *extra_mem;
+	char delim;
+	unsigned int mask_flags;
+
+	/* fetch the partition size */
+	if (*s == '-')
+	{	/* assign all remaining space to this partition */
+		size = SIZE_REMAINING;
+		s++;
+	}
+	else
+	{
+		size = memparse(s, &s);
+		if (size < PAGE_SIZE)
+		{
+			printk(KERN_ERR ERRP "partition size too small (%lx)\n", size);
+			return 0;
+		}
+	}
+
+	/* fetch partition name and flags */
+	mask_flags = 0; /* this is going to be a regular partition */
+	delim = 0;
+        /* check for offset */
+        if (*s == '@') 
+	{
+           s++;
+           offset = memparse(s, &s);
+        }
+        /* now look for name */
+	if (*s == '(')
+	{
+		delim = ')';
+	}
+	if (delim)
+	{
+		char *p;
+
+	    	name = ++s;
+		if ((p = strchr(name, delim)) == 0)
+		{
+			printk(KERN_ERR ERRP "no closing %c found in partition name\n", delim);
+			return 0;
+		}
+		name_len = p - name;
+		s = p + 1;
+	}
+	else
+	{
+	    	name = NULL;
+		name_len = 13; /* strlen("Partition_000"), generated by the sprintf below */
+	}
+   
+	/* record name length for memory allocation later */
+	extra_mem_size += name_len + 1;
+
+        /* test for options: "ro" sets MTD_WRITEABLE in mask_flags */
+        if (strncmp(s, "ro", 2) == 0) 
+	{
+		mask_flags |= MTD_WRITEABLE;
+		s += 2;
+        }
+
+	/* test if more partitions are following */
+	if (*s == ',')
+	{
+		if (size == SIZE_REMAINING)
+		{
+			printk(KERN_ERR ERRP "no partitions allowed after a fill-up partition\n");
+			return 0;
+		}
+		/* more partitions follow, parse them */
+		if ((parts = newpart(s + 1, &s, num_parts, 
+		                     this_part + 1, &extra_mem, extra_mem_size)) == 0)
+		  return 0;
+	}
+	else
+	{	/* this is the last partition: allocate space for all */
+		int alloc_size;
+
+		*num_parts = this_part + 1;
+		alloc_size = *num_parts * sizeof(struct mtd_partition) +
+			     extra_mem_size;
+		parts = kmalloc(alloc_size, GFP_KERNEL);
+		if (!parts)
+		{
+			printk(KERN_ERR ERRP "out of memory\n");
+			return 0;
+		}
+		memset(parts, 0, alloc_size);
+		extra_mem = (unsigned char *)(parts + *num_parts);
+	}
+	/* enter this partition (offset will be calculated later if it is zero at this point) */
+	parts[this_part].size = size;
+	parts[this_part].offset = offset;
+	parts[this_part].mask_flags = mask_flags;
+	if (name)
+	{
+		strncpy(extra_mem, name, name_len);
+		extra_mem[name_len] = 0;
+	}
+	else
+	{
+		sprintf(extra_mem, "Partition_%03d", this_part);
+	}
+	parts[this_part].name = extra_mem;
+	extra_mem += name_len + 1;
+
+	dbg(("partition %d: name <%s>, offset %x, size %x, mask flags %x\n",
+	     this_part, 
+	     parts[this_part].name,
+	     parts[this_part].offset,
+	     parts[this_part].size,
+	     parts[this_part].mask_flags));
+
+	/* return (updated) pointer to extra_mem memory */
+	if (extra_mem_ptr)
+	  *extra_mem_ptr = extra_mem;
+
+	/* return (updated) pointer command line string */
+	*retptr = s;
+
+	/* return partition table */
+	return parts;
+}
+
+/* Parse each <mtddef> in s and prepend it to the partitions list.
+ * Returns 1 on success, 0 on a syntax or allocation error (mtds parsed
+ * before the error stay on the list). */
+static int mtdpart_setup_real(char *s)
+{
+	cmdline_parsed = 1;
+
+	for( ; s != NULL; )
+	{
+		struct cmdline_mtd_partition *this_mtd;
+		struct mtd_partition *parts;
+	    	int mtd_id_len;
+		int num_parts;
+		char *p, *mtd_id;
+
+	    	mtd_id = s;
+		/* fetch <mtd-id> */
+		if (!(p = strchr(s, ':')))
+		{
+			printk(KERN_ERR ERRP "no mtd-id\n");
+			return 0;
+		}
+		mtd_id_len = p - mtd_id;
+
+		dbg(("parsing <%s>\n", p+1));
+
+		/* parse one mtd. have it reserve memory (plus alignment slack)
+		 * for the struct cmdline_mtd_partition and the mtd-id string. */
+		parts = newpart(p + 1,		/* cmdline */
+				&s,		/* out: updated cmdline ptr */
+				&num_parts,	/* out: number of parts */
+				0,		/* first partition */
+				(unsigned char**)&this_mtd, /* out: extra mem */
+				mtd_id_len + 1 + sizeof(*this_mtd) + sizeof(void *) - 1);
+		if (!parts)	/* parse error or OOM: this_mtd was never set */
+			return 0;
+		/* the names end at an arbitrary byte, so align before storing */
+		this_mtd = (void *)(((unsigned long)this_mtd + sizeof(void *) - 1) &
+				    ~(sizeof(void *) - 1UL));
+		this_mtd->parts = parts;
+		this_mtd->num_parts = num_parts;
+		this_mtd->mtd_id = (char*)(this_mtd + 1);
+		strncpy(this_mtd->mtd_id, mtd_id, mtd_id_len);
+		this_mtd->mtd_id[mtd_id_len] = 0;
+
+		/* link into chain */
+		this_mtd->next = partitions;	    	
+		partitions = this_mtd;
+
+		dbg(("mtdid=<%s> num_parts=<%d>\n", 
+		     this_mtd->mtd_id, this_mtd->num_parts));
+
+		/* EOS - we're done */
+		if (*s == 0)
+			break;
+
+		/* does another spec follow? */
+		if (*s != ';')
+		{
+			printk(KERN_ERR ERRP "bad character after partition (%c)\n", *s);
+			return 0;
+		}
+		s++;
+	}
+	return 1;
+}
+
+/*
+ * Called by the MTD mapping driver/device: looks up the command line
+ * partition table for mtd_id, parsing the command line on first use.
+ * Zero offsets and SIZE_REMAINING sizes are resolved against master->size.
+ * Returns the partition count and sets *pparts, or -EINVAL if not found.
+ */
+int parse_cmdline_partitions(struct mtd_info *master, 
+                             struct mtd_partition **pparts,
+                             const char *mtd_id)
+{
+	unsigned long offset;
+	int i;
+	struct cmdline_mtd_partition *part;
+
+	if (!cmdline)
+		return -EINVAL;
+
+	/* parse command line (only if not already attempted) */
+	if (!cmdline_parsed)
+		mtdpart_setup_real(cmdline);
+
+	for(part = partitions; part; part = part->next)
+	{
+		if (!strcmp(part->mtd_id, mtd_id))
+		{
+			for(i = 0, offset = 0; i < part->num_parts; i++)
+			{
+				if (!part->parts[i].offset)	/* 0: continue after the previous partition */
+				  part->parts[i].offset = offset;
+				else
+				  offset = part->parts[i].offset;
+				if (part->parts[i].size == SIZE_REMAINING)
+				  part->parts[i].size = master->size - offset;
+				if (offset + part->parts[i].size > master->size)
+				{
+					printk(KERN_WARNING ERRP 
+					       "%s: partitioning exceeds flash size, truncating\n",
+					       mtd_id);
+					part->parts[i].size = master->size - offset;
+					part->num_parts = i;	/* NOTE(review): ends the loop and leaves partition i out of the count - confirm intended */
+				}
+				offset += part->parts[i].size;
+			}
+			*pparts = part->parts;
+			return part->num_parts;
+		}
+	}
+	return -EINVAL;
+}
+
+
+/*
+ * Handler for the "mtdparts=" kernel parameter, called from
+ * main.c::checksetup(). We can not kmalloc() anything yet, so only the
+ * string is saved (it is parsed later, on demand). Always returns 1.
+ */
+static int __init mtdpart_setup(char *s)
+{
+	cmdline = s;
+	return 1;
+}
+
+__setup("mtdparts=", mtdpart_setup);
+
+EXPORT_SYMBOL(parse_cmdline_partitions);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marius Groeger <mag@sysgo.de>");
+MODULE_DESCRIPTION("Command line configuration of MTD partitions");
diff --git a/drivers/mtd/maps/Config.help b/drivers/mtd/maps/Config.help
index aaf3a1aa894e..d4cc1af6505e 100644
--- a/drivers/mtd/maps/Config.help
+++ b/drivers/mtd/maps/Config.help
@@ -1,3 +1,32 @@
+CONFIG_MTD_CDB89712
+  This enables access to the flash or ROM chips on the CDB89712 board.
+  If you have such a board, say 'Y'.
+
+CONFIG_MTD_CEIVA
+  This enables access to the flash chips on the Ceiva/Polaroid
+  PhotoMax Digital Picture Frame.
+  If you have such a device, say 'Y'.
+
+CONFIG_MTD_FORTUNET
+  This enables access to the Flash on the FortuNet board.  If you
+  have such a board, say 'Y'.
+
+CONFIG_MTD_AUTCPU12
+  This enables access to the NV-RAM on autronix autcpu12 board.
+  If you have such a board, say 'Y'.
+
+CONFIG_MTD_EDB7312
+  This enables access to the CFI Flash on the Cogent EDB7312 board.
+  If you have such a board, say 'Y' here.
+
+CONFIG_MTD_NAND_EDB7312
+  This enables access to the NAND Flash on the Cogent EDB7312 board.
+  If you have such a board, say 'Y' here.
+
+CONFIG_MTD_IMPA7
+  This enables access to the NOR Flash on the impA7 board of
+  implementa GmbH. If you have such a board, say 'Y' here.
+
 CONFIG_MTD_SA1100
   This enables access to the flash chips on most platforms based on
   the SA1100 and SA1110, including the Assabet and the Compaq iPAQ.
@@ -39,6 +68,12 @@ CONFIG_MTD_SUN_UFLASH
 CONFIG_MTD_NORA
   If you had to ask, you don't have one. Say 'N'.
 
+CONFIG_MTD_L440GX
+  Support for treating the BIOS flash chip on Intel L440GX motherboards
+  as an MTD device - with this you can reprogram your BIOS.
+
+  BE VERY CAREFUL.
+
 CONFIG_MTD_PNC2000
   PNC-2000 is the name of Network Camera product from PHOTRON
   Ltd. in Japan. It uses CFI-compliant flash.
@@ -50,6 +85,13 @@ CONFIG_MTD_RPXLITE
   to communicate with the chips on the RPXLite board. More at
   <http://www.embeddedplanet.com/rpx_lite_specification_sheet.htm>.
 
+CONFIG_MTD_TQM8XXL
+  The TQM8xxL PowerPC board has up to two banks of CFI-compliant
+  chips, currently uses AMD one. This 'mapping' driver supports
+  that arrangement, allowing the CFI probe and command set driver
+  code to communicate with the chips on the TQM8xxL board. More at
+  <http://www.denx.de/embedded-ppc-en.html>.
+
 CONFIG_MTD_SC520CDP
   The SC520 CDP board has two banks of CFI-compliant chips and one
   Dual-in-line JEDEC chip. This 'mapping' driver supports that
@@ -59,7 +101,7 @@ CONFIG_MTD_SBC_GXX
   This provides a driver for the on-board flash of Arcom Control
   Systems' SBC-GXn family of boards, formerly known as SBC-MediaGX.
   By default the flash is split into 3 partitions which are accessed
-  as separate MTD devices.  This board utilizes Intel StrataFlash.
+  as separate MTD devices. This board utilizes Intel StrataFlash.
   More info at
   <http://www.arcomcontrols.com/products/icp/pc104/processors/>.
 
@@ -78,6 +120,11 @@ CONFIG_MTD_NETSC520
   demonstration board. If you have one of these boards and would like 
   to use the flash chips on it, say 'Y'.
 
+CONFIG_MTD_OCELOT
+  This enables access routines for the boot flash device and for the
+  NVRAM on the Momenco Ocelot board. If you have one of these boards
+  and would like access to either of these, say 'Y'.
+
 CONFIG_MTD_ELAN_104NC
   This provides a driver for the on-board flash of the Arcom Control
   System's ELAN-104NC development board. By default the flash
@@ -91,17 +138,17 @@ CONFIG_MTD_DC21285
   <http://developer.intel.com/design/bridge/quicklist/dsc-21285.htm>.
 
 CONFIG_MTD_CSTM_MIPS_IXX
-  This provides a mapping driver for the Integrated Tecnology Express,
-  Inc (ITE) QED-4N-S01B eval board and the Globespan IVR Reference
-  Board.  It provides the necessary addressing, length, buswidth, vpp
-  code and addition setup of the flash device for these boards.  In
-  addition, this mapping driver can be used for other boards via
-  setting of the CONFIG_MTD_CSTM_MIPS_IXX_START/LEN/BUSWIDTH
-  parameters.  This mapping will provide one mtd device using one
-  partition.  The start address can be offset from the beginning of
-  flash and the len can be less than the total flash device size to
-  allow a window into the flash.  Both CFI and JEDEC probes are
-  called.
+  This provides a mapping driver for the Integrated Technology
+  Express, Inc (ITE) QED-4N-S01B eval board and the Globespan IVR
+  Reference Board. It provides the necessary addressing, length,
+  buswidth, vpp code and addition setup of the flash device for
+  these boards. In addition, this mapping driver can be used for
+  other boards via setting of the CONFIG_MTD_CSTM_MIPS_IXX_START/
+  LEN/BUSWIDTH parameters. This mapping will provide one mtd device
+  using one partition. The start address can be offset from the
+  beginning of flash and the len can be less than the total flash
+  device size to allow a window into the flash. Both CFI and JEDEC
+  probes are called.
 
 CONFIG_MTD_CSTM_MIPS_IXX_START
   This is the physical memory location that the MTD driver will
@@ -141,6 +188,11 @@ CONFIG_MTD_OCTAGON
   Computer. More information on the board is available at
   <http://www.octagonsystems.com/Products/5066/5066.html>.
 
+CONFIG_MTD_PCMCIA
+  Map driver for accessing PCMCIA linear flash memory cards. These
+  cards are usually around 4-16MiB in size. This does not include
+  Compact Flash cards which are treated as IDE devices.
+
 CONFIG_MTD_VMAX
   This provides a 'mapping' driver which supports the way in which
   the flash chips are connected in the Tempustech VMAX SBC301 Single
@@ -148,32 +200,21 @@ CONFIG_MTD_VMAX
   <http://www.tempustech.com/tt301.htm>.
 
 CONFIG_MTD_CFI_FLAGADM
-  Mapping for the Flaga digital module.  If you don�t have one, ignore
+  Mapping for the Flaga digital module. If you don't have one, ignore
   this setting.
 
-CONFIG_MTD_OCELOT
-  This enables access routines for the boot flash device and for the
-  NVRAM on the Momenco Ocelot board.  If you have one of these boards
-  and would like access to either of these, say 'Y'.
-
-CONFIG_MTD_CDB89712
-  This enables access to the flash or ROM chips on the CDB89712 board.
-  If you have such a board, say 'Y'.
-
-CONFIG_MTD_L440GX
-  Support for treating the BIOS flash chip on Intel L440GX motherboards
-  as an MTD device - with this you can reprogram your BIOS.
-
-  BE VERY CAREFUL.
-
 CONFIG_MTD_SOLUTIONENGINE
   This enables access to the flash chips on the Hitachi SolutionEngine and
   similar boards. Say 'Y' if you are building a kernel for such a board.
 
-CONFIG_MTD_TQM8XXL
-  The TQM8xxL PowerPC board has up to two banks of CFI-compliant
-  chips, currently uses AMD one. This 'mapping' driver supports
-  that arrangement, allowing the CFI probe and command set driver
-  code to communicate with the chips on the TQM8xxL board. More at
-  <http://www.denx.de/embedded-ppc-en.html>.
+CONFIG_MTD_EPXA10DB
+  This enables support for the flash devices on the Altera
+  Excalibur XA10 Development Board. If you are building a kernel
+  for one of these boards then you should say 'Y' otherwise say 'N'.
+
+CONFIG_MTD_PCI
+  Mapping for accessing flash devices on add-in cards like the Intel XScale
+  IQ80310 card, and the Intel EBSA285 card in blank ROM programming mode
+  (please see the manual for the link settings).
 
+  If you are not sure, say N.
diff --git a/drivers/mtd/maps/Config.in b/drivers/mtd/maps/Config.in
index 7b4cbd4eda85..e0668372fa79 100644
--- a/drivers/mtd/maps/Config.in
+++ b/drivers/mtd/maps/Config.in
@@ -56,8 +56,18 @@ if [ "$CONFIG_ARM" = "y" ]; then
    dep_tristate '  CFI Flash device mapped on ARM Integrator/P720T' CONFIG_MTD_ARM_INTEGRATOR $CONFIG_MTD_CFI
    dep_tristate '  Cirrus CDB89712 evaluation board mappings' CONFIG_MTD_CDB89712 $CONFIG_MTD_CFI $CONFIG_ARCH_CDB89712
    dep_tristate '  CFI Flash device mapped on StrongARM SA11x0' CONFIG_MTD_SA1100 $CONFIG_MTD_CFI $CONFIG_ARCH_SA1100 $CONFIG_MTD_PARTITIONS
-   dep_tristate '  CFI Flash device mapped on DC21285 Footbridge' CONFIG_MTD_DC21285 $CONFIG_MTD_CFI $CONFIG_ARCH_FOOTBRIDGE $CONFIG_MTD_PARTITIONS
+   dep_tristate '  CFI Flash device mapped on DC21285 Footbridge' CONFIG_MTD_DC21285 $CONFIG_MTD_CFI $CONFIG_ARCH_FOOTBRIDGE
    dep_tristate '  CFI Flash device mapped on the XScale IQ80310 board' CONFIG_MTD_IQ80310 $CONFIG_MTD_CFI $CONFIG_ARCH_IQ80310
+   dep_tristate '  CFI Flash device mapped on Epxa10db' CONFIG_MTD_EPXA10DB $CONFIG_MTD_CFI  $CONFIG_MTD_PARTITIONS $CONFIG_ARCH_CAMELOT
+   dep_tristate '  CFI Flash device mapped on the FortuNet board' CONFIG_MTD_FORTUNET $CONFIG_MTD_CFI $CONFIG_MTD_PARTITIONS $CONFIG_SA1100_FORTUNET
+   dep_tristate '  NV-RAM mapping AUTCPU12 board' CONFIG_MTD_AUTCPU12 $CONFIG_ARCH_AUTCPU12
+   dep_tristate '  CFI Flash device mapped on EDB7312' CONFIG_MTD_EDB7312 $CONFIG_MTD_CFI
+   dep_tristate '  JEDEC Flash device mapped on impA7' CONFIG_MTD_IMPA7 $CONFIG_MTD_JEDECPROBE
+   dep_tristate '  JEDEC Flash device mapped on Ceiva/Polaroid PhotoMax Digital Picture Frame' CONFIG_MTD_CEIVA $CONFIG_MTD_JEDECPROBE  $CONFIG_ARCH_CEIVA
 fi
 
+# This needs CFI or JEDEC, depending on the cards found.
+dep_tristate '  PCI MTD driver' CONFIG_MTD_PCI $CONFIG_MTD $CONFIG_PCI
+dep_tristate '  PCMCIA MTD driver' CONFIG_MTD_PCMCIA $CONFIG_MTD $CONFIG_PCMCIA
+
 endmenu
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index c0bdc2fa8f23..f4acee989d04 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -4,29 +4,37 @@
 # $Id: Makefile,v 1.13 2001/08/16 15:16:58 rmk Exp $
 
 # Chip mappings
-obj-$(CONFIG_MTD_CDB89712)      += cdb89712.o
+obj-$(CONFIG_MTD_CDB89712)	+= cdb89712.o
 obj-$(CONFIG_MTD_ARM_INTEGRATOR)+= integrator-flash.o
 obj-$(CONFIG_MTD_CFI_FLAGADM)	+= cfi_flagadm.o
-obj-$(CONFIG_MTD_CSTM_MIPS_IXX) += cstm_mips_ixx.o
-obj-$(CONFIG_MTD_DC21285)       += dc21285.o
-obj-$(CONFIG_MTD_ELAN_104NC)    += elan-104nc.o
+obj-$(CONFIG_MTD_CSTM_MIPS_IXX)	+= cstm_mips_ixx.o
+obj-$(CONFIG_MTD_DC21285)	+= dc21285.o
+obj-$(CONFIG_MTD_ELAN_104NC)	+= elan-104nc.o
+obj-$(CONFIG_MTD_EPXA10DB)	+= epxa10db-flash.o
 obj-$(CONFIG_MTD_IQ80310)	+= iq80310.o
 obj-$(CONFIG_MTD_L440GX)	+= l440gx.o
 obj-$(CONFIG_MTD_NORA)		+= nora.o
+obj-$(CONFIG_MTD_CEIVA)		+= ceiva.o
 obj-$(CONFIG_MTD_OCTAGON)	+= octagon-5066.o
 obj-$(CONFIG_MTD_PHYSMAP)	+= physmap.o 
 obj-$(CONFIG_MTD_PNC2000)	+= pnc2000.o
+obj-$(CONFIG_MTD_PCMCIA)	+= pcmciamtd.o
 obj-$(CONFIG_MTD_RPXLITE)	+= rpxlite.o
 obj-$(CONFIG_MTD_TQM8XXL)	+= tqm8xxl.o
-obj-$(CONFIG_MTD_SA1100)        += sa1100-flash.o
+obj-$(CONFIG_MTD_SA1100)	+= sa1100-flash.o
 obj-$(CONFIG_MTD_SBC_GXX)	+= sbc_gxx.o
 obj-$(CONFIG_MTD_SC520CDP)	+= sc520cdp.o
 obj-$(CONFIG_MTD_NETSC520)	+= netsc520.o
-obj-$(CONFIG_MTD_SUN_UFLASH)    += sun_uflash.o
+obj-$(CONFIG_MTD_SUN_UFLASH)	+= sun_uflash.o
 obj-$(CONFIG_MTD_VMAX)		+= vmax301.o
 obj-$(CONFIG_MTD_SCx200_DOCFLASH)+= scx200_docflash.o
 obj-$(CONFIG_MTD_DBOX2)		+= dbox2-flash.o
 obj-$(CONFIG_MTD_OCELOT)	+= ocelot.o
 obj-$(CONFIG_MTD_SOLUTIONENGINE)+= solutionengine.o
+obj-$(CONFIG_MTD_PCI)		+= pci.o
+obj-$(CONFIG_MTD_AUTCPU12)	+= autcpu12-nvram.o
+obj-$(CONFIG_MTD_EDB7312)	+= edb7312.o
+obj-$(CONFIG_MTD_IMPA7)		+= impa7.o
+obj-$(CONFIG_MTD_FORTUNET)	+= fortunet.o
 
 include $(TOPDIR)/Rules.make
diff --git a/drivers/mtd/maps/autcpu12-nvram.c b/drivers/mtd/maps/autcpu12-nvram.c
new file mode 100644
index 000000000000..db78b01e6438
--- /dev/null
+++ b/drivers/mtd/maps/autcpu12-nvram.c
@@ -0,0 +1,179 @@
+/*
+ * NV-RAM memory access on autcpu12 
+ * (C) 2002 Thomas Gleixner (gleixner@autronix.de)
+ *
+ * $Id: autcpu12-nvram.c,v 1.1 2002/02/22 09:30:24 gleixner Exp $ 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <asm/io.h>
+#include <asm/sizes.h>
+#include <asm/hardware.h>
+#include <asm/arch/autcpu12.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+
+__u8 autcpu12_read8(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readb(map->map_priv_1 + ofs);
+}
+
+__u16 autcpu12_read16(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readw(map->map_priv_1 + ofs);
+}
+
+__u32 autcpu12_read32(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readl(map->map_priv_1 + ofs);
+}
+
+void autcpu12_write8(struct map_info *map, __u8 d, unsigned long adr)
+{
+	__raw_writeb(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void autcpu12_write16(struct map_info *map, __u16 d, unsigned long adr)
+{
+	__raw_writew(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void autcpu12_write32(struct map_info *map, __u32 d, unsigned long adr)
+{
+	__raw_writel(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void autcpu12_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{
+	memcpy_fromio(to, map->map_priv_1 + from, len);
+}
+
+void autcpu12_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	while(len) {
+		__raw_writeb(*(unsigned char *) from, map->map_priv_1 + to);
+		from++;
+		to++;
+		len--;
+	}
+}
+
+static struct mtd_info *sram_mtd;
+
+struct map_info autcpu12_sram_map = {
+	name: "SRAM",
+	size: 32768,
+	buswidth: 8,
+	read8: autcpu12_read8,
+	read16: autcpu12_read16,
+	read32: autcpu12_read32,
+	copy_from: autcpu12_copy_from,
+	write8: autcpu12_write8,
+	write16: autcpu12_write16,
+	write32: autcpu12_write32,
+	copy_to: autcpu12_copy_to
+};
+
+static int __init init_autcpu12_sram (void)
+{
+	int err, save0, save1;
+
+	autcpu12_sram_map.map_priv_1 = (unsigned long)ioremap(0x12000000, SZ_128K);
+	if (!autcpu12_sram_map.map_priv_1) {
+		printk("Failed to ioremap autcpu12 NV-RAM space\n");
+		err = -EIO;
+		goto out;
+	}
+	
+	/* 
+	 * Check for 32K/128K 
+	 * read ofs 0 
+	 * read ofs 0x10000 
+	 * Write complement to ofs 0x10000
+	 * Read	and check result on ofs 0x0
+	 * Restore contents
+	 */
+	save0 = autcpu12_read32(&autcpu12_sram_map,0);
+	save1 = autcpu12_read32(&autcpu12_sram_map,0x10000);
+	autcpu12_write32(&autcpu12_sram_map,~save0,0x10000);
+	/* if we find this pattern on 0x0, we have 32K size 
+	 * restore contents and exit
+	 */
+	if ( autcpu12_read32(&autcpu12_sram_map,0) != save0) {
+		autcpu12_write32(&autcpu12_sram_map,save0,0x0);
+		goto map;
+	}
+	/* We have a 128K found, restore 0x10000 and set size
+	 * to 128K
+	 */
+	autcpu12_write32(&autcpu12_sram_map,save1,0x10000);
+	autcpu12_sram_map.size = SZ_128K;
+
+map:
+	sram_mtd = do_map_probe("map_ram", &autcpu12_sram_map);
+	if (!sram_mtd) {
+		printk("NV-RAM probe failed\n");
+		err = -ENXIO;
+		goto out_ioremap;
+	}
+
+	sram_mtd->module = THIS_MODULE;
+	sram_mtd->erasesize = 16;
+	
+	if (add_mtd_device(sram_mtd)) {
+		printk("NV-RAM device addition failed\n");
+		err = -ENOMEM;
+		goto out_probe;
+	}
+
+	printk("NV-RAM device size %ldK registered on AUTCPU12\n",autcpu12_sram_map.size/SZ_1K);
+		
+	return 0;
+
+out_probe:
+	map_destroy(sram_mtd);
+	sram_mtd = 0;
+
+out_ioremap:
+	iounmap((void *)autcpu12_sram_map.map_priv_1);
+out:
+	return err;
+}
+
+static void __exit cleanup_autcpu12_maps(void)
+{
+	if (sram_mtd) {
+		del_mtd_device(sram_mtd);
+		map_destroy(sram_mtd);
+		iounmap((void *)autcpu12_sram_map.map_priv_1);
+	}
+}
+
+module_init(init_autcpu12_sram);
+module_exit(cleanup_autcpu12_maps);
+
+MODULE_AUTHOR("Thomas Gleixner");
+MODULE_DESCRIPTION("autcpu12 NV-RAM map driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/maps/ceiva.c b/drivers/mtd/maps/ceiva.c
new file mode 100644
index 000000000000..259a9a8b76c0
--- /dev/null
+++ b/drivers/mtd/maps/ceiva.c
@@ -0,0 +1,408 @@
+/*
+ * Ceiva flash memory driver.
+ * Copyright (C) 2002 Rob Scott <rscott@mtrob.fdns.net>
+ *
+ * Note: this driver supports jedec compatible devices. Modification
+ * for CFI compatible devices should be straight forward: change
+ * jedec_probe to cfi_probe.
+ *
+ * Based on: sa1100-flash.c, which has the following copyright:
+ * Flash memory access on SA11x0 based devices
+ *
+ * (C) 2000 Nicolas Pitre <nico@cam.org>
+ *
+ * $Id: ceiva.c,v 1.2 2002/10/14 12:50:22 rmk Exp $
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/concat.h>
+
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/io.h>
+#include <asm/sizes.h>
+
+/*
+ * This isn't complete yet, so...
+ */
+#define CONFIG_MTD_CEIVA_STATICMAP
+
+static __u8 clps_read8(struct map_info *map, unsigned long ofs)
+{
+	return readb(map->map_priv_1 + ofs);
+}
+
+static __u16 clps_read16(struct map_info *map, unsigned long ofs)
+{
+	return readw(map->map_priv_1 + ofs);
+}
+
+static __u32 clps_read32(struct map_info *map, unsigned long ofs)
+{
+	return readl(map->map_priv_1 + ofs);
+}
+
+static void clps_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{
+	memcpy(to, (void *)(map->map_priv_1 + from), len);
+}
+
+static void clps_write8(struct map_info *map, __u8 d, unsigned long adr)
+{
+	writeb(d, map->map_priv_1 + adr);
+}
+
+static void clps_write16(struct map_info *map, __u16 d, unsigned long adr)
+{
+	writew(d, map->map_priv_1 + adr);
+}
+
+static void clps_write32(struct map_info *map, __u32 d, unsigned long adr)
+{
+	writel(d, map->map_priv_1 + adr);
+}
+
+static void clps_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	memcpy((void *)(map->map_priv_1 + to), from, len);
+}
+
+static struct map_info clps_map __initdata = {
+	name:		"clps flash",
+	read8:		clps_read8,
+	read16:		clps_read16,
+	read32:		clps_read32,
+	copy_from:	clps_copy_from,
+	write8:		clps_write8,
+	write16:	clps_write16,
+	write32:	clps_write32,
+	copy_to:	clps_copy_to,
+};
+
+#ifdef CONFIG_MTD_CEIVA_STATICMAP
+/*
+ * See include/linux/mtd/partitions.h for definition of the mtd_partition
+ * structure.
+ *
+ * Please note:
+ *  1. The flash size given should be the largest flash size that can
+ *     be accommodated.
+ *
+ *  2. The bus width must be defined in clps_setup_flash.
+ *
+ * The MTD layer will detect flash chip aliasing and reduce the size of
+ * the map accordingly.
+ *
+ */
+
+#ifdef CONFIG_ARCH_CEIVA
+/* Flash / Partition sizing */
+/* For the 28F8003, we use the block mapping to calculate the sizes */
+#define MAX_SIZE_KiB                  (16 + 8 + 8 + 96 + (7*128))
+#define BOOT_PARTITION_SIZE_KiB       (16)
+#define PARAMS_PARTITION_SIZE_KiB     (8)
+#define KERNEL_PARTITION_SIZE_KiB     (4*128)
+/* Use both remaining portion of first flash, and all of second flash */
+#define ROOT_PARTITION_SIZE_KiB       ((3*128) + (8*128))
+
+static struct mtd_partition ceiva_partitions[] = {
+	{
+		name: "Ceiva BOOT partition",
+		size:   BOOT_PARTITION_SIZE_KiB*1024,
+		offset: 0,
+
+	},{
+		name: "Ceiva parameters partition",
+		size:   PARAMS_PARTITION_SIZE_KiB*1024,
+		offset: (16 + 8) * 1024,
+	},{
+		name: "Ceiva kernel partition",
+		size: (KERNEL_PARTITION_SIZE_KiB)*1024,
+		offset: 0x20000,
+
+	},{
+		name: "Ceiva root filesystem partition",
+		offset: MTDPART_OFS_APPEND,
+		size: (ROOT_PARTITION_SIZE_KiB)*1024,
+	}
+};
+#endif
+
+static int __init clps_static_partitions(struct mtd_partition **parts)
+{
+	int nb_parts = 0;
+
+#ifdef CONFIG_ARCH_CEIVA
+	if (machine_is_ceiva()) {
+		*parts       = ceiva_partitions;
+		nb_parts     = ARRAY_SIZE(ceiva_partitions);
+	}
+#endif
+	return nb_parts;
+}
+#endif
+
+struct clps_info {
+	unsigned long base;
+	unsigned long size;
+	int width;
+	void *vbase;
+	struct map_info *map;
+	struct mtd_info *mtd;
+	struct resource *res;
+};
+
+#define NR_SUBMTD 4
+
+static struct clps_info info[NR_SUBMTD];
+
+static int __init clps_setup_mtd(struct clps_info *clps, int nr, struct mtd_info **rmtd)
+{
+	struct mtd_info *subdev[nr];
+	struct map_info *maps;
+	int i, found = 0, ret = 0;
+
+	/*
+	 * Allocate the map_info structs in one go.
+	 */
+	maps = kmalloc(sizeof(struct map_info) * nr, GFP_KERNEL);
+	if (!maps)
+		return -ENOMEM;
+
+	/*
+	 * Claim and then map the memory regions.
+	 */
+	for (i = 0; i < nr; i++) {
+		if (clps[i].base == (unsigned long)-1)
+			break;
+
+		clps[i].res = request_mem_region(clps[i].base, clps[i].size, "clps flash");
+		if (!clps[i].res) {
+			ret = -EBUSY;
+			break;
+		}
+
+		clps[i].map = maps + i;
+		memcpy(clps[i].map, &clps_map, sizeof(struct map_info));
+
+		clps[i].vbase = ioremap(clps[i].base, clps[i].size);
+		if (!clps[i].vbase) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		clps[i].map->map_priv_1 = (unsigned long)clps[i].vbase;
+		clps[i].map->buswidth = clps[i].width;
+		clps[i].map->size = clps[i].size;
+
+		clps[i].mtd = do_map_probe("jedec_probe", clps[i].map);
+		if (clps[i].mtd == NULL) {
+			ret = -ENXIO;
+			break;
+		}
+		clps[i].mtd->module = THIS_MODULE;
+		subdev[i] = clps[i].mtd;
+
+		printk(KERN_INFO "clps flash: JEDEC device at 0x%08lx, %dMiB, "
+			"%d-bit\n", clps[i].base, clps[i].mtd->size >> 20,
+			clps[i].width * 8);
+		found += 1;
+	}
+
+	/*
+	 * ENXIO is special.  It means we didn't find a chip when
+	 * we probed.  We need to tear down the mapping, free the
+	 * resource and mark it as such.
+	 */
+	if (ret == -ENXIO) {
+		iounmap(clps[i].vbase);
+		clps[i].vbase = NULL;
+		release_resource(clps[i].res);
+		clps[i].res = NULL;
+	}
+
+	/*
+	 * If we found one device, don't bother with concat support.
+	 * If we found multiple devices, use concat if we have it
+	 * available, otherwise fail.
+	 */
+	if (ret == 0 || ret == -ENXIO) {
+		if (found == 1) {
+			*rmtd = subdev[0];
+			ret = 0;
+		} else if (found > 1) {
+			/* We detected multiple devices.
+			 * Concatenate them together. */
+#ifdef CONFIG_MTD_CONCAT
+			*rmtd = mtd_concat_create(subdev, found,
+						  "clps flash");
+			if (*rmtd == NULL)
+				ret = -ENXIO;
+#else
+			printk(KERN_ERR "clps flash: multiple devices "
+			       "found but MTD concat support disabled.\n");
+			ret = -ENXIO;
+#endif
+		} else {
+			ret = -ENXIO;	/* nothing probed, *rmtd was never set */
+		}
+	}
+
+	/* If we failed, clean up. */
+	if (ret) {
+		if (i == nr)	/* loop completed: clps[nr] is past the end */
+			i--;
+		for (; i >= 0; i--) {
+			if (clps[i].mtd)
+				map_destroy(clps[i].mtd);
+			if (clps[i].vbase)
+				iounmap(clps[i].vbase);
+			if (clps[i].res)
+				release_resource(clps[i].res);
+		}
+
+		kfree(maps);
+	}
+
+	return ret;
+}
+
+static void __exit clps_destroy_mtd(struct clps_info *clps, struct mtd_info *mtd)
+{
+	int i;
+
+	del_mtd_partitions(mtd);
+
+	if (mtd != clps[0].mtd)
+		mtd_concat_destroy(mtd);
+	/* Valid slots are 0 .. NR_SUBMTD - 1; tear them down in reverse. */
+	for (i = NR_SUBMTD - 1; i >= 0; i--) {
+		if (clps[i].mtd)
+			map_destroy(clps[i].mtd);
+		if (clps[i].vbase)
+			iounmap(clps[i].vbase);
+		if (clps[i].res)
+			release_resource(clps[i].res);
+	}
+	kfree(clps[0].map);
+}
+
+/*
+ * Fill info[] with the base, size and width of each flash bank.
+ * Returns the number of banks, or -ENODEV on an unsupported machine.
+ */
+
+static int __init clps_setup_flash(void)
+{
+	int nr = -ENODEV;	/* stays an error unless a known machine matches */
+
+#ifdef CONFIG_ARCH_CEIVA
+	if (machine_is_ceiva()) {
+		info[0].base = CS0_PHYS_BASE;
+		info[0].size = SZ_32M;
+		info[0].width = CEIVA_FLASH_WIDTH;
+		info[1].base = CS1_PHYS_BASE;
+		info[1].size = SZ_32M;
+		info[1].width = CEIVA_FLASH_WIDTH;
+		nr = 2;
+	}
+#endif
+	return nr;
+}
+
+extern int parse_redboot_partitions(struct mtd_info *master, struct mtd_partition **pparts);
+extern int parse_cmdline_partitions(struct mtd_info *master, struct mtd_partition **pparts, char *);
+
+static struct mtd_partition *parsed_parts;
+
+static void __init clps_locate_partitions(struct mtd_info *mtd)
+{
+	const char *part_type = NULL;
+	int nr_parts = 0;
+	do {
+		/*
+		 * Partition selection stuff.
+		 */
+#ifdef CONFIG_MTD_CMDLINE_PARTS
+		nr_parts = parse_cmdline_partitions(mtd, &parsed_parts, "clps");
+		if (nr_parts > 0) {
+			part_type = "command line";
+			break;
+		}
+#endif
+#ifdef CONFIG_MTD_REDBOOT_PARTS
+		nr_parts = parse_redboot_partitions(mtd, &parsed_parts);
+		if (nr_parts > 0) {
+			part_type = "RedBoot";
+			break;
+		}
+#endif
+#ifdef CONFIG_MTD_CEIVA_STATICMAP
+		nr_parts = clps_static_partitions(&parsed_parts);
+		if (nr_parts > 0) {
+			part_type = "static";
+			break;
+		}
+		printk("found: %d partitions\n", nr_parts);
+#endif
+	} while (0);
+
+	if (nr_parts == 0) {
+		printk(KERN_NOTICE "clps flash: no partition info "
+			"available, registering whole flash\n");
+		add_mtd_device(mtd);
+	} else {
+		printk(KERN_NOTICE "clps flash: using %s partition "
+			"definition\n", part_type);
+		add_mtd_partitions(mtd, parsed_parts, nr_parts);
+	}
+
+	/* Always succeeds. */
+}
+
+static void __exit clps_destroy_partitions(void)
+{
+	if (parsed_parts)
+		kfree(parsed_parts);
+}
+
+static struct mtd_info *mymtd;
+
+static int __init clps_mtd_init(void)
+{
+	int ret;
+	int nr;
+
+	nr = clps_setup_flash();
+	if (nr < 0)
+		return nr;
+
+	ret = clps_setup_mtd(info, nr, &mymtd);
+	if (ret)
+		return ret;
+
+	clps_locate_partitions(mymtd);
+
+	return 0;
+}
+
+static void __exit clps_mtd_cleanup(void)
+{
+	clps_destroy_mtd(info, mymtd);
+	clps_destroy_partitions();
+}
+
+module_init(clps_mtd_init);
+module_exit(clps_mtd_cleanup);
+
+MODULE_AUTHOR("Rob Scott");
+MODULE_DESCRIPTION("Cirrus Logic JEDEC map driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/maps/dc21285.c b/drivers/mtd/maps/dc21285.c
index e7eea7ef53b2..f030f3447302 100644
--- a/drivers/mtd/maps/dc21285.c
+++ b/drivers/mtd/maps/dc21285.c
@@ -5,9 +5,9 @@
  *
  * This code is GPL
  * 
- * $Id: dc21285.c,v 1.6 2001/10/02 15:05:14 dwmw2 Exp $
+ * $Id: dc21285.c,v 1.9 2002/10/14 12:22:10 rmk Exp $
  */
-
+#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -44,15 +44,15 @@ void dc21285_copy_from(struct map_info *map, void *to, unsigned long from, ssize
 
 void dc21285_write8(struct map_info *map, __u8 d, unsigned long adr)
 {
-	*CSR_ROMWRITEREG = adr;
+	*CSR_ROMWRITEREG = adr & 3;
 	adr &= ~3;
 	*(__u8*)(map->map_priv_1 + adr) = d;
 }
 
 void dc21285_write16(struct map_info *map, __u16 d, unsigned long adr)
 {
-	*CSR_ROMWRITEREG = adr;
-	adr &= ~1;
+	*CSR_ROMWRITEREG = adr & 3;
+	adr &= ~3;
 	*(__u16*)(map->map_priv_1 + adr) = d;
 }
 
@@ -131,7 +131,7 @@ int __init init_dc21285(void)
 		dc21285_map.buswidth*8);
 
 	/* Let's map the flash area */
-	dc21285_map.map_priv_1 = (unsigned long)__ioremap(DC21285_FLASH, 16*1024*1024, 0);
+	dc21285_map.map_priv_1 = (unsigned long)ioremap(DC21285_FLASH, 16*1024*1024);
 	if (!dc21285_map.map_priv_1) {
 		printk("Failed to ioremap\n");
 		return -EIO;
@@ -139,21 +139,22 @@ int __init init_dc21285(void)
 
 	mymtd = do_map_probe("cfi_probe", &dc21285_map);
 	if (mymtd) {
-		int nrparts;
+		int nrparts = 0;
 
 		mymtd->module = THIS_MODULE;
 			
 		/* partition fixup */
 
+#ifdef CONFIG_MTD_REDBOOT_PARTS
 		nrparts = parse_redboot_partitions(mymtd, &dc21285_parts);
-		if (nrparts <=0) {
+#endif
+		if (nrparts > 0) {
+			add_mtd_partitions(mymtd, dc21285_parts, nrparts);
+		} else if (nrparts == 0) {
 			printk(KERN_NOTICE "RedBoot partition table failed\n");
-			iounmap((void *)dc21285_map.map_priv_1);
-			return -ENXIO;
+			add_mtd_device(mymtd);
 		}
 
-		add_mtd_partitions(mymtd, dc21285_parts, nrparts);
-
 		/* 
 		 * Flash timing is determined with bits 19-16 of the
 		 * CSR_SA110_CNTL.  The value is the number of wait cycles, or
diff --git a/drivers/mtd/maps/edb7312.c b/drivers/mtd/maps/edb7312.c
new file mode 100644
index 000000000000..405429d92735
--- /dev/null
+++ b/drivers/mtd/maps/edb7312.c
@@ -0,0 +1,202 @@
+/*
+ * $Id: edb7312.c,v 1.2 2002/09/05 05:11:24 acurtis Exp $
+ *
+ * Handle mapping of the NOR flash on Cogent EDB7312 boards
+ *
+ * Copyright 2002 SYSGO Real-Time Solutions GmbH
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/config.h>
+
+#ifdef CONFIG_MTD_PARTITIONS
+#include <linux/mtd/partitions.h>
+#endif
+
+#define WINDOW_ADDR 0x00000000      /* physical properties of flash */
+#define WINDOW_SIZE 0x01000000
+#define BUSWIDTH    2
+#define FLASH_BLOCKSIZE_MAIN	0x20000
+#define FLASH_NUMBLOCKS_MAIN	128
+/* can be "cfi_probe", "jedec_probe", "map_rom", 0 }; */
+#define PROBETYPES { "cfi_probe", 0 }
+
+#define MSG_PREFIX "EDB7312-NOR:"   /* prefix for our printk()'s */
+#define MTDID      "edb7312-nor"    /* for mtdparts= partitioning */
+
+static struct mtd_info *mymtd;
+
+__u8 edb7312nor_read8(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readb(map->map_priv_1 + ofs);
+}
+
+__u16 edb7312nor_read16(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readw(map->map_priv_1 + ofs);
+}
+
+__u32 edb7312nor_read32(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readl(map->map_priv_1 + ofs);
+}
+
+void edb7312nor_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{
+	memcpy_fromio(to, map->map_priv_1 + from, len);
+}
+
+void edb7312nor_write8(struct map_info *map, __u8 d, unsigned long adr)
+{
+	__raw_writeb(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void edb7312nor_write16(struct map_info *map, __u16 d, unsigned long adr)
+{
+	__raw_writew(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void edb7312nor_write32(struct map_info *map, __u32 d, unsigned long adr)
+{
+	__raw_writel(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void edb7312nor_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	memcpy_toio(map->map_priv_1 + to, from, len);
+}
+
+struct map_info edb7312nor_map = {
+	name: "NOR flash on EDB7312",
+	size: WINDOW_SIZE,
+	buswidth: BUSWIDTH,
+	read8: edb7312nor_read8,
+	read16: edb7312nor_read16,
+	read32: edb7312nor_read32,
+	copy_from: edb7312nor_copy_from,
+	write8: edb7312nor_write8,
+	write16: edb7312nor_write16,
+	write32: edb7312nor_write32,
+	copy_to: edb7312nor_copy_to
+};
+
+#ifdef CONFIG_MTD_PARTITIONS
+
+/*
+ * MTD partitioning stuff 
+ */
+static struct mtd_partition static_partitions[3] =
+{
+    {
+	name: "ARMboot",
+	  size: 0x40000,
+	  offset: 0
+    },
+    {
+	name: "Kernel",
+	  size: 0x200000,
+	  offset: 0x40000
+    },
+    {
+	name: "RootFS",
+	  size: 0xDC0000,
+	  offset: 0x240000
+    },
+};
+
+#define NB_OF(x) (sizeof (x) / sizeof (x[0]))
+
+#ifdef CONFIG_MTD_CMDLINE_PARTS
+int parse_cmdline_partitions(struct mtd_info *master, 
+			     struct mtd_partition **pparts,
+			     const char *mtd_id);
+#endif
+
+#endif
+
+static int                   mtd_parts_nb = 0;
+static struct mtd_partition *mtd_parts    = 0;
+
+int __init init_edb7312nor(void)
+{
+	static const char *rom_probe_types[] = PROBETYPES;
+	const char **type;
+	const char *part_type = 0;
+
+       	printk(KERN_NOTICE MSG_PREFIX "0x%08x at 0x%08x\n", 
+	       WINDOW_SIZE, WINDOW_ADDR);
+	edb7312nor_map.map_priv_1 = (unsigned long)
+	  ioremap(WINDOW_ADDR, WINDOW_SIZE);
+
+	if (!edb7312nor_map.map_priv_1) {
+		printk(MSG_PREFIX "failed to ioremap\n");
+		return -EIO;
+	}
+
+	mymtd = 0;
+	type = rom_probe_types;
+	for(; !mymtd && *type; type++) {
+		mymtd = do_map_probe(*type, &edb7312nor_map);
+	}
+	if (mymtd) {
+		mymtd->module = THIS_MODULE;
+
+#ifdef CONFIG_MTD_PARTITIONS
+#ifdef CONFIG_MTD_CMDLINE_PARTS
+		mtd_parts_nb = parse_cmdline_partitions(mymtd, &mtd_parts, MTDID);
+		if (mtd_parts_nb > 0)
+		  part_type = "command line";
+#endif
+		if (mtd_parts_nb == 0)
+		{
+			mtd_parts = static_partitions;
+			mtd_parts_nb = NB_OF(static_partitions);
+			part_type = "static";
+		}
+#endif
+		add_mtd_device(mymtd);
+		if (mtd_parts_nb == 0)
+		  printk(KERN_NOTICE MSG_PREFIX "no partition info available\n");
+		else
+		{
+			printk(KERN_NOTICE MSG_PREFIX
+			       "using %s partition definition\n", part_type);
+			add_mtd_partitions(mymtd, mtd_parts, mtd_parts_nb);
+		}
+		return 0;
+	}
+
+	iounmap((void *)edb7312nor_map.map_priv_1);
+	return -ENXIO;
+}
+
+static void __exit cleanup_edb7312nor(void)
+{
+	if (mymtd) {
+		del_mtd_device(mymtd);
+		map_destroy(mymtd);
+	}
+	if (edb7312nor_map.map_priv_1) {
+		iounmap((void *)edb7312nor_map.map_priv_1);
+		edb7312nor_map.map_priv_1 = 0;
+	}
+}
+
+module_init(init_edb7312nor);
+module_exit(cleanup_edb7312nor);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marius Groeger <mag@sysgo.de>");
+MODULE_DESCRIPTION("Generic configurable MTD map driver");
diff --git a/drivers/mtd/maps/epxa10db-flash.c b/drivers/mtd/maps/epxa10db-flash.c
new file mode 100644
index 000000000000..cb4c76e4bb71
--- /dev/null
+++ b/drivers/mtd/maps/epxa10db-flash.c
@@ -0,0 +1,233 @@
+/*
+ * Flash memory access on EPXA based devices
+ *
+ * (C) 2000 Nicolas Pitre <nico@cam.org>
+ *  Copyright (C) 2001 Altera Corporation
+ *  Copyright (C) 2001 Red Hat, Inc.
+ *
+ * $Id: epxa10db-flash.c,v 1.4 2002/08/22 10:46:19 cdavies Exp $ 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+
+#include <asm/hardware.h>
+#ifdef CONFIG_EPXA10DB
+#define BOARD_NAME "EPXA10DB"
+#else
+#define BOARD_NAME "EPXA1DB"
+#endif
+
+static int nr_parts = 0;
+static struct mtd_partition *parts;
+
+static struct mtd_info *mymtd;
+
+extern int parse_redboot_partitions(struct mtd_info *, struct mtd_partition **);
+static int epxa_default_partitions(struct mtd_info *master, struct mtd_partition **pparts);
+
+static __u8 epxa_read8(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readb(map->map_priv_1 + ofs);
+}
+
+static __u16 epxa_read16(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readw(map->map_priv_1 + ofs);
+}
+
+static __u32 epxa_read32(struct map_info *map, unsigned long ofs)
+{
+	return __raw_readl(map->map_priv_1 + ofs);
+}
+
+static void epxa_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{
+	memcpy_fromio(to, (void *)(map->map_priv_1 + from), len);
+}
+
+static void epxa_write8(struct map_info *map, __u8 d, unsigned long adr)
+{
+	__raw_writeb(d, map->map_priv_1 + adr);
+	mb();
+}
+
+static void epxa_write16(struct map_info *map, __u16 d, unsigned long adr)
+{
+	__raw_writew(d, map->map_priv_1 + adr);
+	mb();
+}
+
+static void epxa_write32(struct map_info *map, __u32 d, unsigned long adr)
+{
+	__raw_writel(d, map->map_priv_1 + adr);
+	mb();
+}
+
+static void epxa_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	memcpy_toio((void *)(map->map_priv_1 + to), from, len);
+}
+
+
+
+static struct map_info epxa_map = {
+	name:		"EPXA flash",
+	size:		FLASH_SIZE,
+	buswidth:	2,
+	read8:		epxa_read8,
+	read16:		epxa_read16,
+	read32:		epxa_read32,
+	copy_from:	epxa_copy_from,
+	write8:		epxa_write8,
+	write16:	epxa_write16,
+	write32:	epxa_write32,
+	copy_to:	epxa_copy_to
+};
+
+
+static int __init epxa_mtd_init(void)
+{
+	int i;
+	
+	printk(KERN_NOTICE "%s flash device: %x at %x\n", BOARD_NAME, FLASH_SIZE, FLASH_START);
+	epxa_map.map_priv_1 = (unsigned long)ioremap(FLASH_START, FLASH_SIZE);
+	if (!epxa_map.map_priv_1) {
+		printk("Failed to ioremap %s flash\n",BOARD_NAME);
+		return -EIO;
+	}
+
+	mymtd = do_map_probe("cfi_probe", &epxa_map);
+	if (!mymtd) {
+		iounmap((void *)epxa_map.map_priv_1);
+		return -ENXIO;
+	}
+
+	mymtd->module = THIS_MODULE;
+
+	/* Unlock the flash device. */
+	if(mymtd->unlock){
+		for (i=0; i<mymtd->numeraseregions;i++){
+			int j;
+			for(j=0;j<mymtd->eraseregions[i].numblocks;j++){
+				mymtd->unlock(mymtd,mymtd->eraseregions[i].offset + j * mymtd->eraseregions[i].erasesize,mymtd->eraseregions[i].erasesize);
+			}
+		}
+	}
+
+#ifdef CONFIG_MTD_REDBOOT_PARTS
+	nr_parts = parse_redboot_partitions(mymtd, &parts);
+
+	if (nr_parts > 0) {
+		add_mtd_partitions(mymtd, parts, nr_parts);
+		return 0;
+	}
+#endif
+#ifdef CONFIG_MTD_AFS_PARTS
+	nr_parts = parse_afs_partitions(mymtd, &parts);
+
+	if (nr_parts > 0) {
+		add_mtd_partitions(mymtd, parts, nr_parts);
+		return 0;
+	}
+#endif
+
+	/* No recognised partitioning schemes found - use defaults */
+	nr_parts = epxa_default_partitions(mymtd, &parts);
+	if (nr_parts > 0) {
+		add_mtd_partitions(mymtd, parts, nr_parts);
+		return 0;
+	}
+
+	/* If all else fails... */
+	add_mtd_device(mymtd);
+	return 0;
+}
+
+static void __exit epxa_mtd_cleanup(void)
+{
+	if (mymtd) {
+		if (nr_parts)
+			del_mtd_partitions(mymtd);
+		else
+			del_mtd_device(mymtd);
+		map_destroy(mymtd);
+	}
+	if (epxa_map.map_priv_1) {
+		iounmap((void *)epxa_map.map_priv_1);
+		epxa_map.map_priv_1 = 0;
+	}
+}
+
+
+/* 
+ * This will do for now, once we decide which bootldr we're finally 
+ * going to use then we'll remove this function and do it properly
+ *
+ * Partitions are currently (as offsets from base of flash):
+ * 0x00000000 - 0x003FFFFF - bootloader (!)
+ * 0x00400000 - 0x00FFFFFF - Flashdisk
+ */
+
+static int __init epxa_default_partitions(struct mtd_info *master, struct mtd_partition **pparts)
+{
+	struct mtd_partition *parts;
+	int i = 0;
+	int npartitions = 0;
+	char *names;
+	const char *name = "jffs";
+	size_t len = sizeof(*parts) + strlen(name) + 1;	/* +1 for the NUL */
+
+	printk("Using default partitions for %s\n",BOARD_NAME);
+	/* One partition entry followed by storage for its name string. */
+	parts = kmalloc(len, GFP_KERNEL);
+	if (!parts)
+		goto out;	/* returns 0 with *pparts == NULL */
+	memzero(parts, len);
+	npartitions = 1;
+
+	names = (char *)&parts[npartitions];
+	parts[i].name = names;
+	strcpy(parts[i].name, name);
+
+	/* The partition starts at a fixed offset and runs to the end of flash. */
+#ifdef CONFIG_EPXA10DB
+	parts[i].size = FLASH_SIZE-0x00400000;
+	parts[i].offset = 0x00400000;
+#else
+	parts[i].size = FLASH_SIZE-0x00180000;
+	parts[i].offset = 0x00180000;
+#endif
+
+ out:
+	*pparts = parts;
+	return npartitions;
+}
+
+
+module_init(epxa_mtd_init);
+module_exit(epxa_mtd_cleanup);
+
+MODULE_AUTHOR("Clive Davies");
+MODULE_DESCRIPTION("Altera epxa mtd flash map");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c
new file mode 100644
index 000000000000..98fd322e9523
--- /dev/null
+++ b/drivers/mtd/maps/fortunet.c
@@ -0,0 +1,309 @@
+/* fortunet.c memory map
+ *
+ * $Id: fortunet.c,v 1.2 2002/10/14 12:50:22 rmk Exp $
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+
+#define MAX_NUM_REGIONS		4
+#define MAX_NUM_PARTITIONS	8
+
+#define DEF_WINDOW_ADDR_PHY	0x00000000
+#define DEF_WINDOW_SIZE		0x00800000		// 8 Mega Bytes
+
+#define MTD_FORTUNET_PK		"MTD FortuNet: "
+
+#define MAX_NAME_SIZE		128
+
+struct map_region
+{
+	int			window_addr_phyical;	/* physical base address handed to ioremap_nocache() */
+	int			altbuswidth;	/* bus width tried when the first cfi_probe fails */
+	struct map_info		map_info;	/* per-region copy of default_map */
+	struct mtd_info		*mymtd;	/* result of do_map_probe() */
+	struct mtd_partition	parts[MAX_NUM_PARTITIONS];	/* filled by the MTD_Partion= option */
+	char			map_name[MAX_NAME_SIZE];	/* storage behind map_info.name */
+	char			parts_name[MAX_NUM_PARTITIONS][MAX_NAME_SIZE];	/* storage behind parts[].name */
+};
+
+static struct map_region	map_regions[MAX_NUM_REGIONS];
+static int			map_regions_set[MAX_NUM_REGIONS] = {0,0,0,0};
+static int			map_regions_parts[MAX_NUM_REGIONS] = {0,0,0,0};
+
+
+__u8 fortunet_read8(struct map_info *map, unsigned long ofs)
+{	/* 8-bit load straight from address map_priv_1 + ofs */
+	return *(__u8 *)(map->map_priv_1 + ofs);
+}
+
+__u16 fortunet_read16(struct map_info *map, unsigned long ofs)
+{	/* 16-bit load straight from address map_priv_1 + ofs */
+	return *(__u16 *)(map->map_priv_1 + ofs);
+}
+
+__u32 fortunet_read32(struct map_info *map, unsigned long ofs)
+{	/* 32-bit load straight from address map_priv_1 + ofs */
+	return *(__u32 *)(map->map_priv_1 + ofs);
+}
+
+void fortunet_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{	/* memcpy() of len bytes starting at address map_priv_1 + from */
+	memcpy(to, (void *)(map->map_priv_1 + from), len);
+}
+
+void fortunet_write8(struct map_info *map, __u8 d, unsigned long adr)
+{	/* 8-bit store of d to address map_priv_1 + adr */
+	*(__u8 *)(map->map_priv_1 + adr) = d;
+}
+
+void fortunet_write16(struct map_info *map, __u16 d, unsigned long adr)
+{	/* 16-bit store of d to address map_priv_1 + adr */
+	*(__u16 *)(map->map_priv_1 + adr) = d;
+}
+
+void fortunet_write32(struct map_info *map, __u32 d, unsigned long adr)
+{	/* 32-bit store of d to address map_priv_1 + adr */
+	*(__u32 *)(map->map_priv_1 + adr) = d;
+}
+
+void fortunet_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{	/* memcpy() of len bytes to address map_priv_1 + to */
+	memcpy((void *)(map->map_priv_1 + to), from, len);
+}
+
+struct map_info default_map = {	/* template that is memcpy()ed into each region's map_info */
+	size: DEF_WINDOW_SIZE,
+	buswidth: 4,
+	read8: fortunet_read8,
+	read16: fortunet_read16,
+	read32: fortunet_read32,
+	copy_from: fortunet_copy_from,
+	write8: fortunet_write8,
+	write16: fortunet_write16,
+	write32: fortunet_write32,
+	copy_to: fortunet_copy_to
+};
+
+/*
+ * Copy the leading string field of a boot option into dest.
+ *
+ * Characters are copied from sor until an unquoted ',' (which is consumed),
+ * the end of the string, or until dest is full.  Double quotes are dropped
+ * and toggle quoting; a ',' inside quotes is copied like any other character.
+ * dest is NUL terminated whenever dest_size is at least 1; a dest_size of
+ * 0 or less leaves dest untouched.
+ *
+ * dest:      destination buffer
+ * dest_size: size of dest in bytes, including room for the NUL
+ * sor:       option text to parse
+ *
+ * Returns a pointer to the first character of sor that was not consumed.
+ */
+static char * __init get_string_option(char *dest,int dest_size,char *sor)
+{
+	int quoted = 0;
+
+	if(dest_size<=0)
+		return sor;
+	/* one byte is reserved for the terminator; if that leaves no room for
+	 * payload stop now (the old code kept copying and overran dest) */
+	if(!--dest_size)
+	{
+		*dest = 0;
+		return sor;
+	}
+
+	while(*sor)
+	{
+		char c = *sor++;
+
+		if(c=='\"')
+		{
+			quoted = !quoted;
+			continue;
+		}
+		if((c==',')&&!quoted)
+			break;
+		*dest++ = c;
+		/* buffer full: stop right after the last character stored */
+		if(!--dest_size)
+			break;
+	}
+
+	*dest = 0;
+	return sor;
+}
+
+/*
+ * Handler for the "MTD_Region=" boot option:
+ *   name,region-number[,base,size,buswidth,altbuswidth]
+ * Resets the region to the defaults, names it and applies the optional
+ * fields that were given (params[0] is the count of integers parsed).
+ */
+static int __init MTD_New_Region(char *line)
+{
+	char	string[MAX_NAME_SIZE];
+	int	params[6];
+	struct map_region *region;
+	get_options (get_string_option(string,sizeof(string),line),6,params);
+	if(params[0]<1)
+	{
+		printk(MTD_FORTUNET_PK "Bad paramters for MTD Region "
+			" name,region-number[,base,size,buswidth,altbuswidth]\n");
+		return 1;
+	}
+	if((params[1]<0)||(params[1]>=MAX_NUM_REGIONS))
+	{
+		printk(MTD_FORTUNET_PK "Bad region index of %d only have 0..%u regions\n",
+			params[1],MAX_NUM_REGIONS-1);
+		return 1;
+	}
+	region = &map_regions[params[1]];
+	/* every field except parts[]/parts_name[] is set below; the region is not
+	 * memset so partitions from an earlier MTD_Partion= option survive */
+	memcpy(&region->map_info,&default_map,sizeof(region->map_info));
+	map_regions_set[params[1]] = 1;
+	region->window_addr_phyical = DEF_WINDOW_ADDR_PHY;
+	region->altbuswidth = 2;
+	region->mymtd = NULL;
+	region->map_info.name = region->map_name;
+	strcpy(region->map_info.name,string);
+	if(params[0]>1)
+		region->window_addr_phyical = params[2];
+	if(params[0]>2)
+		region->map_info.size = params[3];
+	if(params[0]>3)
+		region->map_info.buswidth = params[4];
+	if(params[0]>4)
+		region->altbuswidth = params[5];
+	return 1;
+}
+
+/* "MTD_Partion=name,region-number,size,offset": add a partition to a region */
+static int __init MTD_New_Partion(char *line)
+{
+	char	string[MAX_NAME_SIZE];
+	int	params[4];
+	struct mtd_partition *part;
+	get_options (get_string_option(string,sizeof(string),line),4,params);
+	if(params[0]<3)
+	{
+		printk(MTD_FORTUNET_PK "Bad paramters for MTD Partion "
+			" name,region-number,size,offset\n");
+		return 1;
+	}
+	if((params[1]<0)||(params[1]>=MAX_NUM_REGIONS))
+	{
+		printk(MTD_FORTUNET_PK "Bad region index of %d only have 0..%u regions\n",
+			params[1],MAX_NUM_REGIONS-1);
+		return 1;
+	}
+	if(map_regions_parts[params[1]]>=MAX_NUM_PARTITIONS)
+	{
+		printk(MTD_FORTUNET_PK "Out of space for partion in this region\n");
+		return 1;
+	}
+	part = &map_regions[params[1]].parts[map_regions_parts[params[1]]];
+	part->name = map_regions[params[1]].parts_name[map_regions_parts[params[1]]];
+	strcpy(part->name,string);
+	part->size = params[2];
+	part->offset = params[3];
+	part->mask_flags = 0;
+	map_regions_parts[params[1]]++;
+	return 1;
+}
+
+__setup("MTD_Region=", MTD_New_Region);
+__setup("MTD_Partion=", MTD_New_Partion);
+
+/*
+ * Map and probe every configured region and register its partitions.  A
+ * region that cannot be mapped or probed is unmapped, unmarked and skipped.
+ * Returns 0 if at least one region was registered, -ENXIO otherwise.
+ */
+int __init init_fortunet(void)
+{
+	struct map_region *region;
+	int	ix,iy;
+	for(iy=ix=0;ix<MAX_NUM_REGIONS;ix++)
+	{
+		region = &map_regions[ix];
+		if(map_regions_parts[ix]&&(!map_regions_set[ix]))
+		{
+			printk(MTD_FORTUNET_PK "Region %d is not setup (Seting to default)\n",ix);
+			/* not memset: that would wipe the MTD_Partion= entries */
+			memcpy(&region->map_info,&default_map,sizeof(region->map_info));
+			map_regions_set[ix] = 1;
+			region->window_addr_phyical = DEF_WINDOW_ADDR_PHY;
+			region->altbuswidth = 2;
+			region->mymtd = NULL;
+			region->map_info.name = region->map_name;
+			strcpy(region->map_info.name,"FORTUNET");
+		}
+		if(!map_regions_set[ix])
+			continue;
+		printk(KERN_NOTICE MTD_FORTUNET_PK "%s flash device at phyicaly "
+			" address %x size %x\n",region->map_info.name,
+			region->window_addr_phyical,region->map_info.size);
+		region->map_info.map_priv_1 = (unsigned long)ioremap_nocache(
+			region->window_addr_phyical,region->map_info.size);
+		if(!region->map_info.map_priv_1)
+		{
+			printk(MTD_FORTUNET_PK "%s flash failed to ioremap!\n",region->map_info.name);
+			/* unmark it so cleanup_fortunet() leaves this region alone */
+			map_regions_set[ix] = 0;
+			continue;
+		}
+		printk(KERN_NOTICE MTD_FORTUNET_PK "%s flash is veritualy at: %x\n",
+			region->map_info.name,region->map_info.map_priv_1);
+		region->mymtd = do_map_probe("cfi_probe",&region->map_info);
+		if((!region->mymtd)&&(region->altbuswidth!=region->map_info.buswidth))
+		{
+			printk(KERN_NOTICE MTD_FORTUNET_PK "Trying alternet buswidth "
+				"for %s flash.\n",region->map_info.name);
+			region->map_info.buswidth = region->altbuswidth;
+			region->mymtd = do_map_probe("cfi_probe",&region->map_info);
+		}
+		if(!region->mymtd)
+		{
+			/* no chip found: the old code dereferenced the NULL mymtd */
+			iounmap((void *)region->map_info.map_priv_1);
+			region->map_info.map_priv_1 = 0;
+			map_regions_set[ix] = 0;
+			continue;
+		}
+		iy++;
+		region->mymtd->module = THIS_MODULE;
+		add_mtd_partitions(region->mymtd,region->parts,map_regions_parts[ix]);
+	}
+	return iy ? 0 : -ENXIO;
+}
+
+static void __exit cleanup_fortunet(void)
+{	/* for every region marked as set: drop partitions and chip, then unmap */
+	int	ix;
+	for(ix=0;ix<MAX_NUM_REGIONS;ix++)
+	{
+		struct map_region *region = &map_regions[ix];
+		if(!map_regions_set[ix])
+			continue;
+		if(region->mymtd)
+		{
+			del_mtd_partitions(region->mymtd);
+			map_destroy(region->mymtd);
+		}
+		iounmap((void *)region->map_info.map_priv_1);
+	}
+}
+
+module_init(init_fortunet);
+module_exit(cleanup_fortunet);
+
+MODULE_AUTHOR("FortuNet, Inc.");
+MODULE_DESCRIPTION("MTD map driver for FortuNet boards");
diff --git a/drivers/mtd/maps/impa7.c b/drivers/mtd/maps/impa7.c
new file mode 100644
index 000000000000..3dc382bc9511
--- /dev/null
+++ b/drivers/mtd/maps/impa7.c
@@ -0,0 +1,234 @@
+/*
+ * $Id: impa7.c,v 1.2 2002/09/05 05:11:24 acurtis Exp $
+ *
+ * Handle mapping of the NOR flash on implementa A7 boards
+ *
+ * Copyright 2002 SYSGO Real-Time Solutions GmbH
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/config.h>
+
+#ifdef CONFIG_MTD_PARTITIONS
+#include <linux/mtd/partitions.h>
+#endif
+
+#define WINDOW_ADDR0 0x00000000      /* physical properties of flash */
+#define WINDOW_SIZE0 0x00800000
+#define WINDOW_ADDR1 0x10000000      /* physical properties of flash */
+#define WINDOW_SIZE1 0x00800000
+#define NUM_FLASHBANKS 2
+#define BUSWIDTH     4
+
+/* can be { "cfi_probe", "jedec_probe", "map_rom", 0 }; */
+#define PROBETYPES { "jedec_probe", 0 }
+
+#define MSG_PREFIX "impA7:"   /* prefix for our printk()'s */
+#define MTDID      "impa7-%d"  /* for mtdparts= partitioning */
+
+static struct mtd_info *impa7_mtd[NUM_FLASHBANKS] = { 0 };
+
+__u8 impa7_read8(struct map_info *map, unsigned long ofs)
+{	/* __raw_readb() at address map_priv_1 + ofs */
+	return __raw_readb(map->map_priv_1 + ofs);
+}
+
+__u16 impa7_read16(struct map_info *map, unsigned long ofs)
+{	/* __raw_readw() at address map_priv_1 + ofs */
+	return __raw_readw(map->map_priv_1 + ofs);
+}
+
+__u32 impa7_read32(struct map_info *map, unsigned long ofs)
+{	/* __raw_readl() at address map_priv_1 + ofs */
+	return __raw_readl(map->map_priv_1 + ofs);
+}
+
+void impa7_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{	/* memcpy_fromio() of len bytes starting at map_priv_1 + from */
+	memcpy_fromio(to, map->map_priv_1 + from, len);
+}
+
+void impa7_write8(struct map_info *map, __u8 d, unsigned long adr)
+{	/* raw 8-bit write to map_priv_1 + adr, followed by mb() */
+	__raw_writeb(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void impa7_write16(struct map_info *map, __u16 d, unsigned long adr)
+{	/* raw 16-bit write to map_priv_1 + adr, followed by mb() */
+	__raw_writew(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void impa7_write32(struct map_info *map, __u32 d, unsigned long adr)
+{	/* raw 32-bit write to map_priv_1 + adr, followed by mb() */
+	__raw_writel(d, map->map_priv_1 + adr);
+	mb();
+}
+
+void impa7_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{	/* memcpy_toio() of len bytes to map_priv_1 + to */
+	memcpy_toio(map->map_priv_1 + to, from, len);
+}
+
+static struct map_info impa7_map[NUM_FLASHBANKS] = {
+	{
+	name: "impA7 NOR Flash Bank #0",
+	size: WINDOW_SIZE0,
+	buswidth: BUSWIDTH,
+	read8: impa7_read8,
+	read16: impa7_read16,
+	read32: impa7_read32,
+	copy_from: impa7_copy_from,
+	write8: impa7_write8,
+	write16: impa7_write16,
+	write32: impa7_write32,
+	copy_to: impa7_copy_to
+	},
+	{
+	name: "impA7 NOR Flash Bank #1",
+	size: WINDOW_SIZE1,
+	buswidth: BUSWIDTH,
+	read8: impa7_read8,
+	read16: impa7_read16,
+	read32: impa7_read32,
+	copy_from: impa7_copy_from,
+	write8: impa7_write8,
+	write16: impa7_write16,
+	write32: impa7_write32,
+	copy_to: impa7_copy_to
+	},
+};
+
+#ifdef CONFIG_MTD_PARTITIONS
+
+/*
+ * MTD partitioning stuff 
+ */
+static struct mtd_partition static_partitions[] =
+{
+    {
+	name: "FileSystem",
+	  size: 0x800000,
+	  offset: 0x00000000
+    },
+};
+
+#define NB_OF(x) (sizeof (x) / sizeof (x[0]))
+
+#ifdef CONFIG_MTD_CMDLINE_PARTS
+int parse_cmdline_partitions(struct mtd_info *master, 
+			     struct mtd_partition **pparts,
+			     const char *mtd_id);
+#endif
+
+#endif
+
+static int                   mtd_parts_nb = 0;
+static struct mtd_partition *mtd_parts    = 0;
+
+/*
+ * Map and probe each flash bank, registering every chip found (and its
+ * partitions).  A bank that cannot be mapped or probed is skipped with
+ * map_priv_1 left at 0.  Returns 0 if any bank was registered, otherwise
+ * -EIO if a mapping failed or -ENXIO if no chip was detected.
+ */
+int __init init_impa7(void)
+{
+	static const char *rom_probe_types[] = PROBETYPES;
+	const char **type;
+	const char *part_type = 0;
+	int i;
+	static struct { u_long addr; u_long size; } pt[NUM_FLASHBANKS] = {
+	  { WINDOW_ADDR0, WINDOW_SIZE0 },
+	  { WINDOW_ADDR1, WINDOW_SIZE1 },
+	};
+	char mtdid[10];
+	int devicesfound = 0;
+	int err = -ENXIO;
+
+	for(i=0; i<NUM_FLASHBANKS; i++)
+	{
+		printk(KERN_NOTICE MSG_PREFIX "probing 0x%08lx at 0x%08lx\n",
+		       pt[i].size, pt[i].addr);
+		impa7_map[i].map_priv_1 = (unsigned long)
+		  ioremap(pt[i].addr, pt[i].size);
+		if (!impa7_map[i].map_priv_1) {
+			/* skip it: returning would abandon banks already registered */
+			printk(MSG_PREFIX "failed to ioremap\n");
+			err = -EIO;
+			continue;
+		}
+		impa7_mtd[i] = 0;
+		type = rom_probe_types;
+		for(; !impa7_mtd[i] && *type; type++) {
+			impa7_mtd[i] = do_map_probe(*type, &impa7_map[i]);
+		}
+
+		if (!impa7_mtd[i]) {
+			iounmap((void *)impa7_map[i].map_priv_1);
+			impa7_map[i].map_priv_1 = 0;	/* keeps cleanup_impa7() from unmapping it again */
+			continue;
+		}
+		impa7_mtd[i]->module = THIS_MODULE;
+		add_mtd_device(impa7_mtd[i]);
+		devicesfound++;
+#ifdef CONFIG_MTD_PARTITIONS
+#ifdef CONFIG_MTD_CMDLINE_PARTS
+		sprintf(mtdid, MTDID, i);
+		mtd_parts_nb = parse_cmdline_partitions(impa7_mtd[i],
+							&mtd_parts, mtdid);
+		if (mtd_parts_nb > 0)
+			part_type = "command line";
+#endif
+		if (mtd_parts_nb <= 0) {
+			mtd_parts = static_partitions;
+			mtd_parts_nb = NB_OF(static_partitions);
+			part_type = "static";
+		}
+		if (mtd_parts_nb <= 0) {
+			printk(KERN_NOTICE MSG_PREFIX
+			       "no partition info available\n");
+		} else {
+			printk(KERN_NOTICE MSG_PREFIX
+			       "using %s partition definition\n", part_type);
+			add_mtd_partitions(impa7_mtd[i], mtd_parts, mtd_parts_nb);
+		}
+#endif
+	}
+	return devicesfound == 0 ? err : 0;
+}
+
+static void __exit cleanup_impa7(void)
+{
+	int i;
+	for (i=0; i<NUM_FLASHBANKS; i++) {
+		if (impa7_mtd[i]) {
+#ifdef CONFIG_MTD_PARTITIONS
+			/* init_impa7() also registered partitions on this master */
+			del_mtd_partitions(impa7_mtd[i]);
+#endif
+			del_mtd_device(impa7_mtd[i]);
+			map_destroy(impa7_mtd[i]);
+		}
+		if (impa7_map[i].map_priv_1) {
+			iounmap((void *)impa7_map[i].map_priv_1);
+			impa7_map[i].map_priv_1 = 0;
+		}
+	}
+}
+
+module_init(init_impa7);
+module_exit(cleanup_impa7);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pavel Bartusek <pba@sysgo.de>");
+MODULE_DESCRIPTION("MTD map driver for implementa impA7");
diff --git a/drivers/mtd/maps/iq80310.c b/drivers/mtd/maps/iq80310.c
index cb3cb05766d1..3a301135831b 100644
--- a/drivers/mtd/maps/iq80310.c
+++ b/drivers/mtd/maps/iq80310.c
@@ -1,5 +1,5 @@
 /*
- * $Id: iq80310.c,v 1.8 2001/10/02 15:05:14 dwmw2 Exp $
+ * $Id: iq80310.c,v 1.9 2002/01/01 22:45:02 rmk Exp $
  *
  * Mapping for the Intel XScale IQ80310 evaluation board
  *
@@ -116,7 +116,7 @@ static int __init init_iq80310(void)
 	int parsed_nr_parts = 0;
 	char *part_type = "static";
 
-	iq80310_map.map_priv_1 = (unsigned long)__ioremap(WINDOW_ADDR, WINDOW_SIZE, 0);
+	iq80310_map.map_priv_1 = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE);
 	if (!iq80310_map.map_priv_1) {
 		printk("Failed to ioremap\n");
 		return -EIO;
@@ -161,7 +161,6 @@ static void __exit cleanup_iq80310(void)
 	}
 	if (iq80310_map.map_priv_1)
 		iounmap((void *)iq80310_map.map_priv_1);
-	return 0;
 }
 
 module_init(init_iq80310);
diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
new file mode 100644
index 000000000000..ccc854980c6f
--- /dev/null
+++ b/drivers/mtd/maps/pci.c
@@ -0,0 +1,385 @@
+/*
+ *  linux/drivers/mtd/maps/pci.c
+ *
+ *  Copyright (C) 2001 Russell King, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  $Id: pci.c,v 1.1 2001/09/27 20:28:45 rmk Exp $
+ * 
+ * Generic PCI memory map driver.  We support the following boards:
+ *  - Intel IQ80310 ATU.
+ *  - Intel EBSA285 (blank rom programming mode). Tested working 27/09/2001
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+
+struct map_pci_info;
+
+struct mtd_pci_info {
+	int  (*init)(struct pci_dev *dev, struct map_pci_info *map);	/* called by mtd_pci_probe() before the flash probe */
+	void (*exit)(struct pci_dev *dev, struct map_pci_info *map);	/* copied into map_pci_info.exit by mtd_pci_probe() */
+	unsigned long (*translate)(struct map_pci_info *map, unsigned long ofs);	/* copied into map_pci_info.translate */
+	const char *map_name;	/* probe name handed to do_map_probe() */
+};
+
+struct map_pci_info {
+	struct map_info map;	/* first member: the mtd_pci_* accessors cast struct map_info * back to this */
+	void *base;	/* ioremap_nocache() result set by the board init hook */
+	void (*exit)(struct pci_dev *dev, struct map_pci_info *map);	/* board teardown hook */
+	unsigned long (*translate)(struct map_pci_info *map, unsigned long ofs);	/* maps a flash offset to an offset from base */
+	struct pci_dev *dev;	/* set by mtd_pci_probe() */
+};	
+
+/*
+ * Intel IOP80310 Flash driver
+ */
+
+static int
+intel_iq80310_init(struct pci_dev *dev, struct map_pci_info *map)
+{
+	u32 saved_win_base;
+
+	/* buswidth 1, 0x00800000 bytes, mapped through PCI resource 0 */
+	map->map.buswidth = 1;
+	map->map.size     = 0x00800000;
+	map->base         = ioremap_nocache(pci_resource_start(dev, 0),
+					    pci_resource_len(dev, 0));
+	if (map->base == NULL)
+		return -ENOMEM;
+
+	/*
+	 * We want to base the memory window at Xscale bus address 0,
+	 * not 0x1000.  The previous value of config register 0x44 is
+	 * kept in map_priv_2 so that intel_iq80310_exit() can restore it.
+	 */
+	pci_read_config_dword(dev, 0x44, &saved_win_base);
+	pci_write_config_dword(dev, 0x44, 0);
+	map->map.map_priv_2 = saved_win_base;
+
+	return 0;
+}
+
+static void
+intel_iq80310_exit(struct pci_dev *dev, struct map_pci_info *map)
+{	/* unmap the flash and write map_priv_2 back to config register 0x44 */
+	if (map->base)
+		iounmap((void *)map->base);
+	pci_write_config_dword(dev, 0x44, map->map.map_priv_2);	/* value saved by intel_iq80310_init() */
+}
+
+static unsigned long
+intel_iq80310_translate(struct map_pci_info *map, unsigned long ofs)
+{
+	unsigned long page_addr = ofs & 0x00400000;	/* bit 22 of the offset selects the branch below */
+
+	/*
+	 * This munges the flash location so we avoid
+	 * the first 80 bytes (they appear to read nonsense).
+	 */
+	if (page_addr) {
+		writel(0x00000008, map->base + 0x1558);	/* NOTE(review): 0x1558/0x1550 look like window control registers - confirm */
+		writel(0x00000000, map->base + 0x1550);
+	} else {
+		writel(0x00000007, map->base + 0x1558);
+		writel(0x00800000, map->base + 0x1550);
+		ofs += 0x00800000;	/* offsets with bit 22 clear are accessed 0x00800000 higher */
+	}
+
+	return ofs;
+}
+
+static struct mtd_pci_info intel_iq80310_info = {
+	init:		intel_iq80310_init,
+	exit:		intel_iq80310_exit,
+	translate:	intel_iq80310_translate,
+	map_name:	"cfi_probe",
+};
+
+/*
+ * Intel DC21285 driver
+ */
+
+static int
+intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
+{
+	unsigned long base, len;
+
+	base = pci_resource_start(dev, PCI_ROM_RESOURCE);
+	len  = pci_resource_len(dev, PCI_ROM_RESOURCE);
+
+	if (len && base) {
+		/*
+		 * Hmm, if an address was allocated to the ROM resource, but
+		 * not enabled, should we be allocating a new resource for it
+		 * or simply enabling it?
+		 */
+		if (!(pci_resource_flags(dev, PCI_ROM_RESOURCE) &
+		      PCI_ROM_ADDRESS_ENABLE)) {
+			u32 rom_addr;
+			pci_resource_flags(dev, PCI_ROM_RESOURCE) |= PCI_ROM_ADDRESS_ENABLE;
+			pci_read_config_dword(dev, PCI_ROM_ADDRESS, &rom_addr);
+			rom_addr |= PCI_ROM_ADDRESS_ENABLE;
+			pci_write_config_dword(dev, PCI_ROM_ADDRESS, rom_addr);
+			printk("%s: enabling expansion ROM\n", dev->slot_name);
+		}
+	} else {
+		/*
+		 * No ROM resource
+		 */
+		base = pci_resource_start(dev, 2);
+		len  = pci_resource_len(dev, 2);
+
+		/*
+		 * We need to re-allocate PCI BAR2 address range to the
+		 * PCI ROM BAR, and disable PCI BAR2.
+		 */
+	}
+
+	if (!len || !base)
+		return -ENXIO;
+
+	map->map.buswidth = 4;
+	map->map.size     = len;
+	map->base         = ioremap_nocache(base, len);
+
+	if (!map->base)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void
+intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
+{	/* unmap the flash and clear the expansion ROM enable bit */
+	u32 val;
+
+	if (map->base)
+		iounmap((void *)map->base);
+
+	/*
+	 * We need to undo the PCI BAR2/PCI ROM BAR address alteration.
+	 */
+	pci_resource_flags(dev, PCI_ROM_RESOURCE) &= ~PCI_ROM_ADDRESS_ENABLE;	/* NOTE(review): cleared even if it was already set before init - confirm intended */
+	pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
+	val &= ~PCI_ROM_ADDRESS_ENABLE;
+	pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
+}
+
+static unsigned long
+intel_dc21285_translate(struct map_pci_info *map, unsigned long ofs)
+{	/* offsets with none of bits 6..23 set get bit 5 toggled; all others pass through */
+	return ofs & 0x00ffffc0 ? ofs : (ofs ^ (1 << 5));
+}
+
+static struct mtd_pci_info intel_dc21285_info = {
+	init:		intel_dc21285_init,
+	exit:		intel_dc21285_exit,
+	translate:	intel_dc21285_translate,
+	map_name:	"jedec_probe",
+};
+
+/*
+ * PCI device ID table
+ */
+
+static struct pci_device_id mtd_pci_ids[] __devinitdata = {
+	{
+		vendor:		PCI_VENDOR_ID_INTEL,
+		device:		0x530d,
+		subvendor:	PCI_ANY_ID,
+		subdevice:	PCI_ANY_ID,
+		class:		PCI_CLASS_MEMORY_OTHER << 8,
+		class_mask:	0xffff00,
+		driver_data:	(unsigned long)&intel_iq80310_info,
+	},
+	{
+		vendor:		PCI_VENDOR_ID_DEC,
+		device:		PCI_DEVICE_ID_DEC_21285,
+		subvendor:	0,	/* DC21285 defaults to 0 on reset */
+		subdevice:	0,	/* DC21285 defaults to 0 on reset */
+		class:		0,
+		class_mask:	0,
+		driver_data:	(unsigned long)&intel_dc21285_info,
+	},
+	{ 0, }
+};
+
+/*
+ * Generic code follows.
+ */
+
+static u8 mtd_pci_read8(struct map_info *_map, unsigned long ofs)
+{
+	/* _map is embedded first in struct map_pci_info, hence the cast */
+	struct map_pci_info *pci = (struct map_pci_info *)_map;
+	unsigned long xofs = pci->translate(pci, ofs);
+	return readb(pci->base + xofs);
+}
+
+static u16 mtd_pci_read16(struct map_info *_map, unsigned long ofs)
+{
+	/* _map is embedded first in struct map_pci_info, hence the cast */
+	struct map_pci_info *pci = (struct map_pci_info *)_map;
+	unsigned long xofs = pci->translate(pci, ofs);
+	return readw(pci->base + xofs);
+}
+
+static u32 mtd_pci_read32(struct map_info *_map, unsigned long ofs)
+{
+	/* _map is embedded first in struct map_pci_info, hence the cast */
+	struct map_pci_info *pci = (struct map_pci_info *)_map;
+	unsigned long xofs = pci->translate(pci, ofs);
+	return readl(pci->base + xofs);
+}
+
+static void mtd_pci_copyfrom(struct map_info *_map, void *to, unsigned long from, ssize_t len)
+{	/* translate() is applied to the start offset only */
+	struct map_pci_info *map = (struct map_pci_info *)_map;
+	memcpy_fromio(to, map->base + map->translate(map, from), len);
+}
+
+static void mtd_pci_write8(struct map_info *_map, u8 val, unsigned long ofs)
+{
+	struct map_pci_info *pci = (struct map_pci_info *)_map;
+	unsigned long xofs = pci->translate(pci, ofs);	/* board-specific offset */
+	writeb(val, pci->base + xofs);
+}
+
+static void mtd_pci_write16(struct map_info *_map, u16 val, unsigned long ofs)
+{
+	struct map_pci_info *pci = (struct map_pci_info *)_map;
+	unsigned long xofs = pci->translate(pci, ofs);	/* board-specific offset */
+	writew(val, pci->base + xofs);
+}
+
+static void mtd_pci_write32(struct map_info *_map, u32 val, unsigned long ofs)
+{
+	struct map_pci_info *pci = (struct map_pci_info *)_map;
+	unsigned long xofs = pci->translate(pci, ofs);	/* board-specific offset */
+	writel(val, pci->base + xofs);
+}
+
+static void mtd_pci_copyto(struct map_info *_map, unsigned long to, const void *from, ssize_t len)
+{	/* translate() is applied to the start offset only */
+	struct map_pci_info *map = (struct map_pci_info *)_map;
+	memcpy_toio(map->base + map->translate(map, to), from, len);
+}
+
+static struct map_info mtd_pci_map = {	/* template copied into each map_pci_info by mtd_pci_probe() */
+	read8:		mtd_pci_read8,
+	read16:		mtd_pci_read16,
+	read32:		mtd_pci_read32,
+	copy_from:	mtd_pci_copyfrom,
+	write8:		mtd_pci_write8,
+	write16:	mtd_pci_write16,
+	write32:	mtd_pci_write32,
+	copy_to:	mtd_pci_copyto,
+};
+
+/* Bind a supported PCI flash device: enable, claim regions, run the board
+ * init hook, probe the chip and register it.  Returns 0 or a negative errno. */
+static int __devinit
+mtd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct mtd_pci_info *info = (struct mtd_pci_info *)id->driver_data;
+	struct map_pci_info *map = NULL;
+	struct mtd_info *mtd = NULL;
+	int err;
+
+	err = pci_enable_device(dev);
+	if (err)
+		goto out;
+
+	err = pci_request_regions(dev, "pci mtd");
+	if (err)
+		goto out;
+
+	map = kmalloc(sizeof(*map), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!map)
+		goto release;
+
+	map->map       = mtd_pci_map;
+	map->map.name  = dev->slot_name;
+	map->base      = NULL;	/* kmalloc() memory is not cleared; exit() tests base */
+	map->dev       = dev;
+	map->exit      = info->exit;
+	map->translate = info->translate;
+
+	err = info->init(dev, map);
+	if (err)
+		goto release;
+
+	/* tsk - do_map_probe should take const char * */
+	mtd = do_map_probe((char *)info->map_name, &map->map);
+	err = -ENODEV;
+	if (!mtd)
+		goto release;
+
+	mtd->module = THIS_MODULE;
+	add_mtd_device(mtd);
+
+	pci_set_drvdata(dev, mtd);
+
+	return 0;
+
+release:
+	if (map) {
+		map->exit(dev, map);
+		kfree(map);
+	}
+
+	pci_release_regions(dev);
+out:
+	return err;
+}
+
+static void __devexit
+mtd_pci_remove(struct pci_dev *dev)
+{
+	struct mtd_info *mtd = pci_get_drvdata(dev);	/* stored by mtd_pci_probe() */
+	struct map_pci_info *map = mtd->priv;	/* NOTE(review): assumes the chip driver left priv pointing at our embedded map_info - confirm */
+
+	del_mtd_device(mtd);
+	map_destroy(mtd);
+	map->exit(dev, map);	/* board hook: unmaps the flash window */
+	kfree(map);
+
+	pci_set_drvdata(dev, NULL);
+	pci_release_regions(dev);
+}
+
+static struct pci_driver mtd_pci_driver = {
+	name:		"MTD PCI",
+	probe:		mtd_pci_probe,
+	remove:		mtd_pci_remove,
+	id_table:	mtd_pci_ids,
+};
+
+static int __init mtd_pci_maps_init(void)
+{
+	return pci_module_init(&mtd_pci_driver);
+}
+
+static void __exit mtd_pci_maps_exit(void)
+{
+	pci_unregister_driver(&mtd_pci_driver);
+}
+
+module_init(mtd_pci_maps_init);
+module_exit(mtd_pci_maps_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>");
+MODULE_DESCRIPTION("Generic PCI map driver");
+MODULE_DEVICE_TABLE(pci, mtd_pci_ids);
+
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
new file mode 100644
index 000000000000..fb87cdd8b873
--- /dev/null
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -0,0 +1,893 @@
+/*
+ * $Id: pcmciamtd.c,v 1.36 2002/10/14 18:49:12 rmk Exp $
+ *
+ * pcmciamtd.c - MTD driver for PCMCIA flash memory cards
+ *
+ * Author: Simon Evans <spse@secret.org.uk>
+ *
+ * Copyright (C) 2002 Simon Evans
+ *
+ * Licence: GPL
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+#include <pcmcia/version.h>
+#include <pcmcia/cs_types.h>
+#include <pcmcia/cs.h>
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+
+#include <linux/mtd/map.h>
+
+#ifdef CONFIG_MTD_DEBUG
+static int debug = CONFIG_MTD_DEBUG_VERBOSE;
+MODULE_PARM(debug, "i");
+MODULE_PARM_DESC(debug, "Set Debug Level 0=quiet, 5=noisy");
+#undef DEBUG
+#define DEBUG(n, format, arg...) \
+	do { if ((n) <= debug)	 \
+		printk(KERN_DEBUG __FILE__ ":%s(): " format "\n", __FUNCTION__ , ## arg); \
+	} while (0)	/* one statement: safe inside an unbraced if/else */
+
+#else
+#undef DEBUG
+#define DEBUG(n, arg...)
+static const int debug = 0;
+#endif
+
+#define err(format, arg...) printk(KERN_ERR __FILE__ ": " format "\n" , ## arg)
+#define info(format, arg...) printk(KERN_INFO __FILE__ ": " format "\n" , ## arg)
+#define warn(format, arg...) printk(KERN_WARNING __FILE__ ": " format "\n" , ## arg)
+
+
+#define DRIVER_DESC	"PCMCIA Flash memory card driver"
+#define DRIVER_VERSION	"$Revision: 1.36 $"
+
+/* Size of the PCMCIA address space: 26 bits = 64 MB */
+#define MAX_PCMCIA_ADDR	0x4000000
+
+struct pcmciamtd_dev {
+	struct list_head list;	/* entry in dev_list */
+	dev_link_t	link;		/* PCMCIA link */
+	caddr_t		win_base;	/* ioremapped address of PCMCIA window */
+	unsigned int	win_size;	/* size of window */
+	unsigned int	cardsize;	/* size of whole card */
+	unsigned int	offset;		/* offset into card the window currently points at */
+	struct map_info	pcmcia_map;
+	struct mtd_info	*mtd_info;	/* set to NULL by pcmciamtd_release() after del_mtd_device() */
+	u8		vpp;	/* value applied to Vpp1/Vpp2 by pcmciamtd_set_vpp() */
+	char		mtd_name[sizeof(struct cistpl_vers_1_t)];
+};
+
+
+static dev_info_t dev_info = "pcmciamtd";
+static LIST_HEAD(dev_list);
+
+/* Module parameters */
+
+/* 2 = do 16-bit transfers, 1 = do 8-bit transfers */
+static int buswidth = 2;
+
+/* Speed of memory accesses, in ns */
+static int mem_speed;
+
+/* Force the size of an SRAM card */
+static int force_size;
+
+/* Force Vpp */
+static int vpp;
+
+/* Set Vpp */
+static int setvpp;
+
+/* Force card to be treated as FLASH, ROM or RAM */
+static int mem_type;
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Simon Evans <spse@secret.org.uk>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_PARM(buswidth, "i");
+MODULE_PARM_DESC(buswidth, "Set buswidth (1=8 bit, 2=16 bit, default=2)");
+MODULE_PARM(mem_speed, "i");
+MODULE_PARM_DESC(mem_speed, "Set memory access speed in ns");
+MODULE_PARM(force_size, "i");
+MODULE_PARM_DESC(force_size, "Force size of card in MB (1-64)");
+MODULE_PARM(setvpp, "i");
+MODULE_PARM_DESC(setvpp, "Set Vpp (0=Never, 1=On writes, 2=Always on, default=0)");
+MODULE_PARM(vpp, "i");
+MODULE_PARM_DESC(vpp, "Vpp value in 1/10ths eg 33=3.3V 120=12V (Dangerous)");
+MODULE_PARM(mem_type, "i");
+MODULE_PARM_DESC(mem_type, "Set Memory type (0=Flash, 1=RAM, 2=ROM, default=0)");
+
+
+
+static void inline cs_error(client_handle_t handle, int func, int ret)
+{	/* pass the failed function code and its return value to ReportError */
+	error_info_t err = { func, ret };
+	CardServices(ReportError, handle, &err);
+}
+
+
+/* read/write{8,16} copy_{from,to} routines with window remapping to access whole card */
+
+static caddr_t remap_window(struct map_info *map, unsigned long to)
+{	/* returns the mapped address for card offset 'to', or NULL if MapMemPage fails */
+	struct pcmciamtd_dev *dev = (struct pcmciamtd_dev *)map->map_priv_1;
+	window_handle_t win = (window_handle_t)map->map_priv_2;
+	memreq_t mrq;
+	int ret;
+
+	mrq.CardOffset = to & ~(dev->win_size-1);	/* assumes win_size is a power of two - TODO confirm */
+	if(mrq.CardOffset != dev->offset) {	/* the window currently points elsewhere */
+		DEBUG(2, "Remapping window from 0x%8.8x to 0x%8.8x",
+		      dev->offset, mrq.CardOffset);
+		mrq.Page = 0;
+		if( (ret = CardServices(MapMemPage, win, &mrq)) != CS_SUCCESS) {
+			cs_error(dev->link.handle, MapMemPage, ret);
+			return NULL;
+		}
+		dev->offset = mrq.CardOffset;	/* remember where the window points now */
+	}
+	return dev->win_base + (to & (dev->win_size-1));
+}
+
+
+static u8 pcmcia_read8_remap(struct map_info *map, unsigned long ofs)
+{	/* returns 0 when remap_window() fails */
+	caddr_t addr;
+	u8 d;
+
+	addr = remap_window(map, ofs);
+	if(!addr)
+		return 0;
+
+	d = readb(addr);
+	DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%02x", ofs, addr, d);
+	return d;
+}
+
+
+static u16 pcmcia_read16_remap(struct map_info *map, unsigned long ofs)
+{	/* returns 0 when remap_window() fails */
+	caddr_t addr;
+	u16 d;
+
+	addr = remap_window(map, ofs);
+	if(!addr)
+		return 0;
+
+	d = readw(addr);
+	DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%04x", ofs, addr, d);
+	return d;
+}
+
+
+static void pcmcia_copy_from_remap(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{
+	struct pcmciamtd_dev *dev = (struct pcmciamtd_dev *)map->map_priv_1;
+	unsigned long win_size = dev->win_size;
+
+	DEBUG(3, "to = %p from = %lu len = %u", to, from, len);
+	while(len) {
+		/* bytes left in the window that contains 'from' */
+		int chunk = win_size - (from & (win_size-1));
+		caddr_t addr;
+
+		if(chunk > len)
+			chunk = len;
+		addr = remap_window(map, from);
+		if(!addr)
+			return;
+
+		DEBUG(4, "memcpy from %p to %p len = %d", addr, to, chunk);
+		memcpy_fromio(to, addr, chunk);
+		len -= chunk;
+		to += chunk;
+		from += chunk;
+	}
+}
+
+
+static void pcmcia_write8_remap(struct map_info *map, u8 d, unsigned long adr)
+{	/* the write is silently dropped when remap_window() fails */
+	caddr_t addr = remap_window(map, adr);
+
+	if(!addr)
+		return;
+
+	DEBUG(3, "adr = 0x%08lx (%p)  data = 0x%02x", adr, addr, d);
+	writeb(d, addr);
+}
+
+
+static void pcmcia_write16_remap(struct map_info *map, u16 d, unsigned long adr)
+{	/* the write is silently dropped when remap_window() fails */
+	caddr_t addr = remap_window(map, adr);
+	if(!addr)
+		return;
+
+	DEBUG(3, "adr = 0x%08lx (%p)  data = 0x%04x", adr, addr, d);
+	writew(d, addr);
+}
+
+
+static void pcmcia_copy_to_remap(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	struct pcmciamtd_dev *dev = (struct pcmciamtd_dev *)map->map_priv_1;
+	unsigned long win_size = dev->win_size;
+
+	DEBUG(3, "to = %lu from = %p len = %u", to, from, len);
+	while(len) {
+		/* bytes left in the window that contains 'to' */
+		int chunk = win_size - (to & (win_size-1));
+		caddr_t addr;
+
+		if(chunk > len)
+			chunk = len;
+		addr = remap_window(map, to);
+		if(!addr)
+			return;
+
+		DEBUG(4, "memcpy from %p to %p len = %d", from, addr, chunk);
+		memcpy_toio(addr, from, chunk);
+		len -= chunk;
+		to += chunk;
+		from += chunk;
+	}
+}
+
+
+/* read/write{8,16} copy_{from,to} routines with direct access */
+
+static u8 pcmcia_read8(struct map_info *map, unsigned long ofs)
+{	/* direct access: map_priv_2 is used as the window base, no remapping */
+	caddr_t win_base = (caddr_t)map->map_priv_2;
+	u8 d;
+
+	d = readb(win_base + ofs);
+	DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%02x", ofs, win_base + ofs, d);
+	return d;
+}
+
+
+static u16 pcmcia_read16(struct map_info *map, unsigned long ofs)
+{	/* direct access: map_priv_2 is used as the window base, no remapping */
+	caddr_t win_base = (caddr_t)map->map_priv_2;
+	u16 d;
+
+	d = readw(win_base + ofs);
+	DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%04x", ofs, win_base + ofs, d);
+	return d;
+}
+
+
+static void pcmcia_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{	/* direct access: one memcpy_fromio() from window base + from */
+	caddr_t win_base = (caddr_t)map->map_priv_2;
+
+	DEBUG(3, "to = %p from = %lu len = %u", to, from, len);
+	memcpy_fromio(to, win_base + from, len);
+}
+
+
+static void pcmcia_write8(struct map_info *map, u8 d, unsigned long adr)
+{	/* direct access: map_priv_2 is used as the window base, no remapping */
+	caddr_t win_base = (caddr_t)map->map_priv_2;
+
+	DEBUG(3, "adr = 0x%08lx (%p)  data = 0x%02x", adr, win_base + adr, d);
+	writeb(d, win_base + adr);
+}
+
+
+static void pcmcia_write16(struct map_info *map, u16 d, unsigned long adr)
+{
+	caddr_t dst = (caddr_t)map->map_priv_2 + adr;	/* base + offset */
+
+	DEBUG(3, "adr = 0x%08lx (%p)  data = 0x%04x", adr, dst, d);
+	writew(d, dst);
+}
+
+
+static void pcmcia_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	caddr_t dst = (caddr_t)map->map_priv_2 + to;	/* base + offset */
+
+	DEBUG(3, "to = %lu from = %p len = %u", to, from, len);
+	memcpy_toio(dst, from, len);
+}
+
+
+static void pcmciamtd_set_vpp(struct map_info *map, int on)
+{
+	struct pcmciamtd_dev *dev = (struct pcmciamtd_dev *)map->map_priv_1;
+	dev_link_t *link = &dev->link;
+	modconf_t mod;
+	int rc;
+
+	DEBUG(2, "dev = %p on = %d vpp = %d\n", dev, on, dev->vpp);
+	/* Flag only Vpp1/Vpp2 as changing: dev->vpp when on, 0 when off */
+	mod.Attributes = CONF_VPP1_CHANGE_VALID | CONF_VPP2_CHANGE_VALID;
+	mod.Vcc = 0;
+	mod.Vpp1 = mod.Vpp2 = on ? dev->vpp : 0;
+
+	rc = CardServices(ModifyConfiguration, link->handle, &mod);
+	if(rc != CS_SUCCESS)
+		cs_error(link->handle, ModifyConfiguration, rc);
+}
+
+
+/* pcmciamtd_release() unregisters the MTD device, unmaps and releases
+ * the memory window, releases the PCMCIA configuration and clears
+ * DEV_CONFIG.  'arg' is the dev_link_t pointer cast to u_long.  A link
+ * that is not on dev_list is left untouched.
+ */
+static void pcmciamtd_release(u_long arg)
+{
+	dev_link_t *link = (dev_link_t *)arg;
+	struct pcmciamtd_dev *dev = NULL;
+	int ret;
+	struct list_head *temp1, *temp2;
+
+	DEBUG(3, "link = 0x%p", link);
+	/* Find device in list.  dev is only set on a genuine match, so an
+	   empty list leaves it NULL rather than forming &dev->link from NULL */
+	list_for_each_safe(temp1, temp2, &dev_list) {
+		struct pcmciamtd_dev *cur = list_entry(temp1, struct pcmciamtd_dev, list);
+		if(link == &cur->link) {
+			dev = cur;
+			break;
+		}
+	}
+	if(!dev) {
+		DEBUG(1, "Cant find %p in dev_list", link);
+		return;
+	}
+
+	if(dev->mtd_info) {
+		del_mtd_device(dev->mtd_info);
+		dev->mtd_info = NULL;
+		MOD_DEC_USE_COUNT;	/* pairs with the increment in pcmciamtd_config() */
+	}
+	if (link->win) {
+		if(dev->win_base) {
+			iounmap(dev->win_base);
+			dev->win_base = NULL;
+		}
+		CardServices(ReleaseWindow, link->win);
+	}
+	ret = CardServices(ReleaseConfiguration, link->handle);
+	if(ret != CS_SUCCESS)
+		cs_error(link->handle, ReleaseConfiguration, ret);
+	link->state &= ~DEV_CONFIG;
+}
+
+
+static void card_settings(struct pcmciamtd_dev *dev, dev_link_t *link, int *new_name)
+{
+	int rc;
+	tuple_t tuple;
+	cisparse_t parse;
+	u_char buf[64];
+
+	/* Walk the CIS tuples, picking up map size, buswidth and card name */
+	tuple.Attributes = 0;
+	tuple.TupleData = (cisdata_t *)buf;
+	tuple.TupleDataMax = sizeof(buf);
+	tuple.TupleOffset = 0;
+	tuple.DesiredTuple = RETURN_FIRST_TUPLE;
+	rc = CardServices(GetFirstTuple, link->handle, &tuple);
+	while(rc == CS_SUCCESS) {
+		rc = CardServices(GetTupleData, link->handle, &tuple);
+		if(rc != CS_SUCCESS) {
+			cs_error(link->handle, GetTupleData, rc);
+			break;
+		}
+		rc = CardServices(ParseTuple, link->handle, &tuple, &parse);
+		if(rc != CS_SUCCESS) {
+			cs_error(link->handle, ParseTuple, rc);
+			break;
+		}
+		
+		switch(tuple.TupleCode) {
+		case  CISTPL_FORMAT: {
+			cistpl_format_t *t = &parse.format;
+			(void)t; /* Shut up, gcc */
+			DEBUG(2, "Format type: %u, Error Detection: %u, offset = %u, length =%u",
+			      t->type, t->edc, t->offset, t->length);
+			break;
+		}
+			
+		case CISTPL_DEVICE: {
+			cistpl_device_t *t = &parse.device;
+			int i;
+			DEBUG(2, "Common memory:");
+			if(t->ndev)	/* skip dev[0] when the tuple reports no regions */
+				dev->pcmcia_map.size = t->dev[0].size;
+			for(i = 0; i < t->ndev; i++) {
+				DEBUG(2, "Region %d, type = %u", i, t->dev[i].type);
+				DEBUG(2, "Region %d, wp = %u", i, t->dev[i].wp);
+				DEBUG(2, "Region %d, speed = %u ns", i, t->dev[i].speed);
+				DEBUG(2, "Region %d, size = %u bytes", i, t->dev[i].size);
+			}
+			break;
+		}
+			
+		case CISTPL_VERS_1: {
+			cistpl_vers_1_t *t = &parse.version_1;
+			int i;
+			if(t->ns) {
+				/* Name = all version strings joined by single spaces */
+				dev->mtd_name[0] = '\0';
+				for(i = 0; i < t->ns; i++) {
+					if(i)
+						strcat(dev->mtd_name, " ");
+					strcat(dev->mtd_name, t->str+t->ofs[i]);
+				}
+			DEBUG(2, "Found name: %s", dev->mtd_name);
+			break;
+		}
+			
+		case CISTPL_JEDEC_C: {
+			cistpl_jedec_t *t = &parse.jedec;
+			int i;
+			for(i = 0; i < t->nid; i++) {
+				DEBUG(2, "JEDEC: 0x%02x 0x%02x", t->id[i].mfr, t->id[i].info);
+			}
+			break;
+		}
+			
+		case CISTPL_DEVICE_GEO: {
+			cistpl_device_geo_t *t = &parse.device_geo;
+			int i;
+			if(t->ngeo)	/* skip geo[0] when the tuple reports no entries */
+				dev->pcmcia_map.buswidth = t->geo[0].buswidth;
+			for(i = 0; i < t->ngeo; i++) {
+				DEBUG(2, "region: %d buswidth = %u", i, t->geo[i].buswidth);
+				DEBUG(2, "region: %d erase_block = %u", i, t->geo[i].erase_block);
+				DEBUG(2, "region: %d read_block = %u", i, t->geo[i].read_block);
+				DEBUG(2, "region: %d write_block = %u", i, t->geo[i].write_block);
+				DEBUG(2, "region: %d partition = %u", i, t->geo[i].partition);
+				DEBUG(2, "region: %d interleave = %u", i, t->geo[i].interleave);
+			}
+			break;
+		}
+			
+		default:
+			DEBUG(2, "Unknown tuple code %d", tuple.TupleCode);
+		}
+		
+		rc = CardServices(GetNextTuple, link->handle, &tuple, &parse);
+	}
+	if(!dev->pcmcia_map.size)	/* nothing usable in the CIS */
+		dev->pcmcia_map.size = MAX_PCMCIA_ADDR;
+
+	if(!dev->pcmcia_map.buswidth)	/* nothing usable in the CIS */
+		dev->pcmcia_map.buswidth = 2;
+
+	if(force_size) {	/* force_size is in megabytes, hence << 20 */
+		dev->pcmcia_map.size = force_size << 20;
+		DEBUG(2, "size forced to %dM", force_size);
+	}
+
+	if(buswidth) {
+		dev->pcmcia_map.buswidth = buswidth;
+		DEBUG(2, "buswidth forced to %d", buswidth);
+	}		
+
+	dev->pcmcia_map.name = dev->mtd_name;
+	if(!dev->mtd_name[0]) {	/* no name found: use a default and tell the caller */
+		strcpy(dev->mtd_name, "PCMCIA Memory card");
+		*new_name = 1;
+	}
+
+	DEBUG(1, "Device: Size: %lu Width:%d Name: %s",
+	      dev->pcmcia_map.size, dev->pcmcia_map.buswidth << 3, dev->mtd_name);
+}
+
+
+/* pcmciamtd_config() is called from the CARD_INSERTION event handler to
+ * configure the PCMCIA socket, and to make the
+ * MTD device available to the system.
+ */
+/* CS_CHECK: call CardServices, saving fn/result; non-zero jumps to cs_failed */
+#define CS_CHECK(fn, args...) \
+while ((last_ret=CardServices(last_fn=(fn), args))!=0) goto cs_failed
+
+static void pcmciamtd_config(dev_link_t *link)
+{
+	struct pcmciamtd_dev *dev = link->priv;
+	struct mtd_info *mtd = NULL;
+	cs_status_t status;
+	win_req_t req;
+	int last_ret = 0, last_fn = 0;	/* filled in by CS_CHECK for cs_failed */
+	int ret;
+	int i;
+	config_info_t t;
+	static char *probes[] = { "jedec_probe", "cfi_probe" };
+	cisinfo_t cisinfo;
+	int new_name = 0;	/* set by card_settings() when it used the default name */
+
+	DEBUG(3, "link=0x%p", link);
+
+	/* Mark the link configured first so every failure path can call release */
+	link->state |= DEV_CONFIG;
+
+	DEBUG(2, "Validating CIS");
+	ret = CardServices(ValidateCIS, link->handle, &cisinfo);
+	if(ret != CS_SUCCESS) {
+		cs_error(link->handle, ValidateCIS, ret);	/* not fatal: carry on */
+	} else {
+		DEBUG(2, "ValidateCIS found %d chains", cisinfo.Chains);
+	}
+
+	card_settings(dev, link, &new_name);
+
+	/* Start with the window-remapping accessors; swapped out further down */
+	dev->pcmcia_map.read8 = pcmcia_read8_remap;
+	dev->pcmcia_map.read16 = pcmcia_read16_remap;
+	dev->pcmcia_map.copy_from = pcmcia_copy_from_remap;
+	dev->pcmcia_map.write8 = pcmcia_write8_remap;
+	dev->pcmcia_map.write16 = pcmcia_write16_remap;
+	dev->pcmcia_map.copy_to = pcmcia_copy_to_remap;
+	if(setvpp == 1)
+		dev->pcmcia_map.set_vpp = pcmciamtd_set_vpp;
+
+	/* Request a memory window for PCMCIA. Some architectures can map windows up to the
+	   maximum that PCMCIA can support (64MB) - this is ideal and we aim for a window the
+	   size of the whole card - otherwise we try smaller windows until we succeed */
+
+	req.Attributes =  WIN_MEMORY_TYPE_CM | WIN_ENABLE;
+	req.Attributes |= (dev->pcmcia_map.buswidth == 1) ? WIN_DATA_WIDTH_8 : WIN_DATA_WIDTH_16;
+	req.Base = 0;
+	req.AccessSpeed = mem_speed;
+	link->win = (window_handle_t)link->handle;
+	req.Size = (force_size) ? force_size << 20 : MAX_PCMCIA_ADDR;
+	dev->win_size = 0;
+
+	do {
+		/* Halve the request after each refusal; stop once below 0x1000 */
+		DEBUG(2, "requesting window with size = %dKB memspeed = %d",
+		      req.Size >> 10, req.AccessSpeed);
+		link->win = (window_handle_t)link->handle;
+		ret = CardServices(RequestWindow, &link->win, &req);
+		DEBUG(2, "ret = %d dev->win_size = %d", ret, dev->win_size);
+		if(ret) {
+			req.Size >>= 1;
+		} else {
+			DEBUG(2, "Got window of size %dKB", req.Size >> 10);
+			dev->win_size = req.Size;
+			break;
+		}
+	} while(req.Size >= 0x1000);
+
+	DEBUG(2, "dev->win_size = %d", dev->win_size);
+
+	if(!dev->win_size) {
+		err("Cant allocate memory window");
+		pcmciamtd_release((u_long)link);
+		return;
+	}
+	DEBUG(1, "Allocated a window of %dKB", dev->win_size >> 10);
+		
+	/* Get card status (only used for the debug message below) */
+	CS_CHECK(GetStatus, link->handle, &status);
+	DEBUG(2, "status value: 0x%x window handle = 0x%8.8lx",
+	      status.CardState, (unsigned long)link->win);
+	dev->win_base = ioremap(req.Base, req.Size);
+	if(!dev->win_base) {
+		err("ioremap(%lu, %u) failed", req.Base, req.Size);
+		pcmciamtd_release((u_long)link);
+		return;
+	}
+	DEBUG(1, "mapped window dev = %p req.base = 0x%lx base = %p size = 0x%x",
+	      dev, req.Base, dev->win_base, req.Size);
+	dev->cardsize = 0;
+	dev->offset = 0;
+
+	dev->pcmcia_map.map_priv_1 = (unsigned long)dev;
+	dev->pcmcia_map.map_priv_2 = (unsigned long)link->win;	/* window handle for now */
+
+	DEBUG(2, "Getting configuration");
+	CS_CHECK(GetConfigurationInfo, link->handle, &t);
+	DEBUG(2, "Vcc = %d Vpp1 = %d Vpp2 = %d", t.Vcc, t.Vpp1, t.Vpp2);
+	dev->vpp = (vpp) ? vpp : t.Vpp1;	/* a non-zero vpp overrides the reported Vpp1 */
+	link->conf.Attributes = 0;
+	link->conf.Vcc = t.Vcc;
+	if(setvpp == 2) {	/* setvpp == 2: request Vpp as part of the configuration */
+		link->conf.Vpp1 = dev->vpp;
+		link->conf.Vpp2 = dev->vpp;
+	} else {
+		link->conf.Vpp1 = 0;
+		link->conf.Vpp2 = 0;
+	}
+
+	link->conf.IntType = INT_MEMORY;
+	link->conf.ConfigBase = t.ConfigBase;
+	link->conf.Status = t.Status;
+	link->conf.Pin = t.Pin;
+	link->conf.Copy = t.Copy;
+	link->conf.ExtStatus = t.ExtStatus;
+	link->conf.ConfigIndex = 0;
+	link->conf.Present = t.Present;
+	DEBUG(2, "Setting Configuration");
+	ret = CardServices(RequestConfiguration, link->handle, &link->conf);
+	if(ret != CS_SUCCESS) {
+		cs_error(link->handle, RequestConfiguration, ret);	/* reported only */
+	}
+
+	link->dev = NULL;
+	link->state &= ~DEV_CONFIG_PENDING;
+
+	if(mem_type == 1) {
+		mtd = do_map_probe("map_ram", &dev->pcmcia_map);
+	} else if(mem_type == 2) {
+		mtd = do_map_probe("map_rom", &dev->pcmcia_map);
+	} else {
+		for(i = 0; i < sizeof(probes) / sizeof(char *); i++) {
+			DEBUG(1, "Trying %s", probes[i]);
+			mtd = do_map_probe(probes[i], &dev->pcmcia_map);
+			if(mtd)
+				break;
+			
+			DEBUG(1, "FAILED: %s", probes[i]);
+		}
+	}
+	
+	if(!mtd) {
+		DEBUG(1, "Cant find an MTD");
+		pcmciamtd_release((u_long)link);
+		return;
+	}
+
+	dev->mtd_info = mtd;
+	mtd->module = THIS_MODULE;
+	dev->cardsize = mtd->size;
+
+	if(new_name) {
+		int size = 0;
+		char unit = ' ';
+		/* Since we are using a default name, make it better by adding in the
+		   size */
+		if(mtd->size < 1048576) { /* <1MB in size, show size in K */
+			size = mtd->size >> 10;
+			unit = 'K'; 
+		} else {
+			size = mtd->size >> 20;
+			unit = 'M';
+		}
+		sprintf(mtd->name, "%d%cB %s", size, unit, "PCMCIA Memory card");
+	}
+
+	/* If the memory found fits completely into the mapped PCMCIA window,
+	   use the faster non-remapping read/write functions */
+	if(dev->cardsize <= dev->win_size) {
+		DEBUG(1, "Using non remapping memory functions");
+
+		dev->pcmcia_map.map_priv_2 = (unsigned long)dev->win_base;
+		dev->pcmcia_map.read8 = pcmcia_read8;
+		dev->pcmcia_map.read16 = pcmcia_read16;
+		dev->pcmcia_map.copy_from = pcmcia_copy_from;
+		dev->pcmcia_map.write8 = pcmcia_write8;
+		dev->pcmcia_map.write16 = pcmcia_write16;
+		dev->pcmcia_map.copy_to = pcmcia_copy_to;
+	}
+
+	MOD_INC_USE_COUNT;
+	if(add_mtd_device(mtd)) {
+		dev->mtd_info = NULL;	/* NOTE(review): mtd is not destroyed here - confirm no leak */
+		MOD_DEC_USE_COUNT;
+		err("Couldnt register MTD device");
+		pcmciamtd_release((u_long)link);
+		return;
+	}
+	DEBUG(1, "mtd added @ %p mtd->priv = %p", mtd, mtd->priv);
+
+	return;
+
+ cs_failed:
+	cs_error(link->handle, last_fn, last_ret);
+	err("CS Error, exiting");
+	pcmciamtd_release((u_long)link);
+	return;
+}
+
+
+/* The card status event handler.  CARD_INSERTION configures the card
+ * straight away; CARD_REMOVAL clears DEV_PRESENT and, for a configured
+ * link, arms the release timer.  Suspend/resume only toggle DEV_SUSPEND.
+ * Every event, including unrecognised ones, returns 0.
+ */
+
+static int pcmciamtd_event(event_t event, int priority,
+			event_callback_args_t *args)
+{
+	dev_link_t *link = args->client_data;	/* set up in pcmciamtd_attach() */
+
+	DEBUG(1, "event=0x%06x", event);
+	switch (event) {
+	case CS_EVENT_CARD_REMOVAL:
+		DEBUG(2, "EVENT_CARD_REMOVAL");
+		link->state &= ~DEV_PRESENT;
+		if (link->state & DEV_CONFIG)
+			mod_timer(&link->release, jiffies + HZ/20);	/* release in HZ/20 jiffies */
+		break;
+	case CS_EVENT_CARD_INSERTION:
+		DEBUG(2, "EVENT_CARD_INSERTION");
+		link->state |= DEV_PRESENT | DEV_CONFIG_PENDING;
+		pcmciamtd_config(link);
+		break;
+	case CS_EVENT_PM_SUSPEND:
+		DEBUG(2, "EVENT_PM_SUSPEND");
+		link->state |= DEV_SUSPEND;
+		/* Fall through: suspend is then handled like a physical reset */
+	case CS_EVENT_RESET_PHYSICAL:
+		DEBUG(2, "EVENT_RESET_PHYSICAL");
+		/* get_lock(link); */
+		break;
+	case CS_EVENT_PM_RESUME:
+		DEBUG(2, "EVENT_PM_RESUME");
+		link->state &= ~DEV_SUSPEND;
+		/* Fall through: resume is then handled like a card reset */
+	case CS_EVENT_CARD_RESET:
+		DEBUG(2, "EVENT_CARD_RESET");
+		/* free_lock(link); */
+		break;
+	default:
+		DEBUG(2, "Unknown event %d", event);
+	}
+	return 0;
+}
+
+
+/* This deletes a driver "instance".  The device is de-registered
+ * with Card Services and its local data structures are freed.  If the
+ * link is still configured (DEV_CONFIG set) it is only marked with
+ * DEV_STALE_LINK and nothing is freed here.
+ */
+
+static void pcmciamtd_detach(dev_link_t *link)
+{
+	int ret;
+	struct pcmciamtd_dev *dev = NULL;
+	struct list_head *temp1, *temp2;
+
+	DEBUG(3, "link=0x%p", link);
+
+	/* Find device in list.  dev is only set on a genuine match, so an
+	   empty list leaves it NULL rather than forming &dev->link from a
+	   NULL pointer */
+	list_for_each_safe(temp1, temp2, &dev_list) {
+		struct pcmciamtd_dev *cur = list_entry(temp1, struct pcmciamtd_dev, list);
+		if(link == &cur->link) {
+			dev = cur;
+			break;
+		}
+	}
+	if(!dev) {
+		DEBUG(1, "Cant find %p in dev_list", link);
+		return;
+	}
+
+	/* Cancel a pending release timer before going any further */
+	del_timer(&link->release);
+
+	if (link->state & DEV_CONFIG) {
+		/* NOTE(review): no visible code frees a DEV_STALE_LINK instance later - confirm */
+		DEBUG(3, "DEV_CONFIG set");
+		link->state |= DEV_STALE_LINK;
+		return;
+	}
+
+	if (link->handle) {
+		DEBUG(2, "Deregistering with card services");
+		ret = CardServices(DeregisterClient, link->handle);
+		if (ret != CS_SUCCESS)
+			cs_error(link->handle, DeregisterClient, ret);
+	}
+	DEBUG(3, "Freeing dev (%p)", dev);
+	list_del(&dev->list);
+	link->priv = NULL;
+	kfree(dev);
+}
+
+
+/* pcmciamtd_attach() creates an "instance" of the driver: it allocates
+ * and zeroes the per-device structure, links it onto dev_list and
+ * registers it with Card Services.  Returns the new link, or NULL.
+ */
+
+static dev_link_t *pcmciamtd_attach(void)
+{
+	struct pcmciamtd_dev *dev;
+	dev_link_t *link;
+	client_reg_t reg;
+	int rc;
+
+	/* Create new memory card device */
+	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+	DEBUG(1, "dev=0x%p", dev);
+	memset(dev, 0, sizeof(*dev));
+
+	link = &dev->link;
+	link->priv = dev;
+	link->release.function = &pcmciamtd_release;
+	link->release.data = (u_long)link;
+	link->conf.Attributes = 0;
+	link->conf.IntType = INT_MEMORY;
+
+	list_add(&dev->list, &dev_list);
+
+	/* Register with Card Services */
+	reg.dev_info = &dev_info;
+	reg.Attributes = INFO_IO_CLIENT | INFO_CARD_SHARE;
+	reg.EventMask =
+		CS_EVENT_RESET_PHYSICAL | CS_EVENT_CARD_RESET |
+		CS_EVENT_CARD_INSERTION | CS_EVENT_CARD_REMOVAL |
+		CS_EVENT_PM_SUSPEND | CS_EVENT_PM_RESUME;
+	reg.event_handler = &pcmciamtd_event;
+	reg.Version = 0x0210;
+	reg.event_callback_args.client_data = link;
+	DEBUG(2, "Calling RegisterClient");
+	rc = CardServices(RegisterClient, &link->handle, &reg);
+	if (rc == 0)
+		return link;
+
+	/* Registration failed: report it and tear the instance down again */
+	cs_error(link->handle, RegisterClient, rc);
+	pcmciamtd_detach(link);
+	return NULL;
+}
+
+
+static int __init init_pcmciamtd(void)
+{
+	servinfo_t cs_info;
+
+	info(DRIVER_DESC " " DRIVER_VERSION);
+	CardServices(GetCardServicesInfo, &cs_info);
+	if (cs_info.Revision != CS_RELEASE_CODE) {
+		err("Card Services release does not match!");
+		return -1;
+	}
+	/* Bad buswidth/force_size/mem_type values are reported and reset, not fatal */
+	if(buswidth < 0 || buswidth > 2) {
+		info("bad buswidth (%d), using default", buswidth);
+		buswidth = 2;
+	}
+	if(force_size < 0 || force_size > 64) {
+		info("bad force_size (%d), using default", force_size);
+		force_size = 0;
+	}
+	if(mem_type < 0 || mem_type > 2) {
+		info("bad mem_type (%d), using default", mem_type);
+		mem_type = 0;
+	}
+	register_pccard_driver(&dev_info, &pcmciamtd_attach, &pcmciamtd_detach);
+	return 0;
+}
+
+
+static void __exit exit_pcmciamtd(void)
+{
+	struct list_head *temp1, *temp2;
+
+	DEBUG(1, DRIVER_DESC " unloading");
+	unregister_pccard_driver(&dev_info);
+	/* Detach every instance so none is leaked; release configured ones first */
+	list_for_each_safe(temp1, temp2, &dev_list) {
+		dev_link_t *link = &list_entry(temp1, struct pcmciamtd_dev, list)->link;
+		if (link->state & DEV_CONFIG)
+			pcmciamtd_release((u_long)link);
+		pcmciamtd_detach(link);
+	}
+}
+
+module_init(init_pcmciamtd);
+module_exit(exit_pcmciamtd);
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index b6c1c0f9efe7..b2592a0a0d63 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -3,28 +3,35 @@
  * 
  * (C) 2000 Nicolas Pitre <nico@cam.org>
  * 
- * $Id: sa1100-flash.c,v 1.22 2001/10/02 10:04:52 rmk Exp $
+ * $Id: sa1100-flash.c,v 1.28 2002/05/07 13:48:38 abz Exp $
  */
 
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/ioport.h>
 #include <linux/kernel.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/partitions.h>
+#include <linux/mtd/concat.h>
 
 #include <asm/hardware.h>
+#include <asm/mach-types.h>
 #include <asm/io.h>
+#include <asm/sizes.h>
 
+#include <asm/arch/h3600.h>
 
 #ifndef CONFIG_ARCH_SA1100
 #error This is for SA1100 architecture only
 #endif
 
-
-#define WINDOW_ADDR 0xe8000000
+/*
+ * This isnt complete yet, so...
+ */
+#define CONFIG_MTD_SA1100_STATICMAP 1
 
 static __u8 sa1100_read8(struct map_info *map, unsigned long ofs)
 {
@@ -66,33 +73,7 @@ static void sa1100_copy_to(struct map_info *map, unsigned long to, const void *f
 	memcpy((void *)(map->map_priv_1 + to), from, len);
 }
 
-
-#ifdef CONFIG_SA1100_H3600
-
-static void h3600_set_vpp(struct map_info *map, int vpp)
-{
-	if (vpp)
-		set_h3600_egpio(EGPIO_H3600_VPP_ON);
-	else
-		clr_h3600_egpio(EGPIO_H3600_VPP_ON);
-}
-
-#endif
-
-#ifdef CONFIG_SA1100_JORNADA720
-
-static void jornada720_set_vpp(int vpp)
-{
-  if (vpp)
-      PPSR |= 0x80;
-  else
-      PPSR &= ~0x80;
-  PPDR |= 0x80;
-}
-
-#endif
-
-static struct map_info sa1100_map = {
+static struct map_info sa1100_map __initdata = {
 	name:		"SA1100 flash",
 	read8:		sa1100_read8,
 	read16:		sa1100_read16,
@@ -102,609 +83,1232 @@ static struct map_info sa1100_map = {
 	write16:	sa1100_write16,
 	write32:	sa1100_write32,
 	copy_to:	sa1100_copy_to,
-
-	map_priv_1:	WINDOW_ADDR,
 };
 
 
+#ifdef CONFIG_MTD_SA1100_STATICMAP
 /*
  * Here are partition information for all known SA1100-based devices.
  * See include/linux/mtd/partitions.h for definition of the mtd_partition
  * structure.
- * 
- * The *_max_flash_size is the maximum possible mapped flash size which
- * is not necessarily the actual flash size.  It must correspond to the 
- * value specified in the mapping definition defined by the
- * "struct map_desc *_io_desc" for the corresponding machine.
+ *
+ * Please note:
+ *  1. We no longer support static flash mappings via the machine io_desc
+ *     structure.
+ *  2. The flash size given should be the largest flash size that can
+ *     be accomodated.
+ *
+ * The MTD layer will detect flash chip aliasing and reduce the size of
+ * the map accordingly.
+ *
+ * Please keep these in alphabetical order, and formatted as per existing
+ * entries.  Thanks.
  */
 
-#ifdef CONFIG_SA1100_ASSABET
+#ifdef CONFIG_SA1100_ADSBITSY
+static struct mtd_partition adsbitsy_partitions[] = {
+	{
+		name:		"bootROM",
+		size:		0x80000,	/* 512 KiB */
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"zImage",
+		size:		0x100000,	/* 1 MiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"ramdisk.gz",
+		size:		0x300000,	/* 3 MiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"User FS",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
+};
+#endif
 
+#ifdef CONFIG_SA1100_ASSABET
 /* Phase 4 Assabet has two 28F160B3 flash parts in bank 0: */
-static unsigned long assabet4_max_flash_size = 0x00400000;
 static struct mtd_partition assabet4_partitions[] = {
-        {
-                name: "bootloader",
-                size: 0x00020000,
-                offset: 0,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "bootloader params",
-                size: 0x00020000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "jffs",
-                size: MTDPART_SIZ_FULL,
-                offset: MTDPART_OFS_APPEND
-        }
+	{
+		name:		"bootloader",
+		size:		0x00020000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"bootloader params",
+		size:		0x00020000,
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"jffs",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
 };
 
 /* Phase 5 Assabet has two 28F128J3A flash parts in bank 0: */
-static unsigned long assabet5_max_flash_size = 0x02000000;
 static struct mtd_partition assabet5_partitions[] = {
-        {
-                name: "bootloader",
-                size: 0x00040000,
-                offset: 0,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "bootloader params",
-                size: 0x00040000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "jffs",
-                size: MTDPART_SIZ_FULL,
-                offset: MTDPART_OFS_APPEND
-        }
+	{
+		name:		"bootloader",
+		size:		0x00040000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"bootloader params",
+		size:		0x00040000,
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"jffs",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
 };
 
-#define assabet_max_flash_size assabet5_max_flash_size
-#define assabet_partitions     assabet5_partitions
-
+#define assabet_partitions	assabet5_partitions
 #endif
 
-#ifdef CONFIG_SA1100_FLEXANET
-
-/* Flexanet has two 28F128J3A flash parts in bank 0: */
-static unsigned long flexanet_max_flash_size = 0x02000000;
-static struct mtd_partition flexanet_partitions[] = {
-        {
-                name: "bootloader",
-                size: 0x00040000,
-                offset: 0,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "bootloader params",
-                size: 0x00040000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "kernel",
-                size: 0x000C0000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "altkernel",
-                size: 0x000C0000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "root",
-                size: 0x00400000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "free1",
-                size: 0x00300000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "free2",
-                size: 0x00300000,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        },{
-                name: "free3",
-                size: MTDPART_SIZ_FULL,
-                offset: MTDPART_OFS_APPEND,
-                mask_flags: MTD_WRITEABLE
-        }
+#ifdef CONFIG_SA1100_BADGE4
+/*
+ * 1 x Intel 28F320C3BA100 Advanced+ Boot Block Flash (32 Mibit)
+ *   Eight 4 KiW Parameter Bottom Blocks (64 KiB)
+ *   Sixty-three 32 KiW Main Blocks (4032 KiB)
+ */
+static struct mtd_partition badge4_partitions[] = {
+	{
+		name:		"BLOB boot loader",
+		offset:		0,
+		size:		0x0000A000	/* 40 KiB */
+	}, {
+		name:		"params",
+		offset:		MTDPART_OFS_APPEND,
+		size:		0x00006000	/* 24 KiB */
+	}, {
+		name:		"kernel",
+		offset:		MTDPART_OFS_APPEND,
+		size:		0x00100000	/* 1 MiB */
+	}, {
+		name:		"root",
+		offset:		MTDPART_OFS_APPEND,
+		size:		MTDPART_SIZ_FULL
+	}
+};
-
 #endif
 
-#ifdef CONFIG_SA1100_HUW_WEBPANEL
-static unsigned long huw_webpanel_max_flash_size = 0x01000000;
-static struct mtd_partition huw_webpanel_partitions[] = {
-	{ 
-	  name: "Loader",
-	  size: 0x00040000,
-	  offset: 0,
-	},{
-	  name: "Sector 1",
-	  size: 0x00040000,
-	  offset: MTDPART_OFS_APPEND,
-	},{
-	  size: MTDPART_SIZ_FULL,
-	  offset: MTDPART_OFS_APPEND,
+
+#ifdef CONFIG_SA1100_CERF
+#ifdef CONFIG_SA1100_CERF_FLASH_32MB
+static struct mtd_partition cerf_partitions[] = {
+	{
+		name:		"firmware",
+		size:		0x00040000,	/* 256 KiB */
+		offset:		0,
+	}, {
+		name:		"params",
+		size:		0x00040000,	/* 256 KiB */
+		offset:		0x00040000,
+	}, {
+		name:		"kernel",
+		size:		0x00100000,	/* 1 MiB */
+		offset:		0x00080000,
+	}, {
+		name:		"rootdisk",
+		size:		0x01E80000,	/* 30.5 MiB, ends at 0x02000000 (32 MiB) */
+		offset:		0x00180000,
+	}
+};
-#endif /* CONFIG_SA1100_HUW_WEBPANEL */
-
-
-#ifdef CONFIG_SA1100_H3600
-
-static unsigned long h3600_max_flash_size = 0x02000000;
-static struct mtd_partition h3600_partitions[] = {
+#elif defined CONFIG_SA1100_CERF_FLASH_16MB
+static struct mtd_partition cerf_partitions[] = {
 	{
-		name: "H3600 boot firmware",
-		size: 0x00040000,
-		offset: 0,
-		mask_flags: MTD_WRITEABLE  /* force read-only */
-	},{
-		name: "H3600 kernel",
-		size: 0x00080000,
-		offset: 0x40000
-	},{
-		name: "H3600 params",
-		size: 0x00040000,
-		offset: 0xC0000
-	},{
-#ifdef CONFIG_JFFS2_FS
-		name: "H3600 root jffs2",
-		offset: 0x00100000,
-		size: MTDPART_SIZ_FULL
+		name:		"firmware",
+		size:		0x00020000,
+		offset:		0,
+	}, {
+		name:		"params",
+		size:		0x00020000,
+		offset:		0x00020000,
+	}, {
+		name:		"kernel",
+		size:		0x00100000,
+		offset:		0x00040000,
+	}, {
+		name:		"rootdisk",
+		size:		0x00EC0000,
+		offset:		0x00140000,
+	}
+};
+#elif defined CONFIG_SA1100_CERF_FLASH_8MB
+#   error "Unwritten type definition"
 #else
-		name: "H3600 initrd",
-		size: 0x00100000,
-		offset: 0x00100000
-	},{
-		name: "H3600 root cramfs",
-		size: 0x00300000,
-		offset: 0x00200000
-	},{
-		name: "H3600 usr cramfs",
-		size: 0x00800000,
-		offset: 0x00500000
-	},{
-		name: "H3600 usr local",
-		offset: 0x00d00000,
-		size: MTDPART_SIZ_FULL
+#   error "Undefined memory orientation for CERF in sa1100-flash.c"
+#endif
 #endif
+
+#ifdef CONFIG_SA1100_CONSUS
+static struct mtd_partition consus_partitions[] = {
+	{
+		name:		"Consus boot firmware",
+		offset:		0,
+		size:		0x00040000,
+		mask_flags:	MTD_WRITEABLE, /* force read-only */
+	}, {
+		name:		"Consus kernel",
+		offset:		0x00040000,
+		size:		0x00100000,
+		mask_flags:	0,
+	}, {
+		name:		"Consus disk",
+		offset:		0x00140000,
+		/* The rest (up to 16M) for jffs.  We could put 0 and
+		   make it find the size automatically, but right now
+		   I have 32 megs.  jffs will use all 32 megs if given
+		   the chance, and this leads to horrible problems
+		   when you try to re-flash the image because blob
+		   won't erase the whole partition. */
+		size:		0x01000000 - 0x00140000,
+		mask_flags:	0,
+	}, {
+		/* This is a secondary disk, which can be used as
+		   needed.  For simplicity it is the same size as the
+		   other Consus disk partition, although realistically
+		   it could be the remainder of the flash (depending on
+		   the file system used). */
+		 name:		"Consus disk2",
+		 offset:	0x01000000,
+		 size:		0x01000000 - 0x00140000,
+		 mask_flags:	0,
+	}
+};
+#endif
 
+#ifdef CONFIG_SA1100_FLEXANET
+/* Flexanet has two 28F128J3A flash parts in bank 0: */
+#define FLEXANET_FLASH_SIZE		0x02000000	/* 32 MiB */
+static struct mtd_partition flexanet_partitions[] = {
+	{
+		name:		"bootloader",
+		size:		0x00040000,	/* 256 KiB */
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"bootloader params",
+		size:		0x00040000,	/* 256 KiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"kernel",
+		size:		0x000C0000,	/* 768 KiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"altkernel",
+		size:		0x000C0000,	/* 768 KiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"root",
+		size:		0x00400000,	/* 4 MiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"free1",
+		size:		0x00300000,	/* 3 MiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"free2",
+		size:		0x00300000,	/* 3 MiB */
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}, {
+		name:		"free3",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,
+	}
+};
 #endif
+
 #ifdef CONFIG_SA1100_FREEBIRD
-static unsigned long freebird_max_flash_size = 0x02000000;
 static struct mtd_partition freebird_partitions[] = {
 #if CONFIG_SA1100_FREEBIRD_NEW
-    {
-     name: "firmware",
-     size: 0x00040000,
-     offset: 0,
-     mask_flags: MTD_WRITEABLE  /* force read-only */
-    },{
-     name: "kernel",
-     size: 0x00080000,
-     offset: 0x40000
-    },{
-     name: "params",
-     size: 0x00040000,
-     offset: 0xC0000
-    },{
-     name: "initrd",
-     size: 0x00100000,
-     offset: 0x00100000
-    },{
-     name: "root cramfs",
-     size: 0x00300000,
-     offset: 0x00200000
-    },{
-     name: "usr cramfs",
-     size: 0x00C00000,
-     offset: 0x00500000
-    },{
-	 name: "local",
-	 offset: 0x01100000,
-	 size: MTDPART_SIZ_FULL 
+	{
+		name:		"firmware",
+		size:		0x00040000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"kernel",
+		size:		0x00080000,
+		offset:		0x00040000,
+	}, {
+		name:		"params",
+		size:		0x00040000,
+		offset:		0x000C0000,
+	}, {
+		name:		"initrd",
+		size:		0x00100000,
+		offset:		0x00100000,
+	}, {
+		name:		"root cramfs",
+		size:		0x00300000,
+		offset:		0x00200000,
+	}, {
+		name:		"usr cramfs",
+		size:		0x00C00000,
+		offset:		0x00500000,
+	}, {
+		name:		"local",
+		size:		MTDPART_SIZ_FULL,
+		offset:		0x01100000,
 	}
 #else
-	{ offset: 0,            		size: 0x00040000,   },
-	{ offset: MTDPART_OFS_APPEND,   size: 0x000c0000,   },
-	{ offset: MTDPART_OFS_APPEND,	size: 0x00400000,	},
-	{ offset: MTDPART_OFS_APPEND,   size: MTDPART_SIZ_FULL  }
+	{
+		size:		0x00040000,
+		offset:		0,
+	}, {
+		size:		0x000c0000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		size:		0x00400000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
 #endif
-	};
+};
 #endif
-																									
-
-#ifdef CONFIG_SA1100_CERF
 
-static unsigned long cerf_max_flash_size = 0x01000000;
-static struct mtd_partition cerf_partitions[] = {
-	{ offset: 0,			size: 0x00800000 	},
-	{ offset: MTDPART_OFS_APPEND,	size: 0x00800000 	}
+#ifdef CONFIG_SA1100_FRODO
+/* Frodo has 2 x 16M 28F128J3A flash chips in bank 0: */
+static struct mtd_partition frodo_partitions[] =
+{
+	{
+		name:		"bootloader",
+		size:		0x00040000,	/* 256 KiB */
+		offset: 	0x00000000,
+		mask_flags:	MTD_WRITEABLE
+	}, {
+		name:		"bootloader params",
+		size:		0x00040000,	/* 256 KiB */
+		offset: 	MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE
+	}, {
+		name:		"kernel",
+		size:		0x00100000,	/* 1 MiB */
+		offset: 	MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE
+	}, {
+		name:		"ramdisk",
+		size:		0x00400000,	/* 4 MiB */
+		offset: 	MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE
+	}, {
+		name:		"file system",
+		size:		MTDPART_SIZ_FULL,
+		offset: 	MTDPART_OFS_APPEND
+	}
+};
-
 #endif
 
 #ifdef CONFIG_SA1100_GRAPHICSCLIENT
-
-static unsigned long graphicsclient_max_flash_size = 0x01000000;
 static struct mtd_partition graphicsclient_partitions[] = {
-	{ 
-	 name: "zImage",
-	 offset: 0,
-	 size: 0x100000
-	},
-	{ 
-         name: "ramdisk.gz",
-         offset: MTDPART_OFS_APPEND,
-         size: 0x300000 		
-	},
-	{ 
-	  name: "User FS",
-          offset: MTDPART_OFS_APPEND,	
-          size: MTDPART_SIZ_FULL
+	{
+		name:		"zImage",
+		size:		0x100000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"ramdisk.gz",
+		size:		0x300000,
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"User FS",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
 	}
 };
-
 #endif
 
 #ifdef CONFIG_SA1100_GRAPHICSMASTER
-
-static unsigned long graphicsmaster_max_flash_size = 0x01000000;
 static struct mtd_partition graphicsmaster_partitions[] = {
-	{ 
-	 name: "zImage",
-	 offset: 0,
-	 size: 0x100000
+	{
+		name:		"zImage",
+		size:		0x100000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
 	},
-	{ 
-         name: "ramdisk.gz",
-         offset: MTDPART_OFS_APPEND,
-         size: 0x300000 		
+	{
+		name:		"ramdisk.gz",
+		size:		0x300000,
+		offset:		MTDPART_OFS_APPEND,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
 	},
-	{ 
-	  name: "User FS",
-          offset: MTDPART_OFS_APPEND,	
-          size: MTDPART_SIZ_FULL
+	{
+		name:		"User FS",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
 	}
 };
+#endif
 
+#ifdef CONFIG_SA1100_H3XXX
+static struct mtd_partition h3xxx_partitions[] = {
+	{
+		name:		"H3XXX boot firmware",
+		size:		0x00040000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+#ifdef CONFIG_MTD_2PARTS_IPAQ
+		name:		"H3XXX root jffs2",
+		size:		MTDPART_SIZ_FULL,
+		offset:		0x00040000,
+#else
+		name:		"H3XXX kernel",
+		size:		0x00080000,
+		offset:		0x00040000,
+	}, {
+		name:		"H3XXX params",
+		size:		0x00040000,
+		offset:		0x000C0000,
+	}, {
+#ifdef CONFIG_JFFS2_FS
+		name:		"H3XXX root jffs2",
+		size:		MTDPART_SIZ_FULL,
+		offset:		0x00100000,
+#else
+		name:		"H3XXX initrd",
+		size:		0x00100000,
+		offset:		0x00100000,
+	}, {
+		name:		"H3XXX root cramfs",
+		size:		0x00300000,
+		offset:		0x00200000,
+	}, {
+		name:		"H3XXX usr cramfs",
+		size:		0x00800000,
+		offset:		0x00500000,
+	}, {
+		name:		"H3XXX usr local",
+		size:		MTDPART_SIZ_FULL,
+		offset:		0x00d00000,
+#endif
 #endif
+	}
+};
 
-#ifdef CONFIG_SA1100_PANGOLIN
+static void h3xxx_set_vpp(struct map_info *map, int vpp)
+{
+	assign_h3600_egpio(IPAQ_EGPIO_VPP_ON, vpp);
+}
+#else
+#define h3xxx_set_vpp NULL
+#endif
 
-static unsigned long pangolin_max_flash_size = 0x04000000;
-static struct mtd_partition pangolin_partitions[] = {
-	{
-	  name: "boot firmware",
-	  offset: 0x00000000,
-	  size: 0x00080000,
-	  mask_flags: MTD_WRITEABLE,  /* force read-only */
-	},
-	{
-	  name: "kernel",
-	  offset: 0x00080000,
-	  size: 0x00100000,
-	},
+#ifdef CONFIG_SA1100_HUW_WEBPANEL
+static struct mtd_partition huw_webpanel_partitions[] = {
 	{
-	  name: "initrd",
-	  offset: 0x00180000,
-	  size: 0x00280000,
-	},
+		name:		"Loader",
+		size:		0x00040000,
+		offset:		0,
+	}, {
+		name:		"Sector 1",
+		size:		0x00040000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
+};
+#endif
+
+#ifdef CONFIG_SA1100_JORNADA720
+static struct mtd_partition jornada720_partitions[] = {
 	{
-	  name: "initrd-test",
-	  offset: 0x00400000,
-	  size: 0x03C00000,
+		name:		"JORNADA720 boot firmware",
+		size:		0x00040000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"JORNADA720 kernel",
+		size:		0x000c0000,
+		offset:		0x00040000,
+	}, {
+		name:		"JORNADA720 params",
+		size:		0x00040000,
+		offset:		0x00100000,
+	}, {
+		name:		"JORNADA720 initrd",
+		size:		0x00100000,
+		offset:		0x00140000,
+	}, {
+		name:		"JORNADA720 root cramfs",
+		size:		0x00300000,
+		offset:		0x00240000,
+	}, {
+		name:		"JORNADA720 usr cramfs",
+		size:		0x00800000,
+		offset:		0x00540000,
+	}, {
+		name:		"JORNADA720 usr local",
+		size:		0,  /* will expand to the end of the flash */
+		offset:		0x00d00000,
 	}
 };
 
+static void jornada720_set_vpp(struct map_info *map, int vpp)
+{	/* prototype matches h3xxx_set_vpp: both are installed as sa1100_map.set_vpp */
+	if (vpp)
+		PPSR |= 0x80;
+	else
+		PPSR &= ~0x80;
+	PPDR |= 0x80;
+}
+#else
+#define jornada720_set_vpp NULL
 #endif
 
-#ifdef CONFIG_SA1100_YOPY
+#ifdef CONFIG_SA1100_PANGOLIN
+static struct mtd_partition pangolin_partitions[] = {
+	{
+		name:		"boot firmware",
+		size:		0x00080000,
+		offset:		0x00000000,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"kernel",
+		size:		0x00100000,
+		offset:		0x00080000,
+	}, {
+		name:		"initrd",
+		size:		0x00280000,
+		offset:		0x00180000,
+	}, {
+		name:		"initrd-test",
+		size:		0x03C00000,
+		offset:		0x00400000,
+	}
+};
+#endif
 
-static unsigned long yopy_max_flash_size = 0x08000000;
-static struct mtd_partition yopy_partitions[] = {
+#ifdef CONFIG_SA1100_PT_SYSTEM3
+/* erase size is 0x40000 == 256k; partitions must be aligned to this boundary */
+static struct mtd_partition system3_partitions[] = {
 	{
-		name: "boot firmware",
-		offset: 0x00000000,
-		size: 0x00040000,
-		mask_flags: MTD_WRITEABLE,  /* force read-only */
+		name:		"BLOB",
+		size:		0x00040000,
+		offset:		0x00000000,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"config",
+		size:		0x00040000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		name:		"kernel",
+		size:		0x00100000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		name:		"root",
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
+};
+#endif
+
+#ifdef CONFIG_SA1100_SHANNON
+static struct mtd_partition shannon_partitions[] = {
+	{
+		name: "BLOB boot loader",
+		offset: 0,
+		size: 0x20000
 	},
 	{
 		name: "kernel",
-		offset: 0x00080000,
-		size: 0x00080000,
+		offset: MTDPART_OFS_APPEND,
+		size: 0xe0000
 	},
-	{
+	{ 
 		name: "initrd",
-		offset: 0x00100000,
-		size: 0x00300000,
-	},
-	{
-		name: "root",
-		offset: 0x00400000,
-		size: 0x01000000,
-	},
+		offset: MTDPART_OFS_APPEND,	
+		size: MTDPART_SIZ_FULL
+	}
 };
 
 #endif
 
-#ifdef CONFIG_SA1100_JORNADA720
-
-static unsigned long jornada720_max_flash_size = 0x02000000;
-static struct mtd_partition jornada720_partitions[] = {
+#ifdef CONFIG_SA1100_SHERMAN
+static struct mtd_partition sherman_partitions[] = {
 	{
-		name: "JORNADA720 boot firmware",
-		size: 0x00040000,
-		offset: 0,
-		mask_flags: MTD_WRITEABLE  /* force read-only */
-	},{
-		name: "JORNADA720 kernel",
-		size: 0x000c0000,
-		offset: 0x40000
-	},{
-		name: "JORNADA720 params",
-		size: 0x00040000,
-		offset: 0x100000
-	},{
-		name: "JORNADA720 initrd",
-		size: 0x00100000,
-		offset: 0x00140000
-	},{
-		name: "JORNADA720 root cramfs",
-		size: 0x00300000,
-		offset: 0x00240000
-	},{
-		name: "JORNADA720 usr cramfs",
-		size: 0x00800000,
-		offset: 0x00540000
-	},{
-		name: "JORNADA720 usr local",
-		offset: 0x00d00000,
-		size: 0  /* will expand to the end of the flash */
+		size:		0x50000,
+		offset:		0,
+	}, {
+		size:		0x70000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		size:		0x600000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		size:		0xA0000,
+		offset:		MTDPART_OFS_APPEND,
 	}
 };
 #endif
 
-#ifdef CONFIG_SA1100_SHERMAN
-
-static unsigned long sherman_max_flash_size = 0x02000000;
-static struct mtd_partition sherman_partitions[] = {
-	{ offset: 0,			size: 0x50000 	},
-	{ offset: MTDPART_OFS_APPEND,	size: 0x70000 	},
-	{ offset: MTDPART_OFS_APPEND,	size: 0x600000 	},
-	{ offset: MTDPART_OFS_APPEND,	size: 0xA0000 	}
-};
-
+#ifdef CONFIG_SA1100_SIMPAD
+static struct mtd_partition simpad_partitions[] = {
+	{
+		name:		"SIMpad boot firmware",
+		size:		0x00080000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"SIMpad kernel",
+		size:		0x00100000,
+		offset:		0x00080000,
+	}, {
+#ifdef CONFIG_JFFS2_FS
+		name:		"SIMpad root jffs2",
+		size:		MTDPART_SIZ_FULL,
+		offset:		0x00180000,
+#else
+		name:		"SIMpad initrd",
+		size:		0x00300000,
+		offset:		0x00180000,
+	}, {
+		name:		"SIMpad root cramfs",
+		size:		0x00300000,
+		offset:		0x00480000,
+	}, {
+		name:		"SIMpad usr cramfs",
+		size:		0x005c0000,
+		offset:		0x00780000,
+	}, {
+		name:		"SIMpad usr local",
+		size:		MTDPART_SIZ_FULL,
+		offset:		0x00d40000,
 #endif
+	}
+};
+#endif /* CONFIG_SA1100_SIMPAD */
 
 #ifdef CONFIG_SA1100_STORK
-
-static unsigned long stork_max_flash_size = 0x02000000;
 static struct mtd_partition stork_partitions[] = {
 	{
-		name: "STORK boot firmware",
-		size: 0x00040000,
-		offset: 0,
-		mask_flags: MTD_WRITEABLE  /* force read-only */
-	},{
-		name: "STORK params",
-		size: 0x00040000,
-		offset: 0x40000
-	},{
-		name: "STORK kernel",
-		size: 0x00100000,
-		offset: 0x80000
-	},{
+		name:		"STORK boot firmware",
+		size:		0x00040000,
+		offset:		0,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"STORK params",
+		size:		0x00040000,
+		offset:		0x00040000,
+	}, {
+		name:		"STORK kernel",
+		size:		0x00100000,
+		offset:		0x00080000,
+	}, {
 #ifdef CONFIG_JFFS2_FS
-		name: "STORK root jffs2",
-		offset: 0x00180000,
-		size: MTDPART_SIZ_FULL
+		name:		"STORK root jffs2",
+		offset:		0x00180000,
+		size:		MTDPART_SIZ_FULL,
 #else
-		name: "STORK initrd",
-		size: 0x00100000,
-		offset: 0x00180000
-	},{
-		name: "STORK root cramfs",
-		size: 0x00300000,
-		offset: 0x00280000
-	},{
-		name: "STORK usr cramfs",
-		size: 0x00800000,
-		offset: 0x00580000
-	},{
-		name: "STORK usr local",
-		offset: 0x00d80000,
-		size: MTDPART_SIZ_FULL
+		name:		"STORK initrd",
+		size:		0x00100000,
+		offset:		0x00180000,
+	}, {
+		name:		"STORK root cramfs",
+		size:		0x00300000,
+		offset:		0x00280000,
+	}, {
+		name:		"STORK usr cramfs",
+		size:		0x00800000,
+		offset:		0x00580000,
+	}, {
+		name:		"STORK usr local",
+		offset:		0x00d80000,
+		size:		MTDPART_SIZ_FULL,
 #endif
 	}
 };
-
 #endif
 
-#define NB_OF(x)  (sizeof(x)/sizeof(x[0]))
-
-
-extern int parse_redboot_partitions(struct mtd_info *master, struct mtd_partition **pparts);
-extern int parse_bootldr_partitions(struct mtd_info *master, struct mtd_partition **pparts);
+#ifdef CONFIG_SA1100_TRIZEPS
+static struct mtd_partition trizeps_partitions[] = {
+	{
+		name:		"Bootloader & the kernel",
+		size:		0x00200000,
+		offset:		0,
+	}, {
+		name:		"Data",
+		size:		0x00400000,
+		offset:		MTDPART_OFS_APPEND,
+	}, {
+		size:		MTDPART_SIZ_FULL,
+		offset:		MTDPART_OFS_APPEND,
+	}
+};
+#endif
 
-static struct mtd_partition *parsed_parts;
-static struct mtd_info *mymtd;
+#ifdef CONFIG_SA1100_YOPY
+static struct mtd_partition yopy_partitions[] = {
+	{
+		name:		"boot firmware",
+		size:		0x00040000,
+		offset:		0x00000000,
+		mask_flags:	MTD_WRITEABLE,  /* force read-only */
+	}, {
+		name:		"kernel",
+		size:		0x00080000,
+		offset:		0x00080000,
+	}, {
+		name:		"initrd",
+		size:		0x00300000,
+		offset:		0x00100000,
+	}, {
+		name:		"root",
+		size:		0x01000000,
+		offset:		0x00400000,
+	}
+};
+#endif
 
-int __init sa1100_mtd_init(void)
+static int __init sa1100_static_partitions(struct mtd_partition **parts)
 {
-	struct mtd_partition *parts;
 	int nb_parts = 0;
-	int parsed_nr_parts = 0;
-	char *part_type;
-	
-	/* Default flash buswidth */
-	sa1100_map.buswidth = (MSC0 & MSC_RBW) ? 2 : 4;
 
-	/*
-	 * Static partition definition selection
-	 */
-	part_type = "static";
+#ifdef CONFIG_SA1100_ADSBITSY
+	if (machine_is_adsbitsy()) {
+		*parts       = adsbitsy_partitions;
+		nb_parts     = ARRAY_SIZE(adsbitsy_partitions);
+	}
+#endif
 #ifdef CONFIG_SA1100_ASSABET
 	if (machine_is_assabet()) {
-		parts = assabet_partitions;
-		nb_parts = NB_OF(assabet_partitions);
-		sa1100_map.size = assabet_max_flash_size;
+		*parts       = assabet_partitions;
+		nb_parts     = ARRAY_SIZE(assabet_partitions);
 	}
 #endif
-
-#ifdef CONFIG_SA1100_HUW_WEBPANEL
-	if (machine_is_huw_webpanel()) {
-		parts = huw_webpanel_partitions;
-		nb_parts = NB_OF(huw_webpanel_partitions);
-		sa1100_map.size = huw_webpanel_max_flash_size;
+#ifdef CONFIG_SA1100_BADGE4
+	if (machine_is_badge4()) {
+		*parts       = badge4_partitions;
+		nb_parts     = ARRAY_SIZE(badge4_partitions);
 	}
 #endif
-
-#ifdef CONFIG_SA1100_H3600
-	if (machine_is_h3600()) {
-		parts = h3600_partitions;
-		nb_parts = NB_OF(h3600_partitions);
-		sa1100_map.size = h3600_max_flash_size;
-		sa1100_map.set_vpp = h3600_set_vpp;
+#ifdef CONFIG_SA1100_CERF
+	if (machine_is_cerf()) {
+		*parts       = cerf_partitions;
+		nb_parts     = ARRAY_SIZE(cerf_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_CONSUS
+	if (machine_is_consus()) {
+		*parts       = consus_partitions;
+		nb_parts     = ARRAY_SIZE(consus_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_FLEXANET
+	if (machine_is_flexanet()) {
+		*parts       = flexanet_partitions;
+		nb_parts     = ARRAY_SIZE(flexanet_partitions);
 	}
 #endif
 #ifdef CONFIG_SA1100_FREEBIRD
 	if (machine_is_freebird()) {
-		parts = freebird_partitions;
-		nb_parts = NB_OF(freebird_partitions);
-		sa1100_map.size = freebird_max_flash_size;
+		*parts       = freebird_partitions;
+		nb_parts     = ARRAY_SIZE(freebird_partitions);
 	}
 #endif
-#ifdef CONFIG_SA1100_CERF
-	if (machine_is_cerf()) {
-		parts = cerf_partitions;
-		nb_parts = NB_OF(cerf_partitions);
-		sa1100_map.size = cerf_max_flash_size;
+#ifdef CONFIG_SA1100_FRODO
+	if (machine_is_frodo()) {
+		*parts       = frodo_partitions;
+		nb_parts     = ARRAY_SIZE(frodo_partitions);
 	}
-#endif
+#endif	
 #ifdef CONFIG_SA1100_GRAPHICSCLIENT
 	if (machine_is_graphicsclient()) {
-		parts = graphicsclient_partitions;
-		nb_parts = NB_OF(graphicsclient_partitions);
-		sa1100_map.size = graphicsclient_max_flash_size;
-		sa1100_map.buswidth = (MSC1 & MSC_RBW) ? 2:4;
+		*parts       = graphicsclient_partitions;
+		nb_parts     = ARRAY_SIZE(graphicsclient_partitions);
 	}
 #endif
 #ifdef CONFIG_SA1100_GRAPHICSMASTER
 	if (machine_is_graphicsmaster()) {
-		parts = graphicsmaster_partitions;
-		nb_parts = NB_OF(graphicsmaster_partitions);
-		sa1100_map.size = graphicsmaster_max_flash_size;
-		sa1100_map.buswidth = (MSC1 & MSC_RBW) ? 2:4;
+		*parts       = graphicsmaster_partitions;
+		nb_parts     = ARRAY_SIZE(graphicsmaster_partitions);
 	}
 #endif
-#ifdef CONFIG_SA1100_PANGOLIN
-	if (machine_is_pangolin()) {
-		parts = pangolin_partitions;
-		nb_parts = NB_OF(pangolin_partitions);
-		sa1100_map.size = pangolin_max_flash_size;
+#ifdef CONFIG_SA1100_H3XXX
+	if (machine_is_h3xxx()) {
+		*parts       = h3xxx_partitions;
+		nb_parts     = ARRAY_SIZE(h3xxx_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_HUW_WEBPANEL
+	if (machine_is_huw_webpanel()) {
+		*parts       = huw_webpanel_partitions;
+		nb_parts     = ARRAY_SIZE(huw_webpanel_partitions);
 	}
 #endif
 #ifdef CONFIG_SA1100_JORNADA720
 	if (machine_is_jornada720()) {
-		parts = jornada720_partitions;
-		nb_parts = NB_OF(jornada720_partitions);
-		sa1100_map.size = jornada720_max_flash_size;
-		sa1100_map.set_vpp = jornada720_set_vpp;
+		*parts       = jornada720_partitions;
+		nb_parts     = ARRAY_SIZE(jornada720_partitions);
 	}
 #endif
-#ifdef CONFIG_SA1100_YOPY
-	if (machine_is_yopy()) {
-		parts = yopy_partitions;
-		nb_parts = NB_OF(yopy_partitions);
-		sa1100_map.size = yopy_max_flash_size;
+#ifdef CONFIG_SA1100_PANGOLIN
+	if (machine_is_pangolin()) {
+		*parts       = pangolin_partitions;
+		nb_parts     = ARRAY_SIZE(pangolin_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_PT_SYSTEM3
+	if (machine_is_pt_system3()) {
+		*parts       = system3_partitions;
+		nb_parts     = ARRAY_SIZE(system3_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_SHANNON
+	if (machine_is_shannon()) {
+		*parts       = shannon_partitions;
+		nb_parts     = ARRAY_SIZE(shannon_partitions);
 	}
 #endif
 #ifdef CONFIG_SA1100_SHERMAN
 	if (machine_is_sherman()) {
-		parts = sherman_partitions;
-		nb_parts = NB_OF(sherman_partitions);
-		sa1100_map.size = sherman_max_flash_size;
+		*parts       = sherman_partitions;
+		nb_parts     = ARRAY_SIZE(sherman_partitions);
 	}
 #endif
-#ifdef CONFIG_SA1100_FLEXANET
-	if (machine_is_flexanet()) {
-		parts = flexanet_partitions;
-		nb_parts = NB_OF(flexanet_partitions);
-		sa1100_map.size = flexanet_max_flash_size;
+#ifdef CONFIG_SA1100_SIMPAD
+	if (machine_is_simpad()) {
+		*parts       = simpad_partitions;
+		nb_parts     = ARRAY_SIZE(simpad_partitions);
 	}
 #endif
 #ifdef CONFIG_SA1100_STORK
 	if (machine_is_stork()) {
-		parts = stork_partitions;
-		nb_parts = NB_OF(stork_partitions);
-		sa1100_map.size = stork_max_flash_size;
+		*parts       = stork_partitions;
+		nb_parts     = ARRAY_SIZE(stork_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_TRIZEPS
+	if (machine_is_trizeps()) {
+		*parts       = trizeps_partitions;
+		nb_parts     = ARRAY_SIZE(trizeps_partitions);
+	}
+#endif
+#ifdef CONFIG_SA1100_YOPY
+	if (machine_is_yopy()) {
+		*parts       = yopy_partitions;
+		nb_parts     = ARRAY_SIZE(yopy_partitions);
 	}
 #endif
 
+	return nb_parts;
+}
+#endif
+
+struct sa_info {
+	unsigned long base;
+	unsigned long size;
+	int width;
+	void *vbase;
+	struct map_info *map;
+	struct mtd_info *mtd;
+	struct resource *res;
+};
+
+#define NR_SUBMTD 4
+
+static struct sa_info info[NR_SUBMTD];
+
+static int __init sa1100_setup_mtd(struct sa_info *sa, int nr, struct mtd_info **rmtd)
+{
+	struct mtd_info *subdev[nr];
+	struct map_info *maps;
+	int i, found = 0, ret = 0;
+
 	/*
-	 * Now let's probe for the actual flash.  Do it here since
-	 * specific machine settings might have been set above.
+	 * Allocate the map_info structs in one go.
 	 */
-	printk(KERN_NOTICE "SA1100 flash: probing %d-bit flash bus\n", sa1100_map.buswidth*8);
-	mymtd = do_map_probe("cfi_probe", &sa1100_map);
-	if (!mymtd)
-		return -ENXIO;
-	mymtd->module = THIS_MODULE;
+	maps = kmalloc(sizeof(struct map_info) * nr, GFP_KERNEL);
+	if (!maps)
+		return -ENOMEM;
 
 	/*
-	 * Dynamic partition selection stuff (might override the static ones)
+	 * Claim and then map the memory regions.
 	 */
-#ifdef CONFIG_MTD_REDBOOT_PARTS
-	if (parsed_nr_parts == 0) {
-		int ret = parse_redboot_partitions(mymtd, &parsed_parts);
-		
-		if (ret > 0) {
-			part_type = "RedBoot";
-			parsed_nr_parts = ret;
+	for (i = 0; i < nr; i++) {
+		if (sa[i].base == (unsigned long)-1)
+			break;
+
+		sa[i].res = request_mem_region(sa[i].base, sa[i].size, "sa1100 flash");
+		if (!sa[i].res) {
+			ret = -EBUSY;
+			break;
+		}
+
+		sa[i].map = maps + i;
+		memcpy(sa[i].map, &sa1100_map, sizeof(struct map_info));
+
+		sa[i].vbase = ioremap(sa[i].base, sa[i].size);
+		if (!sa[i].vbase) {
+			ret = -ENOMEM;
+			break;
 		}
+
+		sa[i].map->map_priv_1 = (unsigned long)sa[i].vbase;
+		sa[i].map->buswidth = sa[i].width;
+		sa[i].map->size = sa[i].size;
+
+		/*
+		 * Now let's probe for the actual flash.  Do it here since
+		 * specific machine settings might have been set above.
+		 */
+		sa[i].mtd = do_map_probe("cfi_probe", sa[i].map);
+		if (sa[i].mtd == NULL) {
+			ret = -ENXIO;
+			break;
+		}
+		sa[i].mtd->module = THIS_MODULE;
+		subdev[i] = sa[i].mtd;
+
+		printk(KERN_INFO "SA1100 flash: CFI device at 0x%08lx, %dMiB, "
+			"%d-bit\n", sa[i].base, sa[i].mtd->size >> 20,
+			sa[i].width * 8);
+		found += 1;
 	}
+
+	/*
+	 * ENXIO is special.  It means we didn't find a chip when
+	 * we probed.  We need to tear down the mapping, free the
+	 * resource and mark it as such.
+	 */
+	if (ret == -ENXIO) {
+		iounmap(sa[i].vbase);
+		sa[i].vbase = NULL;
+		release_resource(sa[i].res);
+		sa[i].res = NULL;
+	}
+
+	/*
+	 * If we found one device, don't bother with concat support.
+	 * If we found multiple devices, use concat if we have it
+	 * available, otherwise fail.
+	 */
+	if (ret == 0 || ret == -ENXIO) {
+		if (found == 1) {
+			*rmtd = subdev[0];
+			ret = 0;
+		} else if (found > 1) {
+			/*
+			 * We detected multiple devices.  Concatenate
+			 * them together.
+			 */
+#ifdef CONFIG_MTD_CONCAT
+			*rmtd = mtd_concat_create(subdev, found,
+						  "sa1100 flash");
+			if (*rmtd == NULL)
+				ret = -ENXIO;
+#else
+			printk(KERN_ERR "SA1100 flash: multiple devices "
+			       "found but MTD concat support disabled.\n");
+			ret = -ENXIO;
 #endif
-#ifdef CONFIG_MTD_BOOTLDR_PARTS
-	if (parsed_nr_parts == 0) {
-		int ret = parse_bootldr_partitions(mymtd, &parsed_parts);
-		if (ret > 0) {
-			part_type = "Compaq bootldr";
-			parsed_nr_parts = ret;
 		}
 	}
-#endif
 
-	if (parsed_nr_parts > 0) {
-		parts = parsed_parts;
-		nb_parts = parsed_nr_parts;
+	/*
+	 * If we failed, clean up.
+	 */
+	if (ret) {
+		do {
+			if (sa[i].mtd)
+				map_destroy(sa[i].mtd);
+			if (sa[i].vbase)
+				iounmap(sa[i].vbase);
+			if (sa[i].res)
+				release_resource(sa[i].res);
+		} while (i--);
+
+		kfree(maps);
 	}
 
-	if (nb_parts == 0) {
-		printk(KERN_NOTICE "SA1100 flash: no partition info available, registering whole flash at once\n");
-		add_mtd_device(mymtd);
+	return ret;
+}
+
+static void __exit sa1100_destroy_mtd(struct sa_info *sa, struct mtd_info *mtd)
+{
+	int i;
+
+	del_mtd_partitions(mtd);
+
+	if (mtd != sa[0].mtd)	/* not the lone chip, so it is the concat wrapper */
+		mtd_concat_destroy(mtd);
+	/* sa[] has NR_SUBMTD entries, so the last valid index is NR_SUBMTD - 1 */
+	for (i = NR_SUBMTD - 1; i >= 0; i--) {
+		if (sa[i].mtd)
+			map_destroy(sa[i].mtd);
+		if (sa[i].vbase)
+			iounmap(sa[i].vbase);
+		if (sa[i].res)
+			release_resource(sa[i].res);
+	}
+	kfree(sa[0].map);	/* every map_info lives in the one kmalloc'd array */
+}
+
+static int __init sa1100_locate_flash(void)
+{
+	int i, nr = -ENODEV;
+
+	if (machine_is_adsbitsy()) {
+		info[0].base = SA1100_CS1_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_assabet()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		info[1].base = SA1100_CS1_PHYS; /* neponset */
+		info[1].size = SZ_32M;
+		nr = 2;
+	}
+	if (machine_is_badge4()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_4M;
+		nr = 1;
+	}
+	if (machine_is_cerf()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_consus()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_flexanet()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_freebird()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_frodo()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_graphicsclient()) {
+		info[0].base = SA1100_CS1_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_graphicsmaster()) {
+		info[0].base = SA1100_CS1_PHYS;
+		info[0].size = SZ_16M;
+		nr = 1;
+	}
+	if (machine_is_h3xxx()) {
+		sa1100_map.set_vpp = h3xxx_set_vpp;
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_huw_webpanel()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_16M;
+		nr = 1;
+	}
+	if (machine_is_itsy()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_jornada720()) {
+		sa1100_map.set_vpp = jornada720_set_vpp;
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_nanoengine()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_pangolin()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_64M;
+		nr = 1;
+	}
+	if (machine_is_pfs168()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_pleb()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_4M;
+		info[1].base = SA1100_CS1_PHYS;
+		info[1].size = SZ_4M;
+		nr = 2;
+	}
+	if (machine_is_pt_system3()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_16M;
+		nr = 1;
+	}
+	if (machine_is_shannon()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_4M;
+		nr = 1;
+	}
+	if (machine_is_sherman()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_simpad()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_stork()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_32M;
+		nr = 1;
+	}
+	if (machine_is_trizeps()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_16M;
+		nr = 1;
+	}
+	if (machine_is_victor()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_2M;
+		nr = 1;
+	}
+	if (machine_is_yopy()) {
+		info[0].base = SA1100_CS0_PHYS;
+		info[0].size = SZ_64M;
+		info[1].base = SA1100_CS1_PHYS;
+		info[1].size = SZ_64M;
+		nr = 2;
+	}
+
+	if (nr < 0)
+		return nr;
+
+	/*
+	 * Retrieve the buswidth from the MSC registers.
+	 * We currently only implement CS0 and CS1 here.
+	 */
+	for (i = 0; i < nr; i++) {
+		switch (info[i].base) {
+		default:
+			printk(KERN_WARNING "SA1100 flash: unknown base address "
+				"0x%08lx, assuming CS0\n", info[i].base);
+		case SA1100_CS0_PHYS:
+			info[i].width = (MSC0 & MSC_RBW) ? 2 : 4;
+			break;
+
+		case SA1100_CS1_PHYS:
+			info[i].width = ((MSC0 >> 16) & MSC_RBW) ? 2 : 4;
+			break;
+		}
+	}
+
+	return nr;
+}
+
+extern int parse_redboot_partitions(struct mtd_info *master, struct mtd_partition **pparts);
+extern int parse_cmdline_partitions(struct mtd_info *master, struct mtd_partition **pparts, char *);
+
+static struct mtd_partition *parsed_parts;
+
+static void __init sa1100_locate_partitions(struct mtd_info *mtd)
+{
+	const char *part_type = NULL;
+	int nr_parts = 0;
+
+	do {
+		/*
+		 * Partition selection stuff.
+		 */
+#ifdef CONFIG_MTD_CMDLINE_PARTS
+		nr_parts = parse_cmdline_partitions(mtd, &parsed_parts, "sa1100");
+		if (nr_parts > 0) {
+			part_type = "command line";
+			break;
+		}
+#endif
+#ifdef CONFIG_MTD_REDBOOT_PARTS
+		nr_parts = parse_redboot_partitions(mtd, &parsed_parts);
+		if (nr_parts > 0) {
+			part_type = "RedBoot";
+			break;
+		}
+#endif
+#ifdef CONFIG_MTD_SA1100_STATICMAP
+		nr_parts = sa1100_static_partitions(&parsed_parts);
+		if (nr_parts > 0) {
+			part_type = "static";
+			break;
+		}
+#endif
+	} while (0);
+
+	if (nr_parts <= 0) {	/* also covers a parser reporting < 0 */
+		printk(KERN_NOTICE "SA1100 flash: no partition info "
+			"available, registering whole flash\n");
+		add_mtd_device(mtd);
 	} else {
-		printk(KERN_NOTICE "Using %s partition definition\n", part_type);
-		add_mtd_partitions(mymtd, parts, nb_parts);
+		printk(KERN_NOTICE "SA1100 flash: using %s partition "
+			"definition\n", part_type);
+		add_mtd_partitions(mtd, parsed_parts, nr_parts);
 	}
-	return 0;
+
+	/* Always succeeds. */
+}
+
+static void __exit sa1100_destroy_partitions(void)
+{
+	if (parsed_parts)
+		kfree(parsed_parts);
+}
+
+static struct mtd_info *mymtd;
+
+static int __init sa1100_mtd_init(void)
+{
+	int ret;
+	int nr;
+
+	nr = sa1100_locate_flash();
+	if (nr < 0)
+		return nr;
+
+	ret = sa1100_setup_mtd(info, nr, &mymtd);
+	if (ret == 0)
+		sa1100_locate_partitions(mymtd);
+
+	return ret;
 }
 
 static void __exit sa1100_mtd_cleanup(void)
 {
-	if (mymtd) {
-		del_mtd_partitions(mymtd);
-		map_destroy(mymtd);
-		if (parsed_parts)
-			kfree(parsed_parts);
-	}
+	sa1100_destroy_mtd(info, mymtd);
+	sa1100_destroy_partitions();
 }
 
 module_init(sa1100_mtd_init);
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
new file mode 100644
index 000000000000..4c16c0e43e0f
--- /dev/null
+++ b/drivers/mtd/mtdconcat.c
@@ -0,0 +1,675 @@
+/*
+ * MTD device concatenation layer
+ *
+ * (C) 2002 Robert Kaiser <rkaiser@sysgo.de>
+ *
+ * This code is GPL
+ *
+ * $Id: mtdconcat.c,v 1.3 2002/05/21 21:04:25 dwmw2 Exp $
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/concat.h>
+
+/*
+ * Our storage structure:
+ * Subdev points to an array of pointers to struct mtd_info objects
+ * which is allocated along with this structure
+ *
+ */
+struct mtd_concat {
+	struct mtd_info mtd;		/* first member: CONCAT() casts a struct mtd_info * to this struct */
+	int             num_subdev;	/* number of entries in subdev[] */
+	struct mtd_info **subdev;	/* points just past this struct, see mtd_concat_create() */
+};
+
+/*
+ * how to calculate the size required for the above structure,
+ * including the pointer array subdev points to:
+ */
+#define SIZEOF_STRUCT_MTD_CONCAT(num_subdev)	\
+	((sizeof(struct mtd_concat) + (num_subdev) * sizeof(struct mtd_info *)))
+
+
+/*
+ * Given a pointer to the MTD object in the mtd_concat structure,
+ * we can retrieve the pointer to that structure with this macro.
+ */
+#define CONCAT(x)  ((struct mtd_concat *)(x))
+
+	
+/* 
+ * MTD methods which look up the relevant subdevice, translate the
+ * effective address and pass through to the subdevice.
+ */
+
+/*
+ * Returns 0 when len is used up, the failing subdevice's read error, or
+ * -EINVAL if the request runs past the last subdevice.
+ */
+static int concat_read (struct mtd_info *mtd, loff_t from, size_t len,
+			size_t *retlen, u_char *buf)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	int ret = -EINVAL;
+	int idx;
+
+	*retlen = 0;
+
+	for (idx = 0; idx < concat->num_subdev; idx++) {
+		struct mtd_info *child = concat->subdev[idx];
+		size_t chunk, done;
+
+		if (from >= child->size) {
+			from -= child->size;
+			continue;
+		}
+		/* clip the transfer at the end of this subdevice */
+		if (from + len > child->size)
+			chunk = child->size - from;
+		else
+			chunk = len;
+
+		ret = child->read(child, from, chunk, &done, buf);
+		if (ret)
+			break;
+
+		*retlen += done;
+		len -= chunk;
+		if (len == 0)
+			break;
+
+		/* the remainder starts at offset 0 of the next subdevice */
+		ret = -EINVAL;
+		buf += chunk;
+		from = 0;
+	}
+	return ret;
+}
+
+/*
+ * Returns 0 when len is used up, -EROFS for a read-only (sub)device, the
+ * subdevice's write error, or -EINVAL if the request runs past the end.
+ */
+static int concat_write (struct mtd_info *mtd, loff_t to, size_t len,
+			size_t *retlen, const u_char *buf)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	int ret = -EINVAL;
+	int idx;
+
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+
+	*retlen = 0;
+
+	for (idx = 0; idx < concat->num_subdev; idx++) {
+		struct mtd_info *child = concat->subdev[idx];
+		size_t chunk, done;
+
+		if (to >= child->size) {
+			to -= child->size;
+			continue;
+		}
+		if (to + len > child->size)
+			chunk = child->size - to;
+		else
+			chunk = len;
+
+		/* the super device may be writeable while this part is not */
+		if (!(child->flags & MTD_WRITEABLE)) {
+			ret = -EROFS;
+			break;
+		}
+		ret = child->write(child, to, chunk, &done, buf);
+		if (ret)
+			break;
+
+		*retlen += done;
+		len -= chunk;
+		if (len == 0)
+			break;
+
+		ret = -EINVAL;
+		buf += chunk;
+		to = 0;
+	}
+	return ret;
+}
+
+static void concat_erase_callback (struct erase_info *instr)
+{	/* instr->priv holds the address of the waiter's wait queue head */
+	wake_up((wait_queue_head_t *)instr->priv);
+}
+
+static int concat_dev_erase(struct mtd_info *mtd, struct erase_info *erase)
+{
+	int err;
+	wait_queue_head_t waitq;
+	DECLARE_WAITQUEUE(wait, current);
+
+	/*
+	 * This code was stol^H^H^H^Hinspired by mtdchar.c
+	 */
+	init_waitqueue_head(&waitq);
+
+	erase->mtd = mtd;
+	erase->callback = concat_erase_callback;
+	erase->priv = (unsigned long)&waitq;
+	/*
+	 * Start the erase; concat_erase_callback() wakes us via priv.
+	 * FIXME: Allow INTERRUPTIBLE. Which means
+	 * not having the wait_queue head on the stack.
+	 */
+	err = mtd->erase(mtd, erase);
+	if (!err)
+	{
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&waitq, &wait);
+		if (erase->state != MTD_ERASE_DONE && erase->state != MTD_ERASE_FAILED)
+			schedule();	/* skipped when the erase has already finished */
+		remove_wait_queue(&waitq, &wait);
+		set_current_state(TASK_RUNNING);
+
+		err = (erase->state == MTD_ERASE_FAILED) ? -EIO : 0;
+	}
+	return err;
+}
+
+/* Erase [addr, addr + len) of the concat device, one subdevice at a time. */
+static int concat_erase (struct mtd_info *mtd, struct erase_info *instr)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	struct mtd_info *subdev;
+	int i, err;
+	u_int32_t length;
+	struct erase_info *erase;
+
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+
+	/* compare against the room left after addr: addr + len could wrap */
+	if(instr->addr > concat->mtd.size ||
+	   instr->len > concat->mtd.size - instr->addr)
+		return -EINVAL;
+
+	/*
+	 * Check for proper erase block alignment of the to-be-erased area.
+	 * It is easier to do this based on the super device's erase
+	 * region info rather than looking at each particular sub-device
+	 * in turn.
+	 */
+	if (!concat->mtd.numeraseregions)
+	{	/* the easy case: device has uniform erase block size */
+		if(instr->addr & (concat->mtd.erasesize - 1))
+			return -EINVAL;
+		if(instr->len & (concat->mtd.erasesize - 1))
+			return -EINVAL;
+	}
+	else
+	{	/* device has variable erase size */
+		struct mtd_erase_region_info *erase_regions = concat->mtd.eraseregions;
+
+		/*
+		 * Find the erase region where the to-be-erased area begins:
+		 */
+		for(i = 0; i < concat->mtd.numeraseregions &&
+		           instr->addr >= erase_regions[i].offset; i++)
+			;
+		--i;
+
+		/*
+		 * Now erase_regions[i] is the region in which the
+		 * to-be-erased area begins. Verify that the starting
+		 * offset is aligned to this region's erase size:
+		 */
+		if (instr->addr & (erase_regions[i].erasesize-1))
+			return -EINVAL;
+
+		/*
+		 * now find the erase region where the to-be-erased area ends:
+		 */
+		for(; i < concat->mtd.numeraseregions &&
+		      (instr->addr + instr->len) >=  erase_regions[i].offset ; ++i)
+			;
+		--i;
+		/*
+		 * check if the ending offset is aligned to this region's erase size
+		 */
+		if ((instr->addr + instr->len) & (erase_regions[i].erasesize-1))
+			return -EINVAL;
+	}
+
+	/* make a local copy of instr to avoid modifying the caller's struct */
+	erase = kmalloc(sizeof(struct erase_info),GFP_KERNEL);
+
+	if (!erase)
+		return -ENOMEM;
+
+	*erase = *instr;
+	length = instr->len;
+
+	/*
+	 * find the subdevice where the to-be-erased area begins, adjust
+	 * starting offset to be relative to the subdevice start
+	 */
+	for(i = 0; i < concat->num_subdev; i++)
+	{
+		subdev = concat->subdev[i];
+		if(subdev->size <= erase->addr)
+			erase->addr -= subdev->size;
+		else
+			break;
+	}
+	if(i >= concat->num_subdev)	/* must never happen since size */
+		BUG();					/* limit has been verified above */
+
+	/* now do the erase: */
+	err = 0;
+	for(;length > 0; i++)	/* loop for all subdevices affected by this request */
+	{
+		subdev = concat->subdev[i];		/* get current subdevice */
+
+		/* limit length to subdevice's size: */
+		if(erase->addr + length > subdev->size)
+			erase->len = subdev->size - erase->addr;
+		else
+			erase->len = length;
+
+		if (!(subdev->flags & MTD_WRITEABLE))
+		{
+			err = -EROFS;
+			break;
+		}
+		length -= erase->len;
+		if ((err = concat_dev_erase(subdev, erase)))
+		{
+			if(err == -EINVAL)	/* sanity check: must never happen since */
+				BUG();			/* block alignment has been checked above */
+			break;
+		}
+		/*
+		 * erase->addr specifies the offset of the area to be
+		 * erased *within the current subdevice*. It can be
+		 * non-zero only the first time through this loop, i.e.
+		 * for the first subdevice where blocks need to be erased.
+		 * All the following erases must begin at the start of the
+		 * current subdevice, i.e. at offset zero.
+		 */
+		erase->addr = 0;
+	}
+	kfree(erase);
+	if (err)
+		return err;
+
+	instr->state = MTD_ERASE_DONE;
+	if (instr->callback)
+		instr->callback(instr);
+	return 0;
+}
+
+/*
+ * Lock [ofs, ofs + len) across the subdevices it covers; -EOPNOTSUPP if
+ * one of them has no lock method.
+ */
+static int concat_lock (struct mtd_info *mtd, loff_t ofs, size_t len)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	int i, err = -EINVAL;
+
+	if ((len + ofs) > mtd->size)
+		return -EINVAL;
+
+	for(i = 0; i < concat->num_subdev; i++)
+	{
+		struct mtd_info *subdev = concat->subdev[i];
+		size_t size;
+
+		if (ofs >= subdev->size)
+		{	/* range starts beyond this subdevice: skip it */
+			ofs -= subdev->size;
+			continue;
+		}
+		if (ofs + len > subdev->size)
+			size = subdev->size - ofs;
+		else
+			size = len;
+
+		/* lock is an optional method: never call through NULL */
+		err = subdev->lock ? subdev->lock(subdev, ofs, size) : -EOPNOTSUPP;
+		if(err)
+			break;
+		len -= size;
+		if(len == 0)
+			break;
+
+		err = -EINVAL;
+		ofs = 0;
+	}
+	return err;
+}
+
+/*
+ * Unlock [ofs, ofs + len) across the subdevices it covers; -EOPNOTSUPP
+ * if one of them has no unlock method.
+ */
+static int concat_unlock (struct mtd_info *mtd, loff_t ofs, size_t len)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	int i, err = 0;
+
+	if ((len + ofs) > mtd->size)
+		return -EINVAL;
+
+	for(i = 0; i < concat->num_subdev; i++)
+	{
+		struct mtd_info *subdev = concat->subdev[i];
+		size_t size;
+
+		if (ofs >= subdev->size)
+		{	/* range starts beyond this subdevice: skip it */
+			ofs -= subdev->size;
+			continue;
+		}
+		if (ofs + len > subdev->size)
+			size = subdev->size - ofs;
+		else
+			size = len;
+
+		/* unlock is an optional method: never call through NULL */
+		err = subdev->unlock ? subdev->unlock(subdev, ofs, size) : -EOPNOTSUPP;
+		if(err)
+			break;
+		len -= size;
+		if(len == 0)
+			break;
+
+		err = -EINVAL;
+		ofs = 0;
+	}
+	return err;
+}
+
+/* NOTE(review): assumes every subdevice provides sync() - confirm. */
+static void concat_sync(struct mtd_info *mtd)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	struct mtd_info **child = concat->subdev;
+	int left;
+
+	/* pass the sync request on to each subdevice in turn */
+	for (left = concat->num_subdev; left > 0; left--, child++)
+		(*child)->sync(*child);
+}
+
+/* Suspend the subdevices in order, stopping at the first negative result. */
+static int concat_suspend(struct mtd_info *mtd)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	int idx = 0;
+	int ret = 0;
+
+	while (idx < concat->num_subdev && ret >= 0) {
+		struct mtd_info *child = concat->subdev[idx++];
+		ret = child->suspend(child);
+	}
+	return ret;
+}
+
+/* NOTE(review): assumes every subdevice provides resume() - confirm. */
+static void concat_resume(struct mtd_info *mtd)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	struct mtd_info **child = concat->subdev;
+	int left;
+
+	/* pass the resume request on to each subdevice in turn */
+	for (left = concat->num_subdev; left > 0; left--, child++)
+		(*child)->resume(*child);
+}
+
+/*
+ * This function constructs a virtual MTD device by concatenating
+ * num_devs MTD devices. A pointer to the new device object is
+ * returned upon success, NULL upon failure. This function does _not_
+ * register any devices: this is the caller's responsibility.
+ */
+struct mtd_info *mtd_concat_create(
+	struct mtd_info *subdev[],	/* subdevices to concatenate */
+	int num_devs,				/* number of subdevices      */
+	char *name)					/* name for the new device   */
+{
+	int i;
+	size_t size;
+	struct mtd_concat *concat;
+	u_int32_t max_erasesize, curr_erasesize;
+	int num_erase_region;
+
+	printk(KERN_NOTICE "Concatenating MTD devices:\n");
+	for(i = 0; i < num_devs; i++)
+		printk(KERN_NOTICE "(%d): \"%s\"\n", i, subdev[i]->name);
+	printk(KERN_NOTICE "into device \"%s\"\n", name);
+
+	/* allocate the device structure */
+	size = SIZEOF_STRUCT_MTD_CONCAT(num_devs);
+	concat = kmalloc (size, GFP_KERNEL);
+	if(!concat)
+	{
+		printk ("memory allocation error while creating concatenated device \"%s\"\n",
+				name);
+			return NULL;
+	}
+	memset(concat, 0, size);
+	concat->subdev = (struct mtd_info **)(concat + 1);	/* pointer array sits right behind the struct */
+
+	/*
+	 * Set up the new "super" device's MTD object structure, check for
+	 * incompatibilities between the subdevices.
+	 */
+	concat->mtd.type      = subdev[0]->type;
+	concat->mtd.flags     = subdev[0]->flags;
+	concat->mtd.size      = subdev[0]->size;
+	concat->mtd.erasesize = subdev[0]->erasesize;
+	concat->mtd.oobblock  = subdev[0]->oobblock;
+	concat->mtd.oobsize   = subdev[0]->oobsize;
+	concat->mtd.ecctype   = subdev[0]->ecctype;
+	concat->mtd.eccsize   = subdev[0]->eccsize;
+
+	concat->subdev[0]   = subdev[0];
+
+	for(i = 1; i < num_devs; i++)
+	{
+		if(concat->mtd.type != subdev[i]->type)
+		{
+			kfree(concat);
+			printk ("Incompatible device type on \"%s\"\n", subdev[i]->name);
+			return NULL;
+		}
+		if(concat->mtd.flags != subdev[i]->flags)
+		{	/*
+			 * Expect all flags except MTD_WRITEABLE to be equal on
+			 * all subdevices.
+			 */
+			if((concat->mtd.flags ^ subdev[i]->flags) & ~MTD_WRITEABLE)
+			{
+				kfree(concat);
+				printk ("Incompatible device flags on \"%s\"\n", subdev[i]->name);
+				return NULL;
+			}
+			else	/* if writeable attribute differs, make super device writeable */
+				concat->mtd.flags |= subdev[i]->flags & MTD_WRITEABLE;
+		}
+		concat->mtd.size += subdev[i]->size;
+		if(concat->mtd.oobblock != subdev[i]->oobblock ||
+		   concat->mtd.oobsize  != subdev[i]->oobsize  ||
+		   concat->mtd.ecctype  != subdev[i]->ecctype  ||
+		   concat->mtd.eccsize  != subdev[i]->eccsize)
+		{
+			kfree(concat);
+			printk ("Incompatible OOB or ECC data on \"%s\"\n", subdev[i]->name);
+			return NULL;
+		}
+		concat->subdev[i] = subdev[i];
+
+	}
+
+	concat->num_subdev  = num_devs;
+	concat->mtd.name    = name;	/* the pointer is stored, the string is not copied */
+
+	/*
+	 * NOTE: for now, we do not provide any readv()/writev() methods
+	 *       because they are messy to implement and they are not
+	 *       used to a great extent anyway.
+	 */
+	concat->mtd.erase   = concat_erase;
+	concat->mtd.read    = concat_read;
+	concat->mtd.write   = concat_write;
+	concat->mtd.sync    = concat_sync;
+	concat->mtd.lock    = concat_lock;
+	concat->mtd.unlock  = concat_unlock;
+	concat->mtd.suspend = concat_suspend;
+	concat->mtd.resume  = concat_resume;
+
+
+	/*
+	 * Combine the erase block size info of the subdevices:
+	 *
+	 * first, walk the map of the new device and see how
+	 * many changes in erase size we have
+	 */
+	max_erasesize = curr_erasesize = subdev[0]->erasesize;
+	num_erase_region = 1;
+	for(i = 0; i < num_devs; i++)
+	{
+		if(subdev[i]->numeraseregions == 0)
+		{	/* current subdevice has uniform erase size */
+			if(subdev[i]->erasesize != curr_erasesize)
+			{	/* if it differs from the last subdevice's erase size, count it */
+				++num_erase_region;
+				curr_erasesize = subdev[i]->erasesize;
+				if(curr_erasesize > max_erasesize)
+					max_erasesize = curr_erasesize;
+			}
+		}
+		else
+		{	/* current subdevice has variable erase size */
+			int j;
+			for(j = 0; j < subdev[i]->numeraseregions; j++)
+			{	/* walk the list of erase regions, count any changes */
+				if(subdev[i]->eraseregions[j].erasesize != curr_erasesize)
+				{
+					++num_erase_region;
+					curr_erasesize = subdev[i]->eraseregions[j].erasesize;
+					if(curr_erasesize > max_erasesize)
+						max_erasesize = curr_erasesize;
+				}
+			}
+		}
+	}
+
+	if(num_erase_region == 1)
+	{	/*
+		 * All subdevices have the same uniform erase size.
+		 * This is easy:
+		 */
+		concat->mtd.erasesize = curr_erasesize;
+		concat->mtd.numeraseregions = 0;
+	}
+	else
+	{	/*
+		 * erase block size varies across the subdevices: allocate
+		 * space to store the data describing the variable erase regions
+		 */
+		struct mtd_erase_region_info *erase_region_p;
+		u_int32_t begin, position;
+
+		concat->mtd.erasesize = max_erasesize;
+		concat->mtd.numeraseregions = num_erase_region;
+		concat->mtd.eraseregions = erase_region_p = kmalloc (
+		     num_erase_region * sizeof(struct mtd_erase_region_info), GFP_KERNEL);
+		if(!erase_region_p)
+		{
+			kfree(concat);
+			printk ("memory allocation error while creating erase region list"
+			        " for device \"%s\"\n", name);
+			return NULL;
+		}
+
+		/*
+		 * walk the map of the new device once more and fill
+		 * in erase region info:
+		 */
+		curr_erasesize = subdev[0]->erasesize;
+		begin = position = 0;
+		for(i = 0; i < num_devs; i++)
+		{
+			if(subdev[i]->numeraseregions == 0)
+			{	/* current subdevice has uniform erase size */
+				if(subdev[i]->erasesize != curr_erasesize)
+				{	/*
+					 *  fill in an mtd_erase_region_info structure for the area
+					 *  we have walked so far:
+					 */
+					erase_region_p->offset    = begin;
+					erase_region_p->erasesize = curr_erasesize;
+					erase_region_p->numblocks = (position - begin) / curr_erasesize;
+					begin = position;
+
+					curr_erasesize = subdev[i]->erasesize;
+					++erase_region_p;
+				}
+				position += subdev[i]->size;
+			}
+			else
+			{	/* current subdevice has variable erase size */
+				int j;
+				for(j = 0; j < subdev[i]->numeraseregions; j++)
+				{	/* walk the list of erase regions, write an entry at each change */
+					if(subdev[i]->eraseregions[j].erasesize != curr_erasesize)
+					{
+						erase_region_p->offset    = begin;
+						erase_region_p->erasesize = curr_erasesize;
+						erase_region_p->numblocks = (position - begin) / curr_erasesize;
+						begin = position;
+
+						curr_erasesize = subdev[i]->eraseregions[j].erasesize;
+						++erase_region_p;
+					}
+					position += subdev[i]->eraseregions[j].numblocks * curr_erasesize;
+				}
+			}
+		}
+		/* Now write the final entry */
+		erase_region_p->offset    = begin;
+		erase_region_p->erasesize = curr_erasesize;
+		erase_region_p->numblocks = (position - begin) / curr_erasesize;
+	}
+
+	return &concat->mtd;
+}
+
+/*
+ * Release a device built by mtd_concat_create(): the erase region table
+ * (allocated only when the erase size varies) and the device itself.
+ */
+void mtd_concat_destroy(struct mtd_info *mtd)
+{
+	struct mtd_concat *concat = CONCAT(mtd);
+	if (concat->mtd.numeraseregions != 0)
+		kfree(concat->mtd.eraseregions);
+	kfree(concat);
+}
+
+
+EXPORT_SYMBOL(mtd_concat_create);
+EXPORT_SYMBOL(mtd_concat_destroy);
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Kaiser <rkaiser@sysgo.de>");
+MODULE_DESCRIPTION("Generic support for concatenating of MTD devices");
diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h
new file mode 100644
index 000000000000..ed8dc6755219
--- /dev/null
+++ b/include/linux/mtd/concat.h
@@ -0,0 +1,23 @@
+/*
+ * MTD device concatenation layer definitions
+ *
+ * (C) 2002 Robert Kaiser <rkaiser@sysgo.de>
+ *
+ * This code is GPL
+ *
+ * $Id: concat.h,v 1.1 2002/03/08 16:34:36 rkaiser Exp $
+ */
+
+#ifndef MTD_CONCAT_H
+#define MTD_CONCAT_H
+
+
+struct mtd_info *mtd_concat_create(
+    struct mtd_info *subdev[],  /* subdevices to concatenate */
+    int num_devs,               /* number of subdevices      */
+    char *name);                /* name for the new device   */
+
+void mtd_concat_destroy(struct mtd_info *mtd);
+
+#endif
+
-- 
cgit v1.2.3


From 90df68e70b631886169c9287faebf2742f43484c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@tove.transmeta.com>
Date: Mon, 14 Oct 2002 21:24:37 -0700
Subject: Block layer ioctl cleanups.

Rename the old "block_ioctl()" function to "scsi_cmd_ioctl()", as that
is what the function does, and rename the whole file to "scsi_ioctl.c".
---
 drivers/block/Makefile      |   4 +-
 drivers/block/blkpg.c       |   2 +-
 drivers/block/block_ioctl.c |  83 -----------------
 drivers/block/scsi_ioctl.c  | 215 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide.c           |   2 +-
 fs/block_dev.c              |  41 +--------
 include/linux/blkdev.h      |  12 ++-
 7 files changed, 233 insertions(+), 126 deletions(-)
 delete mode 100644 drivers/block/block_ioctl.c
 create mode 100644 drivers/block/scsi_ioctl.c

(limited to 'include/linux')

diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index eff7ee947ea7..8457b1bfa13a 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,9 +9,9 @@
 #
 
 export-objs	:= elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
-		   block_ioctl.o deadline-iosched.o
+		   scsi_ioctl.o deadline-iosched.o
 
-obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
+obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o scsi_ioctl.o deadline-iosched.o
 
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c
index d5ba72a8ac86..7b55729fa29a 100644
--- a/drivers/block/blkpg.c
+++ b/drivers/block/blkpg.c
@@ -305,6 +305,6 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
 			return 0;
 
 		default:
-			return -EINVAL;
+			return -ENOTTY;
 	}
 }
diff --git a/drivers/block/block_ioctl.c b/drivers/block/block_ioctl.c
deleted file mode 100644
index edde76503d60..000000000000
--- a/drivers/block/block_ioctl.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
- *
- */
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/config.h>
-#include <linux/swap.h>
-#include <linux/init.h>
-#include <linux/smp_lock.h>
-#include <linux/module.h>
-#include <linux/blk.h>
-#include <linux/completion.h>
-
-#include <linux/cdrom.h>
-
-int blk_do_rq(request_queue_t *q, struct request *rq)
-{
-	DECLARE_COMPLETION(wait);
-	int err = 0;
-
-	rq->flags |= REQ_NOMERGE;
-	rq->waiting = &wait;
-	elv_add_request(q, rq, 1);
-	generic_unplug_device(q);
-	wait_for_completion(&wait);
-
-	/*
-	 * for now, never retry anything
-	 */
-	if (rq->errors)
-		err = -EIO;
-
-	return err;
-}
-
-int block_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
-{
-	request_queue_t *q;
-	struct request *rq;
-	int close = 0, err;
-
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
-	switch (cmd) {
-		case CDROMCLOSETRAY:
-			close = 1;
-		case CDROMEJECT:
-			rq = blk_get_request(q, WRITE, __GFP_WAIT);
-			rq->flags = REQ_BLOCK_PC;
-			memset(rq->cmd, 0, sizeof(rq->cmd));
-			rq->cmd[0] = GPCMD_START_STOP_UNIT;
-			rq->cmd[4] = 0x02 + (close != 0);
-			err = blk_do_rq(q, rq);
-			blk_put_request(rq);
-			break;
-		default:
-			err = -ENOTTY;
-	}
-
-	blk_put_queue(q);
-	return err;
-}
-
-EXPORT_SYMBOL(block_ioctl);
diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c
new file mode 100644
index 000000000000..c26646e578da
--- /dev/null
+++ b/drivers/block/scsi_ioctl.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA  02111-1307  USA
+ *
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/completion.h>
+#include <linux/cdrom.h>
+#include <linux/slab.h>
+
+#include <scsi/scsi.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Add rq to q, unplug the queue and wait on the request's completion.
+ * Returns -EIO if the request finished with errors, 0 otherwise.
+ */
+int blk_do_rq(request_queue_t *q, struct request *rq)
+{
+	DECLARE_COMPLETION(done);
+
+	rq->flags |= REQ_NOMERGE;
+	rq->waiting = &done;
+	elv_add_request(q, rq, 1);
+	generic_unplug_device(q);
+	wait_for_completion(&done);
+
+	/* no retries for now: any error is reported as -EIO */
+	if (rq->errors)
+		return -EIO;
+	return 0;
+}
+
+#include <scsi/sg.h>
+
+/* SG_GET_VERSION_NUM: store the sg version number 30527 at *p. */
+static int sg_get_version(int *p)
+{
+	return put_user(30527, p);
+}
+
+static int scsi_get_idlun(request_queue_t *q, int *p)
+{
+	return put_user(0, p);	/* always stores 0; q is not consulted */
+}
+
+static int scsi_get_bus(request_queue_t *q, int *p)
+{
+	return put_user(0, p);	/* always stores 0; q is not consulted */
+}
+
+static int sg_get_timeout(request_queue_t *q)
+{
+	return HZ;	/* fixed answer; q is not consulted */
+}
+
+/* SG_SET_TIMEOUT: the value is fetched from user space but not stored. */
+static int sg_set_timeout(request_queue_t *q, int *p)
+{
+	int ignored;
+	return get_user(ignored, p);
+}
+
+static int reserved_size = 0;	/* one value for all queues; written by sg_set_reserved_size() */
+
+static int sg_get_reserved_size(request_queue_t *q, int *p)
+{
+	return put_user(reserved_size, p);	/* q is not consulted */
+}
+
+static int sg_set_reserved_size(request_queue_t *q, int *p)
+{
+	int requested;	/* taken as-is from user space, no range check */
+	int rc = get_user(requested, p);
+	if (rc == 0)
+		reserved_size = requested;
+	return rc;
+}
+
+static int sg_emulated_host(request_queue_t *q, int *p)
+{
+	return put_user(1, p);	/* always stores 1; q is not consulted */
+}
+
+/*
+ * SG_IO: issue the user's SCSI command; returns 0 or a negative errno.
+ */
+static int sg_io(request_queue_t *q, struct sg_io_hdr *uptr)
+{
+	int err = -EFAULT, to_dev, from_dev;
+	struct sg_io_hdr hdr;
+	struct request *rq;
+	void *buffer = NULL;
+
+	if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr)))
+		return -EFAULT;
+	if (copy_from_user(&hdr, uptr, sizeof(*uptr)))
+		return -EFAULT;
+	if (hdr.cmd_len > sizeof(rq->cmd))
+		return -EINVAL;
+
+	to_dev = hdr.dxfer_direction == SG_DXFER_TO_DEV ||
+		 hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV;
+	from_dev = hdr.dxfer_direction == SG_DXFER_FROM_DEV ||
+		   hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV;
+	if (hdr.dxfer_len) {
+		unsigned int bytes = (hdr.dxfer_len + 511) & ~511;
+
+		/* reject unknown directions and lengths whose round-up wrapped */
+		if ((!to_dev && !from_dev) || bytes < hdr.dxfer_len)
+			return -EINVAL;
+		buffer = kmalloc(bytes, GFP_USER);
+		if (!buffer)
+			return -ENOMEM;
+		if (to_dev && copy_from_user(buffer, hdr.dxferp, hdr.dxfer_len))
+			goto out;
+	}
+
+	rq = blk_get_request(q, WRITE, __GFP_WAIT);
+	rq->timeout = 60*HZ;
+	rq->data = buffer;
+	rq->data_len = hdr.dxfer_len;
+	rq->flags = REQ_BLOCK_PC;
+	memset(rq->cmd, 0, sizeof(rq->cmd));
+	if (copy_from_user(rq->cmd, hdr.cmdp, hdr.cmd_len)) {
+		blk_put_request(rq);	/* do not issue a half-copied command */
+		goto out;
+	}
+	err = blk_do_rq(q, rq);
+	blk_put_request(rq);
+
+	if (copy_to_user(uptr, &hdr, sizeof(*uptr)))
+		err = -EFAULT;
+	if (buffer && from_dev && copy_to_user(hdr.dxferp, buffer, hdr.dxfer_len))
+		err = -EFAULT;
+out:
+	kfree(buffer);
+	return err;
+}
+
+int scsi_cmd_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
+{
+	request_queue_t *q;
+	struct request *rq;
+	int close = 0, err;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	switch (cmd) {	/* NOTE(review): the sg/scsi cases return directly, bypassing blk_put_queue(q) below - confirm harmless */
+		case SG_GET_VERSION_NUM:
+			return sg_get_version((int *) arg);
+		case SCSI_IOCTL_GET_IDLUN:
+			return scsi_get_idlun(q, (int *) arg);
+		case SCSI_IOCTL_GET_BUS_NUMBER:
+			return scsi_get_bus(q, (int *) arg);
+		case SG_SET_TIMEOUT:
+			return sg_set_timeout(q, (int *) arg);
+		case SG_GET_TIMEOUT:
+			return sg_get_timeout(q);
+		case SG_GET_RESERVED_SIZE:
+			return sg_get_reserved_size(q, (int *) arg);
+		case SG_SET_RESERVED_SIZE:
+			return sg_set_reserved_size(q, (int *) arg);
+		case SG_EMULATED_HOST:
+			return sg_emulated_host(q, (int *) arg);
+		case SG_IO:
+			return sg_io(q, (struct sg_io_hdr *) arg);
+		case CDROMCLOSETRAY:
+			close = 1;	/* fall through: same command, different byte 4 */
+		case CDROMEJECT:
+			rq = blk_get_request(q, WRITE, __GFP_WAIT);
+			rq->flags = REQ_BLOCK_PC;
+			rq->data = NULL;
+			rq->data_len = 0;
+			rq->timeout = 60*HZ;
+			memset(rq->cmd, 0, sizeof(rq->cmd));
+			rq->cmd[0] = GPCMD_START_STOP_UNIT;
+			rq->cmd[4] = 0x02 + (close != 0);	/* 0x02 for CDROMEJECT, 0x03 for CDROMCLOSETRAY */
+			err = blk_do_rq(q, rq);
+			blk_put_request(rq);
+			break;
+		default:
+			err = -ENOTTY;
+	}
+
+	blk_put_queue(q);
+	return err;
+}
+
+EXPORT_SYMBOL(scsi_cmd_ioctl);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index e2380bcb9fe8..00830680bb42 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -2639,7 +2639,7 @@ static int ide_ioctl (struct inode *inode, struct file *file,
 
 		case CDROMEJECT:
 		case CDROMCLOSETRAY:
-			return block_ioctl(inode->i_bdev, cmd, arg);
+			return scsi_cmd_ioctl(inode->i_bdev, cmd, arg);
 
 		case HDIO_GET_BUSSTATE:
 			if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3b95ff2d40a4..7a3f43f1b186 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -825,44 +825,9 @@ static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
 			unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	int ret = -EINVAL;
-	switch (cmd) {
-	/*
-	 * deprecated, use the /proc/iosched interface instead
-	 */
-	case BLKELVGET:
-	case BLKELVSET:
-		ret = -ENOTTY;
-		break;
-	case BLKRAGET:
-	case BLKROGET:
-	case BLKBSZGET:
-	case BLKSSZGET:
-	case BLKFRAGET:
-	case BLKSECTGET:
-	case BLKRASET:
-	case BLKFRASET:
-	case BLKBSZSET:
-	case BLKPG:
-		ret = blk_ioctl(bdev, cmd, arg);
-		break;
-	case BLKRRPART:
-		ret = blkdev_reread_part(bdev);
-		break;
-	default:
-		if (bdev->bd_op->ioctl)
-			ret =bdev->bd_op->ioctl(inode, file, cmd, arg);
-		if (ret == -EINVAL) {
-			switch (cmd) {
-				case BLKGETSIZE:
-				case BLKGETSIZE64:
-				case BLKFLSBUF:
-				case BLKROSET:
-					ret = blk_ioctl(bdev,cmd,arg);
-					break;
-			}
-		}
-	}
+	int ret = blk_ioctl(bdev, cmd, arg);
+	if (ret == -ENOTTY && bdev->bd_op->ioctl)
+		ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
 	return ret;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4929d743683d..607641c6cfb1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -60,6 +60,12 @@ struct request {
 	int tag;
 	void *special;
 	char *buffer;
+
+	/* For packet commands */
+	unsigned int data_len;
+	void *data, *sense;
+
+	unsigned int timeout;
 	struct completion *waiting;
 	struct bio *bio, *biotail;
 	request_queue_t *q;
@@ -85,6 +91,8 @@ enum rq_flag_bits {
 	__REQ_BLOCK_PC,	/* queued down pc from block layer */
 	__REQ_SENSE,	/* sense retrival */
 
+	__REQ_FAILED,	/* set if the request failed */
+	__REQ_QUIET,	/* don't worry about errors */
 	__REQ_SPECIAL,	/* driver suplied command */
 	__REQ_DRIVE_CMD,
 	__REQ_DRIVE_TASK,
@@ -103,6 +111,8 @@ enum rq_flag_bits {
 #define REQ_PC		(1 << __REQ_PC)
 #define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
 #define REQ_SENSE	(1 << __REQ_SENSE)
+#define REQ_FAILED	(1 << __REQ_FAILED)
+#define REQ_QUIET	(1 << __REQ_QUIET)
 #define REQ_SPECIAL	(1 << __REQ_SPECIAL)
 #define REQ_DRIVE_CMD	(1 << __REQ_DRIVE_CMD)
 #define REQ_DRIVE_TASK	(1 << __REQ_DRIVE_TASK)
@@ -301,7 +311,7 @@ extern int blk_remove_plug(request_queue_t *);
 extern void blk_recount_segments(request_queue_t *, struct bio *);
 extern inline int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
 extern inline int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
-extern int block_ioctl(struct block_device *, unsigned int, unsigned long);
+extern int scsi_cmd_ioctl(struct block_device *, unsigned int, unsigned long);
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
 extern void __blk_stop_queue(request_queue_t *q);
-- 
cgit v1.2.3


From b8ed178862df2381b14b12c9b3c4f7f39053c5e5 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:23:25 -0700
Subject: [PATCH] early allocation of ->part

allocation of ->part[] moved to alloc_disk(); alloc_disk() got an
argument (number of minors expected).  Freeing is in put_disk().
---
 arch/m68k/atari/stram.c         |  3 +--
 arch/um/drivers/ubd_kern.c      |  6 ++---
 drivers/acorn/block/fd1772.c    |  2 +-
 drivers/acorn/block/mfmhd.c     |  3 +--
 drivers/block/DAC960.c          |  3 +--
 drivers/block/acsi.c            |  2 +-
 drivers/block/amiflop.c         |  3 +--
 drivers/block/ataflop.c         |  2 +-
 drivers/block/cciss.c           |  9 ++++---
 drivers/block/cpqarray.c        |  7 +++---
 drivers/block/floppy.c          |  2 +-
 drivers/block/genhd.c           | 52 ++++++++++++++++++++---------------------
 drivers/block/loop.c            |  2 +-
 drivers/block/nbd.c             |  3 +--
 drivers/block/paride/pcd.c      |  3 +--
 drivers/block/paride/pd.c       |  3 +--
 drivers/block/paride/pf.c       |  3 +--
 drivers/block/ps2esdi.c         |  3 +--
 drivers/block/rd.c              |  6 ++---
 drivers/block/swim3.c           |  2 +-
 drivers/block/swim_iop.c        |  2 +-
 drivers/block/umem.c            |  3 +--
 drivers/block/xd.c              |  3 +--
 drivers/block/z2ram.c           |  3 +--
 drivers/cdrom/aztcd.c           |  3 +--
 drivers/cdrom/cdu31a.c          |  3 +--
 drivers/cdrom/cm206.c           |  3 +--
 drivers/cdrom/gscd.c            |  3 +--
 drivers/cdrom/mcd.c             |  3 +--
 drivers/cdrom/mcdx.c            |  3 +--
 drivers/cdrom/optcd.c           |  3 +--
 drivers/cdrom/sbpcd.c           |  3 +--
 drivers/cdrom/sjcd.c            |  3 +--
 drivers/cdrom/sonycd535.c       |  3 +--
 drivers/ide/ide-probe.c         |  3 +--
 drivers/ide/legacy/hd.c         |  3 +--
 drivers/md/md.c                 |  3 +--
 drivers/message/i2o/i2o_block.c |  3 +--
 drivers/mtd/ftl.c               |  3 +--
 drivers/mtd/mtdblock.c          |  3 +--
 drivers/mtd/mtdblock_ro.c       |  2 +-
 drivers/mtd/nftlcore.c          |  3 +--
 drivers/s390/block/dasd_genhd.c |  3 +--
 drivers/s390/block/xpram.c      |  3 +--
 drivers/sbus/char/jsflash.c     |  3 +--
 drivers/scsi/sd.c               |  3 +--
 drivers/scsi/sr.c               |  3 +--
 fs/partitions/check.c           |  5 +---
 include/linux/genhd.h           |  3 ++-
 49 files changed, 81 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index f40e6f70df3b..5d6eac53280a 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -1057,7 +1057,7 @@ int __init stram_device_init(void)
 	if (!max_swap_size)
 		/* swapping not enabled */
 		return -ENXIO;
-	stram_disk = alloc_disk();
+	stram_disk = alloc_disk(1);
 	if (!stram_disk)
 		return -ENOMEM;
 
@@ -1070,7 +1070,6 @@ int __init stram_device_init(void)
 	blk_init_queue(BLK_DEFAULT_QUEUE(STRAM_MAJOR), do_stram_request);
 	stram_disk->major = STRAM_MAJOR;
 	stram_disk->first_minor = STRAM_MINOR;
-	stram_disk->minor_shift = 0;
 	stram_disk->fops = &stram_fops;
 	sprintf(stram_disk->disk_name, "stram");
 	set_capacity(stram_disk, (swap_end - swap_start)/512);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 36995c3f84f6..9229a26c1d16 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -404,12 +404,11 @@ static int ubd_add(int n)
 	if (!dev->file)
 		return -1;
 
-	disk = alloc_disk();
+	disk = alloc_disk(1 << UBD_SHIFT);
 	if (!disk)
 		return -1;
 	disk->major = MAJOR_NR;
 	disk->first_minor = n << UBD_SHIFT;
-	disk->minor_shift = UBD_SHIFT;
 	disk->fops = &ubd_blops;
 	if (fakehd_set)
 		sprintf(disk->disk_name, "hd%c", n + 'a');
@@ -417,14 +416,13 @@ static int ubd_add(int n)
 		sprintf(disk->disk_name, "ubd%d", n);
 
 	if (fake_major) {
-		fake_disk = alloc_disk();
+		fake_disk = alloc_disk(1 << UBD_SHIFT);
 		if (!fake_disk) {
 			put_disk(disk);
 			return -1;
 		}
 		fake_disk->major = fake_major;
 		fake_disk->first_minor = n << UBD_SHIFT;
-		fake_disk->minor_shift = UBD_SHIFT;
 		fake_disk->fops = &ubd_blops;
 		sprintf(fake_disk->disk_name, "ubd%d", n);
 		fake_gendisk[n] = fake_disk;
diff --git a/drivers/acorn/block/fd1772.c b/drivers/acorn/block/fd1772.c
index 85b5b9cd8859..1285b8388421 100644
--- a/drivers/acorn/block/fd1772.c
+++ b/drivers/acorn/block/fd1772.c
@@ -1547,7 +1547,7 @@ int fd1772_init(void)
 		return 0;
 
 	for (i = 0; i < FD_MAX_UNITS; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto out;
 	}
diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c
index 32bef8806190..b29cb6ad5505 100644
--- a/drivers/acorn/block/mfmhd.c
+++ b/drivers/acorn/block/mfmhd.c
@@ -1336,12 +1336,11 @@ static int __init mfm_init (void)
 		goto out3;
 	
 	for (i = 0; i < mfm_drives; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto Enomem;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i << 6;
-		disk->minor_shift = 6;
 		disk->fops = &mfm_fops;
 		sprintf(disk->disk_name, "mfm%c", 'a'+i);
 		mfm_gendisk[i] = disk;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 24a1ee66d93b..1c1a72e440e6 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1962,7 +1962,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
 	sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n);
 	disk->major = MajorNumber;
 	disk->first_minor = n << DAC960_MaxPartitionsBits;
-	disk->minor_shift = DAC960_MaxPartitionsBits;
 	disk->fops = &DAC960_BlockDeviceOperations;
    }
   /*
@@ -2200,7 +2199,7 @@ static void DAC960_DetectControllers(DAC960_HardwareType_T HardwareType)
 	}
       memset(Controller, 0, sizeof(DAC960_Controller_T));
       for (i = 0; i < DAC960_MaxLogicalDrives; i++) {
-		Controller->disks[i] = alloc_disk();
+		Controller->disks[i] = alloc_disk(1<<DAC960_MaxPartitionsBits);
 		if (!Controller->disks[i])
 			goto Enomem;
       }
diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index 5d36adb832e2..006ff8b23e32 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -1729,7 +1729,7 @@ int acsi_init( void )
 #endif
 	err = -ENOMEM;
 	for( i = 0; i < NDevices; ++i ) {
-		acsi_gendisk[i] = alloc_disk();
+		acsi_gendisk[i] = alloc_disk(16);
 		if (!acsi_gendisk[i])
 			goto out4;
 	}
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 22790c4145fe..df7d1ac10fe9 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1735,7 +1735,7 @@ static int __init fd_probe_drives(void)
 		fd_probe(drive);
 		if (unit[drive].type->code == FD_NODRIVE)
 			continue;
-		disk = alloc_disk();
+		disk = alloc_disk(1);
 		if (!disk) {
 			unit[drive].type->code = FD_NODRIVE;
 			continue;
@@ -1751,7 +1751,6 @@ static int __init fd_probe_drives(void)
 		printk("fd%d ",drive);
 		disk->major = MAJOR_NR;
 		disk->first_minor = drive;
-		disk->minor_shift = 0;
 		disk->fops = &floppy_fops;
 		sprintf(disk->disk_name, "fd%d", drive);
 		set_capacity(disk, 880*2);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 12f3ae02b317..eac85e42887f 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1949,7 +1949,7 @@ int __init atari_floppy_init (void)
 	}
 
 	for (i = 0; i < FD_MAX_UNITS; i++) {
-		unit[i].disk = alloc_disk();
+		unit[i].disk = alloc_disk(1);
 		if (!unit[i].disk)
 			goto Enomem;
 	}
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ebd7a216810f..caa30e1c6e84 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -740,7 +740,7 @@ static int revalidate_allvol(kdev_t dev)
 
 	for(i=0; i< NWD; i++) {
 		struct gendisk *disk = hba[ctlr]->gendisk[i];
-		if (disk->part)
+		if (disk->flags & GENHD_FL_UP)
 			del_gendisk(disk);
 	}
 
@@ -792,7 +792,7 @@ static int deregister_disk(int ctlr, int logvol)
 	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
 
 	/* invalidate the devices and deregister the disk */ 
-	if (disk->part)
+	if (disk->flags & GENHD_FL_UP)
 		del_gendisk(disk);
 	/* check to see if it was the last disk */
 	if (logvol == h->highest_lun) {
@@ -2274,7 +2274,7 @@ static int alloc_cciss_hba(void)
 	struct gendisk *disk[NWD];
 	int i, n;
 	for (n = 0; n < NWD; n++) {
-		disk[n] = alloc_disk();
+		disk[n] = alloc_disk(1 << NWD_SHIFT);
 		if (!disk[n])
 			goto out;
 	}
@@ -2447,7 +2447,6 @@ static int __init cciss_init_one(struct pci_dev *pdev,
 		sprintf(disk->disk_name, "cciss/c%dd%d", i, j);
 		disk->major = MAJOR_NR + i;
 		disk->first_minor = j << NWD_SHIFT;
-		disk->minor_shift = NWD_SHIFT;
 		if( !(drv->nr_blocks))
 			continue;
 		(BLK_DEFAULT_QUEUE(MAJOR_NR + i))->hardsect_size = drv->block_size;
@@ -2500,7 +2499,7 @@ static void __devexit cciss_remove_one (struct pci_dev *pdev)
 	/* remove it from the disk list */
 	for (j = 0; j < NWD; j++) {
 		struct gendisk *disk = hba[i]->gendisk[j];
-		if (disk->part)
+		if (disk->flags & GENHD_FL_UP)
 			del_gendisk(disk);
 	}
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 7bfa29a5bc89..c3b1c4b17ea7 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -304,7 +304,7 @@ static void __exit cpqarray_exit(void)
 		kfree(hba[i]->cmd_pool_bits);
 
 		for (j = 0; j < NWD; j++) {
-			if (ida_gendisk[i][j]->part)
+			if (ida_gendisk[i][j]->flags & GENHD_FL_UP)
 				del_gendisk(ida_gendisk[i][j]);
 			put_disk(ida_gendisk[i][j]);
 		}
@@ -358,7 +358,7 @@ static int __init cpqarray_init(void)
 		}
 		num_cntlrs_reg++;
 		for (j=0; j<NWD; j++) {
-			ida_gendisk[i][j] = alloc_disk();
+			ida_gendisk[i][j] = alloc_disk(1 << NWD_SHIFT);
 			if (!ida_gendisk[i][j])
 				goto Enomem2;
 		}
@@ -405,7 +405,6 @@ static int __init cpqarray_init(void)
 			sprintf(disk->disk_name, "ida/c%dd%d", i, j);
 			disk->major = MAJOR_NR + i;
 			disk->first_minor = j<<NWD_SHIFT;
-			disk->minor_shift = NWD_SHIFT;
 			disk->flags = GENHD_FL_DEVFS;
 			disk->fops = &ida_fops; 
 			if (!drv->nr_blks)
@@ -1428,7 +1427,7 @@ static int revalidate_allvol(kdev_t dev)
 	 */
 	for (i = 0; i < NWD; i++) {
 		struct gendisk *disk = ida_gendisk[ctlr][i];
-		if (disk->part)
+		if (disk->flags & GENHD_FL_UP)
 			del_gendisk(disk);
 	}
 	memset(hba[ctlr]->drv,            0, sizeof(drv_info_t)*NWD);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 924e1e011f76..8783ee17314d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4240,7 +4240,7 @@ int __init floppy_init(void)
 	raw_cmd = NULL;
 
 	for (i=0; i<N_DRIVE; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto Enomem;
 	}
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 3f6d259165f3..b230df7f7b70 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -57,34 +57,16 @@ EXPORT_SYMBOL(blk_set_probe);	/* Will go away */
  * This function registers the partitioning information in @gp
  * with the kernel.
  */
-static void add_gendisk(struct gendisk *gp)
+void add_disk(struct gendisk *disk)
 {
-	struct hd_struct *p = NULL;
-
-	if (gp->minor_shift) {
-		size_t size = sizeof(struct hd_struct)*((1<<gp->minor_shift)-1);
-		p = kmalloc(size, GFP_KERNEL);
-		if (!p) {
-			printk(KERN_ERR "out of memory; no partitions for %s\n",
-				gp->disk_name);
-			gp->minor_shift = 0;
-		} else
-			memset(p, 0, size);
-	}
-	gp->part = p;
-
 	write_lock(&gendisk_lock);
-	list_add(&gp->list, &gendisks[gp->major].list);
-	if (gp->minor_shift)
-		list_add_tail(&gp->full_list, &gendisk_list);
+	list_add(&disk->list, &gendisks[disk->major].list);
+	if (disk->minor_shift)
+		list_add_tail(&disk->full_list, &gendisk_list);
 	else
-		INIT_LIST_HEAD(&gp->full_list);
+		INIT_LIST_HEAD(&disk->full_list);
 	write_unlock(&gendisk_lock);
-}
-
-void add_disk(struct gendisk *disk)
-{
-	add_gendisk(disk);
+	disk->flags |= GENHD_FL_UP;
 	register_disk(disk);
 }
 
@@ -225,17 +207,33 @@ __initcall(device_init);
 
 EXPORT_SYMBOL(disk_devclass);
 
-struct gendisk *alloc_disk(void)
+struct gendisk *alloc_disk(int minors)
 {
 	struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
-	if (disk)
+	if (disk) {
 		memset(disk, 0, sizeof(struct gendisk));
+		if (minors > 1) {
+			int size = (minors - 1) * sizeof(struct hd_struct);
+			disk->part = kmalloc(size, GFP_KERNEL);
+			if (!disk->part) {
+				kfree(disk);
+				return NULL;
+			}
+			memset(disk->part, 0, size);
+		}
+		disk->minors = minors;
+		while (minors >>= 1)
+			disk->minor_shift++;
+	}
 	return disk;
 }
 
 void put_disk(struct gendisk *disk)
 {
-	kfree(disk);
+	if (disk) {
+		kfree(disk->part);
+		kfree(disk);
+	}
 }
 EXPORT_SYMBOL(alloc_disk);
 EXPORT_SYMBOL(put_disk);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e39755017faf..14fa8720f8db 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1075,7 +1075,7 @@ int __init loop_init(void)
 		goto out_mem;
 
 	for (i = 0; i < max_loop; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto out_mem2;
 	}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index be27027d32b8..27726bd0246a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -507,7 +507,7 @@ static int __init nbd_init(void)
 	}
 
 	for (i = 0; i < MAX_NBD; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
@@ -537,7 +537,6 @@ static int __init nbd_init(void)
 		nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
 		disk->major = MAJOR_NR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		disk->fops = &nbd_fops;
 		sprintf(disk->disk_name, "nbd%d", i);
 		set_capacity(disk, 0x3ffffe);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 0e4bac2bd1ef..95bedb2a580c 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -281,7 +281,7 @@ static void pcd_init_units(void)
 
 	pcd_drive_count = 0;
 	for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		cd->disk = disk;
@@ -303,7 +303,6 @@ static void pcd_init_units(void)
 		cd->info.mask = 0;
 		disk->major = major;
 		disk->first_minor = unit;
-		disk->minor_shift = 0;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
 	}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7fdf4a3e4b2a..3a3ad6390118 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -703,14 +703,13 @@ static int pd_detect(void)
 	}
 	for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
 		if (disk->present) {
-			struct gendisk *p = alloc_disk();
+			struct gendisk *p = alloc_disk(1 << PD_BITS);
 			if (!p) {
 				disk->present = 0;
 				k--;
 				continue;
 			}
 			strcpy(p->disk_name, disk->name);
-			p->minor_shift = PD_BITS;
 			p->fops = &pd_fops;
 			p->major = major;
 			p->first_minor = unit << PD_BITS;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index becf37efd5ec..69a2c8e23eae 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -308,7 +308,7 @@ void pf_init_units(void)
 
 	pf_drive_count = 0;
 	for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		pf->disk = disk;
@@ -320,7 +320,6 @@ void pf_init_units(void)
 		disk->major = MAJOR_NR;
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, pf->name);
-		disk->minor_shift = 0;
 		disk->fops = &pf_fops;
 		if (!(*drives[unit])[D_PRT])
 			pf_drive_count++;
diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c
index 770fbfd4613f..ed022ff34e3e 100644
--- a/drivers/block/ps2esdi.c
+++ b/drivers/block/ps2esdi.c
@@ -421,13 +421,12 @@ static int __init ps2esdi_geninit(void)
 
 	error = -ENOMEM;
 	for (i = 0; i < ps2esdi_drives; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto err_out4;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i<<6;
 		sprintf(disk->disk_name, "ed%c", 'a'+i);
-		disk->minor_shift = 6;
 		disk->fops = &ps2esdi_fops;
 		ps2esdi_gendisk[i] = disk;
 	}
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index a0e60c5972a6..391664b9a34f 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -431,17 +431,16 @@ static int __init rd_init (void)
 	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
-	initrd_disk = alloc_disk();
+	initrd_disk = alloc_disk(1);
 	if (!initrd_disk)
 		return -ENOMEM;
 	initrd_disk->major = MAJOR_NR;
 	initrd_disk->first_minor = INITRD_MINOR;
-	initrd_disk->minor_shift = 0;
 	initrd_disk->fops = &rd_bd_op;	
 	sprintf(initrd_disk->disk_name, "initrd");
 #endif
 	for (i = 0; i < NUM_RAMDISKS; i++) {
-		rd_disks[i] = alloc_disk();
+		rd_disks[i] = alloc_disk(1);
 		if (!rd_disks[i])
 			goto out;
 	}
@@ -460,7 +459,6 @@ static int __init rd_init (void)
 		rd_length[i] = rd_size << 10;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		disk->fops = &rd_bd_op;
 		sprintf(disk->disk_name, "rd%d", i);
 		set_capacity(disk, rd_size * 2);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index b1cb36f3ca5c..2a5f3afefbfa 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1037,7 +1037,7 @@ int swim3_init(void)
 		return -ENODEV;
 
 	for (i = 0; i < floppy_count; i++) {
-		disks[i] = alloc_disk();
+		disks[i] = alloc_disk(1);
 		if (!disks[i])
 			goto out;
 	}
diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c
index 29c2f1696063..3ec747c3f80f 100644
--- a/drivers/block/swim_iop.c
+++ b/drivers/block/swim_iop.c
@@ -188,7 +188,7 @@ int swimiop_init(void)
 	printk("SWIM-IOP: detected %d installed drives.\n", floppy_count);
 
 	for (i = 0; i < floppy_count; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		disk->major = MAJOR_NR;
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 53dfd2a7c624..3c6a3b8294b8 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -1190,7 +1190,7 @@ int __init mm_init(void)
 	}
 
 	for (i = 0; i < num_cards; i++) {
-		mm_gendisk[i] = alloc_disk();
+		mm_gendisk[i] = alloc_disk(1 << MM_SHIFT);
 		if (!mm_gendisk[i])
 			goto out;
 	}
@@ -1203,7 +1203,6 @@ int __init mm_init(void)
 		spin_lock_init(&cards[i].lock);
 		disk->major = major_nr;
 		disk->first_minor  = i << MM_SHIFT;
-		disk->minor_shift = MM_SHIFT;
 		disk->fops = &mm_fops;
 		set_capacity(disk, cards[i].mm_size << 1);
 		add_disk(disk);
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 3e3315e81bde..4467ba777d60 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -205,12 +205,11 @@ static int __init xd_init(void)
 		goto out3;
 
 	for (i = 0; i < xd_drives; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto Enomem;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i<<6;
-		disk->minor_shift = 6;
 		sprintf(disk->disk_name, "xd%c", i+'a');
 		disk->fops = &xd_fops;
 		xd_gendisk[i] = disk;
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 30625811de3e..edb2676680e3 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -365,14 +365,13 @@ z2_init( void )
 	    MAJOR_NR );
 	return -EBUSY;
     }
-    z2ram_gendisk = alloc_disk();
+    z2ram_gendisk = alloc_disk(1);
     if (!z2ram_gendisk) {
 	unregister_blkdev( MAJOR_NR, DEVICE_NAME );
 	return -ENOMEM;
     }
     z2ram_gendisk->major = MAJOR_NR;
     z2ram_gendisk->first_minor = 0;
-    z2ram_gendisk->minor_shift = 0;
     z2ram_gendisk->fops = &z2_fops;
     sprintf(z2ram_gendisk->disk_name, "z2ram");
 
diff --git a/drivers/cdrom/aztcd.c b/drivers/cdrom/aztcd.c
index 53f8fe2bafe2..b8e1880d8714 100644
--- a/drivers/cdrom/aztcd.c
+++ b/drivers/cdrom/aztcd.c
@@ -1908,7 +1908,7 @@ static int __init aztcd_init(void)
 	}
 	devfs_register(NULL, "aztcd", DEVFS_FL_DEFAULT, MAJOR_NR, 0,
 		       S_IFBLK | S_IRUGO | S_IWUGO, &azt_fops, NULL);
-	azt_disk = alloc_disk();
+	azt_disk = alloc_disk(1);
 	if (!azt_disk)
 		goto err_out2;
 	if (register_blkdev(MAJOR_NR, "aztcd", &azt_fops) != 0) {
@@ -1921,7 +1921,6 @@ static int __init aztcd_init(void)
 	blk_queue_hardsect_size(BLK_DEFAULT_QUEUE(MAJOR_NR), 2048);
 	azt_disk->major = MAJOR_NR;
 	azt_disk->first_minor = 0;
-	azt_disk->minor_shift = 0;
 	azt_disk->fops = &azt_fops;
 	sprintf(azt_disk->disk_name, "aztcd");
 	add_disk(azt_disk);
diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c
index 8863cb1254de..f4077094707a 100644
--- a/drivers/cdrom/cdu31a.c
+++ b/drivers/cdrom/cdu31a.c
@@ -3366,12 +3366,11 @@ int __init cdu31a_init(void)
 		goto errout2;
 	}
 
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk)
 		goto errout1;
 	disk->major = MAJOR_NR;
 	disk->first_minor = 0;
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "cdu31a");
 	disk->fops = &scd_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/cm206.c b/drivers/cdrom/cm206.c
index 0da8b3bcdf30..8a83a381bcc1 100644
--- a/drivers/cdrom/cm206.c
+++ b/drivers/cdrom/cm206.c
@@ -1470,12 +1470,11 @@ int __init cm206_init(void)
 		printk(KERN_INFO "Cannot register for major %d!\n", MAJOR_NR);
 		goto out_blkdev;
 	}
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk)
 		goto out_disk;
 	disk->major = MAJOR_NR;
 	disk->first_minor = 0;
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "cm206");
 	disk->fops = &cm206_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/gscd.c b/drivers/cdrom/gscd.c
index 9e8a14ce9374..d82b99f5a4b5 100644
--- a/drivers/cdrom/gscd.c
+++ b/drivers/cdrom/gscd.c
@@ -972,12 +972,11 @@ static int __init gscd_init(void)
 		i++;
 	}
 
-	gscd_disk = alloc_disk();
+	gscd_disk = alloc_disk(1);
 	if (!gscd_disk)
 		goto err_out1;
 	gscd_disk->major = MAJOR_NR;
 	gscd_disk->first_minor = 0;
-	gscd_disk->minor_shift = 0;
 	gscd_disk->fops = &gscd_fops;
 	sprintf(gscd_disk->disk_name, "gscd");
 
diff --git a/drivers/cdrom/mcd.c b/drivers/cdrom/mcd.c
index 39eff9436cbf..e6c72eabda52 100644
--- a/drivers/cdrom/mcd.c
+++ b/drivers/cdrom/mcd.c
@@ -1031,7 +1031,7 @@ static void mcd_release(struct cdrom_device_info *cdi)
 
 int __init mcd_init(void)
 {
-	struct gendisk *disk = alloc_disk();
+	struct gendisk *disk = alloc_disk(1);
 	int count;
 	unsigned char result[3];
 	char msg[80];
@@ -1124,7 +1124,6 @@ int __init mcd_init(void)
 
 	disk->major = MAJOR_NR;
 	disk->first_minor = 0;
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "mcd");
 	disk->fops = &mcd_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/mcdx.c b/drivers/cdrom/mcdx.c
index 7b6aaace0be1..9747c15b926b 100644
--- a/drivers/cdrom/mcdx.c
+++ b/drivers/cdrom/mcdx.c
@@ -1076,7 +1076,7 @@ int __init mcdx_init_drive(int drive)
 		return 1;
 	}
 
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk) {
 		xwarn("init() malloc failed\n");
 		kfree(stuffp);
@@ -1221,7 +1221,6 @@ int __init mcdx_init_drive(int drive)
 	stuffp->info.dev = mk_kdev(MAJOR_NR, drive);
 	disk->major = MAJOR_NR;
 	disk->first_minor = drive;
-	disk->minor_shift = 0;
 	strcpy(disk->disk_name, stuffp->info.name);
 	disk->fops = &mcdx_bdops;
 	disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/optcd.c b/drivers/cdrom/optcd.c
index baf39fd6f708..6abce539684e 100644
--- a/drivers/cdrom/optcd.c
+++ b/drivers/cdrom/optcd.c
@@ -2010,14 +2010,13 @@ static int __init optcd_init(void)
 			"optcd: no Optics Storage CDROM Initialization\n");
 		return -EIO;
 	}
-	optcd_disk = alloc_disk();
+	optcd_disk = alloc_disk(1);
 	if (!optcd_disk) {
 		printk(KERN_ERR "optcd: can't allocate disk\n");
 		return -ENOMEM;
 	}
 	optcd_disk->major = MAJOR_NR;
 	optcd_disk->first_minor = 0;
-	optcd_disk->minor_shift = 0;
 	optcd_disk->fops = &opt_fops;
 	sprintf(optcd_disk->disk_name, "optcd");
 	if (!request_region(optcd_port, 4, "optcd")) {
diff --git a/drivers/cdrom/sbpcd.c b/drivers/cdrom/sbpcd.c
index 409aea0c4f0f..22a4ca708c6f 100644
--- a/drivers/cdrom/sbpcd.c
+++ b/drivers/cdrom/sbpcd.c
@@ -5831,10 +5831,9 @@ int __init sbpcd_init(void)
 		sbpcd_infop->dev = mk_kdev(MAJOR_NR, j);
 		sbpcd_infop->handle = p;
 		p->sbpcd_infop = sbpcd_infop;
-		disk = alloc_disk();
+		disk = alloc_disk(1);
 		disk->major = MAJOR_NR;
 		disk->first_minor = j;
-		disk->minor_shift = 0;
 		disk->fops = &sbpcd_bdops;
 		strcpy(disk->disk_name, sbpcd_infop->name);
 		disk->flags = GENHD_FL_CD;
diff --git a/drivers/cdrom/sjcd.c b/drivers/cdrom/sjcd.c
index c04647548625..9dcdda8741b0 100644
--- a/drivers/cdrom/sjcd.c
+++ b/drivers/cdrom/sjcd.c
@@ -1689,14 +1689,13 @@ static int __init sjcd_init(void)
 	blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_sjcd_request, &sjcd_lock);
 	blk_queue_hardsect_size(BLK_DEFAULT_QUEUE(MAJOR_NR), 2048);
 
-	sjcd_disk = alloc_disk();
+	sjcd_disk = alloc_disk(1);
 	if (!sjcd_disk) {
 		printk(KERN_ERR "SJCD: can't allocate disk");
 		goto out1;
 	}
 	sjcd_disk->major = MAJOR_NR,
 	sjcd_disk->first_minor = 0,
-	sjcd_disk->minor_shift = 0,
 	sjcd_disk->fops = &sjcd_fops,
 	sprintf(sjcd_disk->disk_name, "sjcd");
 
diff --git a/drivers/cdrom/sonycd535.c b/drivers/cdrom/sonycd535.c
index d73013c02bad..68e8103a7223 100644
--- a/drivers/cdrom/sonycd535.c
+++ b/drivers/cdrom/sonycd535.c
@@ -1605,12 +1605,11 @@ static int __init sony535_init(void)
 	}
 	initialized = 1;
 
-	cdu_disk = alloc_disk();
+	cdu_disk = alloc_disk(1);
 	if (!cdu_disk)
 		goto out6;
 	cdu_disk->major = MAJOR_NR;
 	cdu_disk->first_minor = 0;
-	cdu_disk->minor_shift = 0;
 	cdu_disk->fops = &cdu_fops;
 	sprintf(cdu_disk->disk_name, "cdu");
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 6277ce3cb1e0..478bffc6aed8 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -986,7 +986,7 @@ static void init_gendisk (ide_hwif_t *hwif)
 	units = MAX_DRIVES;
 
 	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		disks[unit] = alloc_disk();
+		disks[unit] = alloc_disk(1 << PARTN_BITS);
 		if (!disks[unit])
 			goto err_kmalloc_gd;
 	}
@@ -996,7 +996,6 @@ static void init_gendisk (ide_hwif_t *hwif)
 		disk->major  = hwif->major;
 		disk->first_minor = unit << PARTN_BITS;
 		sprintf(disk->disk_name,"hd%c",'a'+hwif->index*MAX_DRIVES+unit);
-		disk->minor_shift = PARTN_BITS; 
 		disk->fops = ide_fops;
 		hwif->drives[unit].disk = disk;
 	}
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index b0f5f104876d..7dc166b8e646 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -802,12 +802,11 @@ static int __init hd_init(void)
 		goto out;
 
 	for (drive=0 ; drive < NR_HD ; drive++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(64);
 		if (!disk)
 			goto Enomem;
 		disk->major = MAJOR_NR;
 		disk->first_minor = drive << 6;
-		disk->minor_shift = 6;
 		disk->fops = &hd_fops;
 		sprintf(disk->disk_name, "hd%c", 'a'+drive);
 		hd_gendisk[drive] = disk;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a40c6af55da5..205bb0fdeee0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1394,12 +1394,11 @@ static int do_md_run(mddev_t * mddev)
 #endif
 	}
 
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!disk)
 		return -ENOMEM;
 	disk->major = MD_MAJOR;
 	disk->first_minor = mdidx(mddev);
-	disk->minor_shift = 0;
 	sprintf(disk->disk_name, "md%d", mdidx(mddev));
 	disk->fops = &md_fops;
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 0980a0b775c6..b6f8af6193f1 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -1647,7 +1647,7 @@ static int i2o_block_init(void)
 	}
 
 	for (i = 0; i < MAX_I2OB; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(16);
 		if (!disk)
 			goto oom;
 		i2o_disk[i] = disk;
@@ -1679,7 +1679,6 @@ static int i2o_block_init(void)
 		struct gendisk *disk = i2ob_disk + i;
 		disk->major = MAJOR_NR;
 		disk->first_minor = i<<4;
-		disk->minor_shift = 4;
 		disk->fops = &i2ob_fops;
 		sprintf(disk->disk_name, "i2o/hd%c", 'a' + i);
 	}
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 341ad2252885..e40e34d3c7d6 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1223,7 +1223,7 @@ static void ftl_notify_add(struct mtd_info *mtd)
 	}
 
 	partition = kmalloc(sizeof(partition_t), GFP_KERNEL);
-	disk = alloc_disk();
+	disk = alloc_disk(1 << PART_BITS);
 		
 	if (!partition||!disk) {
 		printk(KERN_WARNING "No memory to scan for FTL on %s\n",
@@ -1237,7 +1237,6 @@ static void ftl_notify_add(struct mtd_info *mtd)
 	sprintf(disk->disk_name, "ftl%c", 'a' + device);
 	disk->major = FTL_MAJOR;
 	disk->first_minor = device << 4;
-	disk->minor_shift = PART_BITS;
 	disk->fops = &ftl_blk_fops;
 	partition->mtd = mtd;
 	partition->disk = disk;
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 1ad148bd3364..6b32d3cfb390 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -295,7 +295,7 @@ static int mtdblock_open(struct inode *inode, struct file *file)
 	spin_unlock(&mtdblks_lock);
 
 	mtdblk = kmalloc(sizeof(struct mtdblk_dev), GFP_KERNEL);
-	disk = alloc_disk();
+	disk = alloc_disk(1);
 	if (!mtdblk || !disk)
 		goto Enomem;
 	memset(mtdblk, 0, sizeof(*mtdblk));
@@ -313,7 +313,6 @@ static int mtdblock_open(struct inode *inode, struct file *file)
 	}
 	disk->major = MAJOR_NR;
 	disk->first_minor = dev;
-	disk->minor_shift = 0;
 	disk->fops = &mtd_fops;
 	sprintf(disk->disk_name, "mtd%d", dev);
 	mtdblk->disk = disk;
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 65b97e3a11df..97e8437a75d5 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -224,7 +224,7 @@ int __init init_mtdblock(void)
 	int i;
 
 	for (i = 0; i < MAX_MTD_DEVICES; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		disk->major = MAJOR_NR;
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 60d26b10740e..155aa92a9429 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -74,7 +74,7 @@ static void NFTL_setup(struct mtd_info *mtd)
         }
 
 	nftl = kmalloc(sizeof(struct NFTLrecord), GFP_KERNEL);
-	gd = alloc_disk();
+	gd = alloc_disk(1 << NFTL_PARTN_BITS);
 	if (!nftl || !gd) {
 		kfree(nftl);
 		put_disk(gd);
@@ -132,7 +132,6 @@ static void NFTL_setup(struct mtd_info *mtd)
 	sprintf(gd->disk_name, "nftl%c", 'a' + firstfree);
 	gd->major = MAJOR_NR;
 	gd->first_minor = firstfree << NFTL_PARTN_BITS;
-	gd->minor_shift = NFTL_PARTN_BITS;
 	set_capacity(gd, nftl->nr_sects);
 	nftl->disk = gd;
 	add_disk(gd);
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index be6c7dc5aa0a..67597043b718 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -190,14 +190,13 @@ dasd_gendisk_alloc(int devindex)
 		}
 	}
 
-	gdp = alloc_disk();
+	gdp = alloc_disk(1 << DASD_PARTN_BITS);
 	if (!gdp)
 		return ERR_PTR(-ENOMEM);
 
 	/* Initialize gendisk structure. */
 	gdp->major = mi->major;
 	gdp->first_minor = index << DASD_PARTN_BITS;
-	gdp->minor_shift = DASD_PARTN_BITS;
 	gdp->fops = &dasd_device_operations;
 
 	/*
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 80f8b7573a41..4db75e1b7e73 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -441,7 +441,7 @@ static int __init xpram_setup_blkdev(void)
 	int i, rc = -ENOMEM;
 
 	for (i = 0; i < xpram_devs; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		xpram_disks[i] = disk;
@@ -481,7 +481,6 @@ static int __init xpram_setup_blkdev(void)
 		offset += xpram_devices[i].size;
 		disk->major = XPRAM_MAJOR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		disk->fops = &xpram_devops;
 		sprintf(disk->disk_name, "slram%d", i);
 		set_capacity(disk, xpram_sizes[i] << 1);
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 16386c234938..c0479c824d05 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -622,7 +622,7 @@ static int jsfd_init(void)
 
 	err = -ENOMEM;
 	for (i = 0; i < JSF_MAX; i++) {
-		struct gendisk *disk = alloc_disk();
+		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
 		jsfd_disk[i] = disk;
@@ -648,7 +648,6 @@ static int jsfd_init(void)
 		disk->first_minor = i;
 		sprintf(disk->disk_name, "jsfd%d", i);
 		disk->fops = &jsfd_fops;
-		disk->minor_shift = 0;
 		set_capacity(disk, jdp->dsize >> 9);
 		add_disk(disk);
 		set_device_ro(MKDEV(JSFD_MAJOR, i), 1);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5863cdcf9bba..1b7abd00b167 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1386,7 +1386,7 @@ static int sd_attach(Scsi_Device * sdp)
 	    ((sdp->type != TYPE_DISK) && (sdp->type != TYPE_MOD)))
 		return 0;
 
-	gd = alloc_disk();
+	gd = alloc_disk(16);
 	if (!gd)
 		return 1;
 
@@ -1423,7 +1423,6 @@ static int sd_attach(Scsi_Device * sdp)
         gd->de = sdp->de;
 	gd->major = SD_MAJOR(dsk_nr>>4);
 	gd->first_minor = (dsk_nr & 15)<<4;
-	gd->minor_shift = 4;
 	gd->fops = &sd_fops;
 	if (dsk_nr > 26)
 		sprintf(gd->disk_name, "sd%c%c",'a'+dsk_nr/26-1,'a'+dsk_nr%26);
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index ab50575b899c..05fe1b938eb4 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -757,7 +757,7 @@ void sr_finish()
 		 * with loadable modules. */
 		if (cd->disk)
 			continue;
-		disk = alloc_disk();
+		disk = alloc_disk(1);
 		if (!disk)
 			continue;
 		if (cd->disk) {
@@ -766,7 +766,6 @@ void sr_finish()
 		}
 		disk->major = MAJOR_NR;
 		disk->first_minor = i;
-		disk->minor_shift = 0;
 		strcpy(disk->disk_name, cd->cdi.name);
 		disk->fops = &sr_bdops;
 		disk->flags = GENHD_FL_CD;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5976fa3e466f..b3164b9ca071 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -531,10 +531,7 @@ void del_gendisk(struct gendisk *disk)
 	wipe_partitions(disk);
 	unlink_gendisk(disk);
 	devfs_remove_partitions(disk);
-	if (disk->part) {
-		kfree(disk->part);
-		disk->part = NULL;
-	}
+	disk->flags &= ~GENHD_FL_UP;
 }
 
 struct dev_name {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 62781b452fe9..70c58d8b7e86 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -69,6 +69,7 @@ struct hd_struct {
 #define GENHD_FL_DRIVERFS  2
 #define GENHD_FL_DEVFS	4
 #define GENHD_FL_CD	8
+#define GENHD_FL_UP	16
 
 struct gendisk {
 	int major;			/* major number of driver */
@@ -262,7 +263,7 @@ char *disk_name (struct gendisk *hd, int part, char *buf);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
 extern void update_partition(struct gendisk *disk, int part);
 
-extern struct gendisk *alloc_disk(void);
+extern struct gendisk *alloc_disk(int minors);
 extern void put_disk(struct gendisk *disk);
 
 /* will go away */
-- 
cgit v1.2.3


From 847c633af8a42e49030ff941fb64fb3ece6c95ef Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:23:32 -0700
Subject: [PATCH] disk->minor_shift cleanup

	New field: disk->minors (equal to 1 << disk->minor_shift).  Almost all
uses of ->minor_shift had that form and have been replaced with ->minors.
---
 drivers/block/acsi.c     |  5 ++++-
 drivers/block/blkpg.c    |  6 +++---
 drivers/block/genhd.c    |  6 +++---
 drivers/ide/ide-cd.c     |  1 +
 drivers/ide/ide-disk.c   |  1 +
 drivers/ide/ide-floppy.c |  1 +
 fs/block_dev.c           |  4 ++--
 fs/partitions/check.c    | 18 +++++++++---------
 include/linux/genhd.h    |  1 +
 9 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index 006ff8b23e32..7eb385a51182 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -1739,7 +1739,10 @@ int acsi_init( void )
 		sprintf(disk->disk_name, "ad%c", 'a'+i);
 		disk->major = MAJOR_NR;
 		disk->first_minor = i << 4;
-		disk->minor_shift = (acsi_info[i].type==HARDDISK)?4:0;
+		if (acsi_info[i].type != HARDDISK) {
+			disk->minor_shift = 0;
+			disk->minors = 1;
+		}
 		disk->fops = &acsi_fops;
 		set_capacity(disk, acsi_info[i].size);
 		add_disk(disk);
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c
index 7b55729fa29a..7fff17616401 100644
--- a/drivers/block/blkpg.c
+++ b/drivers/block/blkpg.c
@@ -97,7 +97,7 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p)
 		return -EINVAL;
 	if (part)
 		BUG();
-	if (p->pno <= 0 || p->pno >= (1 << g->minor_shift))
+	if (p->pno <= 0 || p->pno >= g->minors)
 		return -EINVAL;
 
 	/* partition number in use? */
@@ -105,7 +105,7 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p)
 		return -EBUSY;
 
 	/* overlap? */
-	for (i = 0; i < (1<<g->minor_shift) - 1; i++)
+	for (i = 0; i < g->minors - 1; i++)
 		if (!(ppstart+pplength <= g->part[i].start_sect ||
 		      ppstart >= g->part[i].start_sect + g->part[i].nr_sects))
 			return -EBUSY;
@@ -142,7 +142,7 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p)
 		return -EINVAL;
 	if (part)
 		BUG();
-	if (p->pno <= 0 || p->pno >= (1 << g->minor_shift))
+	if (p->pno <= 0 || p->pno >= g->minors)
   		return -EINVAL;
 
 	/* existing drive and partition? */
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index b230df7f7b70..8ecb1461f43e 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -61,7 +61,7 @@ void add_disk(struct gendisk *disk)
 {
 	write_lock(&gendisk_lock);
 	list_add(&disk->list, &gendisks[disk->major].list);
-	if (disk->minor_shift)
+	if (disk->minors > 1)
 		list_add_tail(&disk->full_list, &gendisk_list);
 	else
 		INIT_LIST_HEAD(&disk->full_list);
@@ -107,7 +107,7 @@ get_gendisk(dev_t dev, int *part)
 		disk = list_entry(p, struct gendisk, list);
 		if (disk->first_minor > minor)
 			continue;
-		if (disk->first_minor + (1<<disk->minor_shift) <= minor)
+		if (disk->first_minor + disk->minors <= minor)
 			continue;
 		read_unlock(&gendisk_lock);
 		*part = minor - disk->first_minor;
@@ -163,7 +163,7 @@ static int show_partition(struct seq_file *part, void *v)
 		sgp->major, sgp->first_minor,
 		(unsigned long long)get_capacity(sgp) >> 1,
 		disk_name(sgp, 0, buf));
-	for (n = 0; n < (1<<sgp->minor_shift) - 1; n++) {
+	for (n = 0; n < sgp->minors - 1; n++) {
 		if (sgp->part[n].nr_sects == 0)
 			continue;
 		seq_printf(part, "%4d  %4d %10llu %s\n",
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 6e5d283aa93a..3471aba90f64 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -3193,6 +3193,7 @@ static int ide_cdrom_attach (ide_drive_t *drive)
 	memset(info, 0, sizeof (struct cdrom_info));
 	drive->driver_data = info;
 	DRIVER(drive)->busy++;
+	g->minors = 1;
 	g->minor_shift = 0;
 	g->de = drive->de;
 	g->flags = GENHD_FL_CD;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 5ff3daf64280..5b0c1ca8e75d 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1871,6 +1871,7 @@ static int idedisk_attach(ide_drive_t *drive)
 		goto failed;
 	}
 	DRIVER(drive)->busy--;
+	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
 	g->de = drive->de;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 60e3aed69166..fca1f92f896d 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -2108,6 +2108,7 @@ static int idefloppy_attach (ide_drive_t *drive)
 	DRIVER(drive)->busy++;
 	idefloppy_setup (drive, floppy);
 	DRIVER(drive)->busy--;
+	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
 	g->de = drive->de;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 981b8df8efc8..47db3ea5e63b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,7 +540,7 @@ int check_disk_change(struct block_device *bdev)
 	disk = get_gendisk(bdev->bd_dev, &part);
 	if (bdops->revalidate)
 		bdops->revalidate(dev);
-	if (disk && disk->minor_shift)
+	if (disk && disk->minors > 1)
 		bdev->bd_invalidated = 1;
 	return 1;
 }
@@ -799,7 +799,7 @@ static int blkdev_reread_part(struct block_device *bdev)
 	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
 	int res = 0;
 
-	if (!disk || !disk->minor_shift || bdev != bdev->bd_contains)
+	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
 		return -EINVAL;
 	if (part)
 		BUG();
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index b3164b9ca071..72e71ea060e7 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -130,7 +130,7 @@ static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
 
 static void driverfs_create_partitions(struct gendisk *hd)
 {
-	int max_p = 1<<hd->minor_shift;
+	int max_p = hd->minors;
 	struct hd_struct *p = hd->part;
 	char name[DEVICE_NAME_SIZE];
 	char bus_id[BUS_ID_SIZE];
@@ -187,7 +187,7 @@ static void driverfs_create_partitions(struct gendisk *hd)
 
 static void driverfs_remove_partitions(struct gendisk *hd)
 {
-	int max_p = 1<<hd->minor_shift;
+	int max_p = hd->minors;
 	struct device *dev;
 	struct hd_struct *p;
 	int part;
@@ -233,7 +233,7 @@ static void check_partition(struct gendisk *hd, struct block_device *bdev)
 		if (isdigit(state->name[strlen(state->name)-1]))
 			sprintf(state->name, "p");
 	}
-	state->limit = 1<<hd->minor_shift;
+	state->limit = hd->minors;
 	for (i = 0; check_part[i]; i++) {
 		int res, j;
 		struct hd_struct *p;
@@ -298,7 +298,7 @@ static void devfs_create_partitions(struct gendisk *dev)
 	unsigned int devfs_flags = DEVFS_FL_DEFAULT;
 	char dirname[64], symlink[16];
 	static devfs_handle_t devfs_handle;
-	int part, max_p = 1<<dev->minor_shift;
+	int part, max_p = dev->minors;
 	struct hd_struct *p = dev->part;
 
 	if (dev->flags & GENHD_FL_REMOVABLE)
@@ -380,7 +380,7 @@ static void devfs_remove_partitions(struct gendisk *dev)
 {
 #ifdef CONFIG_DEVFS_FS
 	int part;
-	for (part = (1<<dev->minor_shift)-1; part--; ) {
+	for (part = dev->minors-1; part--; ) {
 		devfs_unregister(dev->part[part].de);
 		dev->part[part].de = NULL;
 	}
@@ -401,7 +401,7 @@ void register_disk(struct gendisk *disk)
 		devfs_create_cdrom(disk);
 
 	/* No minors to use for partitions */
-	if (!disk->minor_shift)
+	if (disk->minors == 1)
 		return;
 
 	/* No such device (e.g., media were just removed) */
@@ -458,7 +458,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (res)
 		return res;
 	bdev->bd_invalidated = 0;
-	for (p = 0; p < (1<<disk->minor_shift) - 1; p++) {
+	for (p = 0; p < disk->minors - 1; p++) {
 		disk->part[p].start_sect = 0;
 		disk->part[p].nr_sects = 0;
 	}
@@ -466,7 +466,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		bdev->bd_op->revalidate(dev);
 	if (get_capacity(disk))
 		check_partition(disk, bdev);
-	for (p = 1; p < (1<<disk->minor_shift); p++)
+	for (p = 1; p < disk->minors; p++)
 		update_partition(disk, p);
 	return res;
 }
@@ -495,7 +495,7 @@ fail:
 
 static int wipe_partitions(struct gendisk *disk)
 {
-	int max_p = 1 << disk->minor_shift;
+	int max_p = disk->minors;
 	kdev_t devp;
 	int res;
 	int p;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 70c58d8b7e86..6e1f68900bba 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -74,6 +74,7 @@ struct hd_struct {
 struct gendisk {
 	int major;			/* major number of driver */
 	int first_minor;
+	int minors;
 	int minor_shift;		/* number of times minor is shifted to
 					   get real minor */
 	char disk_name[16];		/* name of major driver */
-- 
cgit v1.2.3


From 8b290eb199620bd66049bf972ef0d995576cf3b9 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:23:37 -0700
Subject: [PATCH] device_register() splitup

	New driverfs helpers: device_initialize(), device_add() and device_del().
device_del() is device_unregister() without the final put_device().
device_initialize() and device_add() split device_register() into
initialization and insertion into the tree.
---
 drivers/base/core.c    | 92 +++++++++++++++++++++++++++++++-------------------
 include/linux/device.h |  3 ++
 2 files changed, 60 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4fc859d3ab57..83c31723d844 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -149,36 +149,16 @@ void driver_detach(struct device_driver * drv)
 	spin_unlock(&device_lock);
 }
 
-/**
- * device_register - register a device
- * @dev:	pointer to the device structure
- *
- * First, make sure that the device has a parent, create
- * a directory for it, then add it to the parent's list of
- * children.
- *
- * Maintains a global list of all devices, in depth-first ordering.
- * The head for that list is device_root.g_list.
- */
-int device_register(struct device *dev)
+int device_add(struct device *dev)
 {
 	int error;
 
 	if (!dev || !strlen(dev->bus_id))
 		return -EINVAL;
 
-	INIT_LIST_HEAD(&dev->node);
-	INIT_LIST_HEAD(&dev->children);
-	INIT_LIST_HEAD(&dev->g_list);
-	INIT_LIST_HEAD(&dev->driver_list);
-	INIT_LIST_HEAD(&dev->bus_list);
-	INIT_LIST_HEAD(&dev->intf_list);
-	spin_lock_init(&dev->lock);
-	atomic_set(&dev->refcount,2);
-	dev->present = 1;
 	spin_lock(&device_lock);
+	dev->present = 1;
 	if (dev->parent) {
-		get_device_locked(dev->parent);
 		list_add_tail(&dev->g_list,&dev->parent->g_list);
 		list_add_tail(&dev->node,&dev->parent->children);
 	} else
@@ -209,10 +189,48 @@ int device_register(struct device *dev)
 		list_del_init(&dev->g_list);
 		list_del_init(&dev->node);
 		spin_unlock(&device_lock);
-		if (dev->parent)
-			put_device(dev->parent);
 	}
-	put_device(dev);
+	return error;
+}
+
+void device_initialize(struct device *dev)
+{
+	INIT_LIST_HEAD(&dev->node);
+	INIT_LIST_HEAD(&dev->children);
+	INIT_LIST_HEAD(&dev->g_list);
+	INIT_LIST_HEAD(&dev->driver_list);
+	INIT_LIST_HEAD(&dev->bus_list);
+	INIT_LIST_HEAD(&dev->intf_list);
+	spin_lock_init(&dev->lock);
+	atomic_set(&dev->refcount,1);
+	if (dev->parent)
+		get_device(dev->parent);
+}
+
+/**
+ * device_register - register a device
+ * @dev:	pointer to the device structure
+ *
+ * First, make sure that the device has a parent, create
+ * a directory for it, then add it to the parent's list of
+ * children.
+ *
+ * Maintains a global list of all devices, in depth-first ordering.
+ * The head for that list is device_root.g_list.
+ */
+int device_register(struct device *dev)
+{
+	int error;
+
+	if (!dev || !strlen(dev->bus_id))
+		return -EINVAL;
+
+	device_initialize(dev);
+	if (dev->parent)
+		get_device(dev->parent);
+	error = device_add(dev);
+	if (error && dev->parent)
+		put_device(dev->parent);
 	return error;
 }
 
@@ -257,16 +275,7 @@ void put_device(struct device * dev)
 		put_device(parent);
 }
 
-/**
- * device_unregister - unlink device
- * @dev:	device going away
- *
- * The device has been removed from the system, so we disavow knowledge
- * of it. It might not be the final reference to the device, so we mark
- * it as !present, so no more references to it can be acquired.
- * In the end, we decrement the final reference count for it.
- */
-void device_unregister(struct device * dev)
+void device_del(struct device * dev)
 {
 	spin_lock(&device_lock);
 	dev->present = 0;
@@ -293,7 +302,20 @@ void device_unregister(struct device * dev)
 
 	/* remove the driverfs directory */
 	device_remove_dir(dev);
+}
 
+/**
+ * device_unregister - unlink device
+ * @dev:	device going away
+ *
+ * The device has been removed from the system, so we disavow knowledge
+ * of it. It might not be the final reference to the device, so we mark
+ * it as !present, so no more references to it can be acquired.
+ * In the end, we decrement the final reference count for it.
+ */
+void device_unregister(struct device * dev)
+{
+	device_del(dev);
 	put_device(dev);
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 3290c5c40276..80a63939f924 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -329,6 +329,9 @@ dev_set_drvdata (struct device *dev, void *data)
  */
 extern int device_register(struct device * dev);
 extern void device_unregister(struct device * dev);
+extern void device_initialize(struct device * dev);
+extern int device_add(struct device * dev);
+extern void device_del(struct device * dev);
 
 /* driverfs interface for exporting device attributes */
 
-- 
cgit v1.2.3


From c6973580141ce5a4904436b29c0dc5f3d9982951 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:13 -0700
Subject: [PATCH] block ioctl cleanup

	The guts of blkpg.c and blkdev_ioctl() have been cleaned up and moved
into a new file, drivers/block/ioctl.c.  blkpg.c is gone.
---
 drivers/block/Makefile    |   2 +-
 drivers/block/blkpg.c     | 310 ----------------------------------------------
 drivers/block/floppy.c    |  10 --
 drivers/block/ioctl.c     | 231 ++++++++++++++++++++++++++++++++++
 drivers/block/rd.c        |   2 -
 drivers/mtd/mtdblock.c    |   2 -
 drivers/mtd/mtdblock_ro.c |   2 -
 drivers/mtd/nftlcore.c    |   1 -
 fs/block_dev.c            |  29 -----
 include/linux/blkpg.h     |   1 -
 include/linux/fs.h        |   1 +
 11 files changed, 233 insertions(+), 358 deletions(-)
 delete mode 100644 drivers/block/blkpg.c
 create mode 100644 drivers/block/ioctl.c

(limited to 'include/linux')

diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 8457b1bfa13a..6c22bb8963d6 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -11,7 +11,7 @@
 export-objs	:= elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
 		   scsi_ioctl.o deadline-iosched.o
 
-obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o scsi_ioctl.o deadline-iosched.o
+obj-y	:= elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o deadline-iosched.o
 
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c
deleted file mode 100644
index 7fff17616401..000000000000
--- a/drivers/block/blkpg.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Partition table and disk geometry handling
- *
- * This obsoletes the partition-handling code in genhd.c:
- * Userspace can look at a disk in arbitrary format and tell
- * the kernel what partitions there are on the disk, and how
- * these should be numbered.
- * It also allows one to repartition a disk that is being used.
- *
- * A single ioctl with lots of subfunctions:
- *
- * Device number stuff:
- *    get_whole_disk()          (given the device number of a partition, find
- *                               the device number of the encompassing disk)
- *    get_all_partitions()      (given the device number of a disk, return the
- *                               device numbers of all its known partitions)
- *
- * Partition stuff:
- *    add_partition()
- *    delete_partition()
- *    test_partition_in_use()   (also for test_disk_in_use)
- *
- * Geometry stuff:
- *    get_geometry()
- *    set_geometry()
- *    get_bios_drivedata()
- *
- * For today, only the partition stuff - aeb, 990515
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>			/* for BLKROSET, ... */
-#include <linux/sched.h>		/* for capable() */
-#include <linux/blk.h>			/* for set_device_ro() */
-#include <linux/blkpg.h>
-#include <linux/genhd.h>
-#include <linux/module.h>               /* for EXPORT_SYMBOL */
-#include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
-
-#include <asm/uaccess.h>
-
-/*
- * What is the data describing a partition?
- *
- * 1. a device number (kdev_t)
- * 2. a starting sector and number of sectors (hd_struct)
- *    given in the part[] array of the gendisk structure for the drive.
- *
- * The number of sectors is replicated in the sizes[] array of
- * the gendisk structure for the major, which again is copied to
- * the blk_size[][] array.
- * (However, hd_struct has the number of 512-byte sectors,
- *  g->sizes[] and blk_size[][] have the number of 1024-byte blocks.)
- * Note that several drives may have the same major.
- */
-
-/*
- * Add a partition.
- *
- * returns: EINVAL: bad parameters
- *          ENXIO: cannot find drive
- *          EBUSY: proposed partition overlaps an existing one
- *                 or has the same number as an existing one
- *          0: all OK.
- */
-int add_partition(struct block_device *bdev, struct blkpg_partition *p)
-{
-	struct gendisk *g;
-	long long ppstart, pplength;
-	int part, i;
-
-	/* convert bytes to sectors */
-	ppstart = (p->start >> 9);
-	pplength = (p->length >> 9);
-
-	/* check for fit in a hd_struct */ 
-	if (sizeof(sector_t) == sizeof(long) && 
-	    sizeof(long long) > sizeof(long)) {
-		long pstart, plength;
-		pstart = ppstart;
-		plength = pplength;
-		if (pstart != ppstart || plength != pplength
-		    || pstart < 0 || plength < 0)
-			return -EINVAL;
-	}
-
-	/* find the drive major */
-	g = get_gendisk(bdev->bd_dev, &part);
-	if (!g)
-		return -ENXIO;
-
-	/* existing drive? */
-
-	/* drive and partition number OK? */
-	if (bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (p->pno <= 0 || p->pno >= g->minors)
-		return -EINVAL;
-
-	/* partition number in use? */
-	if (g->part[p->pno - 1].nr_sects != 0)
-		return -EBUSY;
-
-	/* overlap? */
-	for (i = 0; i < g->minors - 1; i++)
-		if (!(ppstart+pplength <= g->part[i].start_sect ||
-		      ppstart >= g->part[i].start_sect + g->part[i].nr_sects))
-			return -EBUSY;
-
-	/* all seems OK */
-	g->part[p->pno - 1].start_sect = ppstart;
-	g->part[p->pno - 1].nr_sects = pplength;
-	update_partition(g, p->pno);
-	return 0;
-}
-
-/*
- * Delete a partition given by partition number
- *
- * returns: EINVAL: bad parameters
- *          ENXIO: cannot find partition
- *          EBUSY: partition is busy
- *          0: all OK.
- *
- * Note that the dev argument refers to the entire disk, not the partition.
- */
-int del_partition(struct block_device *bdev, struct blkpg_partition *p)
-{
-	struct gendisk *g;
-	struct block_device *bdevp;
-	int part;
-	int holder;
-
-	/* find the drive major */
-	g = get_gendisk(bdev->bd_dev, &part);
-	if (!g)
-		return -ENXIO;
-	if (bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (p->pno <= 0 || p->pno >= g->minors)
-  		return -EINVAL;
-
-	/* existing drive and partition? */
-	if (g->part[p->pno - 1].nr_sects == 0)
-		return -ENXIO;
-
-	/* partition in use? Incomplete check for now. */
-	bdevp = bdget(MKDEV(g->major, g->first_minor + p->pno));
-	if (!bdevp)
-		return -ENOMEM;
-	if (bd_claim(bdevp, &holder) < 0) {
-		bdput(bdevp);
-		return -EBUSY;
-	}
-
-	/* all seems OK */
-	fsync_bdev(bdevp);
-	invalidate_bdev(bdevp, 0);
-
-	g->part[p->pno - 1].start_sect = 0;
-	g->part[p->pno - 1].nr_sects = 0;
-	update_partition(g, p->pno);
-	bd_release(bdevp);
-	bdput(bdevp);
-
-	return 0;
-}
-
-int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
-{
-	struct blkpg_ioctl_arg a;
-	struct blkpg_partition p;
-	int len;
-
-	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
-		return -EFAULT;
-
-	switch (a.op) {
-		case BLKPG_ADD_PARTITION:
-		case BLKPG_DEL_PARTITION:
-			len = a.datalen;
-			if (len < sizeof(struct blkpg_partition))
-				return -EINVAL;
-			if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
-				return -EFAULT;
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			if (a.op == BLKPG_ADD_PARTITION)
-				return add_partition(bdev, &p);
-			else
-				return del_partition(bdev, &p);
-		default:
-			return -EINVAL;
-	}
-}
-
-/*
- * Common ioctl's for block devices
- */
-int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
-{
-	request_queue_t *q;
-	u64 ullval = 0;
-	int intval;
-	unsigned short usval;
-	kdev_t dev = to_kdev_t(bdev->bd_dev);
-	int holder;
-	struct backing_dev_info *bdi;
-
-	switch (cmd) {
-		case BLKROSET:
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			if (get_user(intval, (int *)(arg)))
-				return -EFAULT;
-			set_device_ro(dev, intval);
-			return 0;
-		case BLKROGET:
-			intval = (bdev_read_only(bdev) != 0);
-			return put_user(intval, (int *)(arg));
-
-		case BLKRASET:
-		case BLKFRASET:
-			if(!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			bdi = blk_get_backing_dev_info(bdev);
-			if (bdi == NULL)
-				return -ENOTTY;
-			bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
-			return 0;
-
-		case BLKRAGET:
-		case BLKFRAGET:
-			if (!arg)
-				return -EINVAL;
-			bdi = blk_get_backing_dev_info(bdev);
-			if (bdi == NULL)
-				return -ENOTTY;
-			return put_user((bdi->ra_pages * PAGE_CACHE_SIZE) / 512,
-						(long *)arg);
-
-		case BLKSECTGET:
-			if ((q = bdev_get_queue(bdev)) == NULL)
-				return -EINVAL;
-
-			usval = q->max_sectors;
-			blk_put_queue(q);
-			return put_user(usval, (unsigned short *)arg);
-
-		case BLKFLSBUF:
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			fsync_bdev(bdev);
-			invalidate_bdev(bdev, 0);
-			return 0;
-
-		case BLKSSZGET:
-			/* get block device hardware sector size */
-			intval = bdev_hardsect_size(bdev);
-			return put_user(intval, (int *) arg);
-
-		case BLKGETSIZE: 
-		{
-			unsigned long ret;
-			/* size in sectors, works up to 2 TB */
-			ullval = bdev->bd_inode->i_size;
-			ret = ullval >> 9;
-			if ((u64)ret != (ullval >> 9))
-				return -EFBIG;
-			return put_user(ret, (unsigned long *) arg);
-		}
-		
-		case BLKGETSIZE64:
-			/* size in bytes */
-			ullval = bdev->bd_inode->i_size;
-			return put_user(ullval, (u64 *) arg);
-
-		case BLKPG:
-			return blkpg_ioctl(bdev, (struct blkpg_ioctl_arg *) arg);
-		case BLKBSZGET:
-			/* get the logical block size (cf. BLKSSZGET) */
-			intval = block_size(bdev);
-			return put_user(intval, (int *) arg);
-
-		case BLKBSZSET:
-			/* set the logical block size */
-			if (!capable(CAP_SYS_ADMIN))
-				return -EACCES;
-			if (!arg)
-				return -EINVAL;
-			if (get_user(intval, (int *) arg))
-				return -EFAULT;
-			if (intval > PAGE_SIZE || intval < 512 ||
-			    (intval & (intval - 1)))
-				return -EINVAL;
-			if (bd_claim(bdev, &holder) < 0)
-				return -EBUSY;
-			set_blocksize(bdev, intval);
-			bd_release(bdev);
-			return 0;
-
-		default:
-			return -ENOTTY;
-	}
-}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8783ee17314d..3fde460ce7ea 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3488,16 +3488,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			loc.start = 0;
 			return _COPYOUT(loc);
 		}
-
-		case BLKGETSIZE:
-			ECALL(get_floppy_geometry(drive, type, &g));
-			return put_user(g->size, (unsigned long *) param);
-
-		case BLKGETSIZE64:
-			ECALL(get_floppy_geometry(drive, type, &g));
-			return put_user((u64)g->size << 9, (u64 *) param);
-		/* BLKRRPART is not defined as floppies don't have
-		 * partition tables */
 	}
 
 	/* convert the old style command into a new style command */
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
new file mode 100644
index 000000000000..fb6a8edb8e21
--- /dev/null
+++ b/drivers/block/ioctl.c
@@ -0,0 +1,231 @@
+#include <linux/sched.h>		/* for capable() */
+#include <linux/blk.h>			/* for set_device_ro() */
+#include <linux/blkpg.h>
+#include <linux/backing-dev.h>
+#include <linux/buffer_head.h>
+#include <asm/uaccess.h>
+
+/* BLKPG: add or delete partition p.pno (1-based) on the whole disk bdev;
+ * disk->part[] is 0-based, so partition N lives in part[N - 1]. */
+static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
+{
+	struct block_device *bdevp;
+	struct gendisk *disk;
+	struct blkpg_ioctl_arg a;
+	struct blkpg_partition p;
+	long long start, length;
+	int holder, part, i;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
+		return -EFAULT;
+	if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+		return -EFAULT;
+	disk = get_gendisk(bdev->bd_dev, &part);
+	if (!disk)
+		return -ENXIO;
+	if (bdev != bdev->bd_contains)
+		return -EINVAL;
+	if (part)
+		BUG();
+	part = p.pno;
+	if (part <= 0 || part >= disk->minors)
+		return -EINVAL;
+
+	switch (a.op) {
+		case BLKPG_ADD_PARTITION:
+			start = p.start >> 9;
+			length = p.length >> 9;
+			/* check for fit in a hd_struct */
+			if (sizeof(sector_t) == sizeof(long) &&
+			    sizeof(long long) > sizeof(long)) {
+				long pstart = start, plength = length;
+				if (pstart != start || plength != length
+				    || pstart < 0 || plength < 0)
+					return -EINVAL;
+			}
+
+			/* partition number in use? */
+			if (disk->part[part - 1].nr_sects != 0)
+				return -EBUSY;
+
+			/* overlap? */
+			for (i = 0; i < disk->minors - 1; i++) {
+				struct hd_struct *s = &disk->part[i];
+				if (!(start+length <= s->start_sect ||
+				      start >= s->start_sect + s->nr_sects))
+					return -EBUSY;
+			}
+			/* all seems OK */
+			disk->part[part - 1].start_sect = start;
+			disk->part[part - 1].nr_sects = length;
+			update_partition(disk, part);
+			return 0;
+		case BLKPG_DEL_PARTITION:
+			if (disk->part[part - 1].nr_sects == 0)
+				return -ENXIO;
+
+			/* partition in use? Incomplete check for now. */
+			bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part);
+			if (!bdevp)
+				return -ENOMEM;
+			if (bd_claim(bdevp, &holder) < 0) {
+				bdput(bdevp);
+				return -EBUSY;
+			}
+
+			/* all seems OK */
+			fsync_bdev(bdevp);
+			invalidate_bdev(bdevp, 0);
+			/* clear the same 0-based slot that was tested above */
+			disk->part[part - 1].start_sect = 0;
+			disk->part[part - 1].nr_sects = 0;
+			update_partition(disk, part);
+			bd_release(bdevp);
+			bdput(bdevp);
+			return 0;
+		default:
+			return -EINVAL;
+	}
+}
+
+static int blkdev_reread_part(struct block_device *bdev)
+{
+	int part;
+	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
+	int res = 0;
+
+	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
+		return -EINVAL;
+	if (part)
+		BUG();
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (down_trylock(&bdev->bd_sem))
+		return -EBUSY;
+	res = rescan_partitions(disk, bdev);
+	up(&bdev->bd_sem);
+	return res;
+}
+
+static int put_ushort(unsigned long arg, unsigned short val)
+{
+	return put_user(val, (unsigned short *)arg);
+}
+
+static int put_int(unsigned long arg, int val)
+{
+	return put_user(val, (int *)arg);
+}
+
+static int put_long(unsigned long arg, long val)
+{
+	return put_user(val, (long *)arg);
+}
+
+static int put_ulong(unsigned long arg, unsigned long val)
+{
+	return put_user(val, (unsigned long *)arg);
+}
+
+static int put_u64(unsigned long arg, u64 val)
+{
+	return put_user(val, (u64 *)arg);
+}
+
+int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
+			unsigned long arg)
+{
+	struct block_device *bdev = inode->i_bdev;
+	struct backing_dev_info *bdi;
+	int holder;
+	int ret, n;
+
+	switch (cmd) {
+	case BLKELVGET:
+	case BLKELVSET:
+		/* deprecated, use the /proc/iosched interface instead */
+		return -ENOTTY;
+	case BLKRAGET:
+	case BLKFRAGET:
+		if (!arg)
+			return -EINVAL;
+		bdi = blk_get_backing_dev_info(bdev);
+		if (bdi == NULL)
+			return -ENOTTY;
+		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
+	case BLKROGET:
+		return put_int(arg, bdev_read_only(bdev) != 0);
+	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+		return put_int(arg, block_size(bdev));
+	case BLKSSZGET: /* get block device hardware sector size */
+		return put_int(arg, bdev_hardsect_size(bdev));
+	case BLKSECTGET:
+		return put_ushort(arg, bdev->bd_queue->max_sectors);
+	case BLKRASET:
+	case BLKFRASET:
+		if(!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		bdi = blk_get_backing_dev_info(bdev);
+		if (bdi == NULL)
+			return -ENOTTY;
+		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+		return 0;
+	case BLKBSZSET:
+		/* set the logical block size */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		if (!arg)
+			return -EINVAL;
+		if (get_user(n, (int *) arg))
+			return -EFAULT;
+		if (n > PAGE_SIZE || n < 512 || (n & (n - 1)))
+			return -EINVAL;
+		if (bd_claim(bdev, &holder) < 0)
+			return -EBUSY;
+		set_blocksize(bdev, n);
+		bd_release(bdev);
+		return 0;
+	case BLKPG:
+		return blkpg_ioctl(bdev, (struct blkpg_ioctl_arg *) arg);
+	case BLKRRPART:
+		return blkdev_reread_part(bdev);
+	case BLKGETSIZE:
+		if ((bdev->bd_inode->i_size >> 9) > ~0UL)
+			return -EFBIG;
+		return put_ulong(arg, bdev->bd_inode->i_size >> 9);
+	case BLKGETSIZE64:
+		return put_u64(arg, bdev->bd_inode->i_size);
+	case BLKFLSBUF:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		if (bdev->bd_op->ioctl) {
+			ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
+			if (ret != -EINVAL)
+				return ret;
+		}
+		fsync_bdev(bdev);
+		invalidate_bdev(bdev, 0);
+		return 0;
+	case BLKROSET:
+		if (bdev->bd_op->ioctl) {
+			ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
+			if (ret != -EINVAL)
+				return ret;
+		}
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		if (get_user(n, (int *)(arg)))
+			return -EFAULT;
+		set_device_ro(to_kdev_t(bdev->bd_dev), n);
+		return 0;
+	default:
+		if (bdev->bd_op->ioctl) {
+			ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
+			if (ret != -EINVAL)
+				return ret;
+		}
+	}
+	return -ENOTTY;
+}
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 391664b9a34f..7d72b786080c 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -291,8 +291,6 @@ static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, un
 	if (cmd != BLKFLSBUF)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
 	/* special: we want to release the ramdisk memory,
 	   it's not like with the other blockdevices where
 	   this ioctl only flushes away the buffer cache. */
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 6b32d3cfb390..a39bcab25891 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -517,8 +517,6 @@ static int mtdblock_ioctl(struct inode * inode, struct file * file,
 
 	switch (cmd) {
 	case BLKFLSBUF:
-		if(!capable(CAP_SYS_ADMIN))
-			return -EACCES;
 		fsync_bdev(inode->i_bdev);
 		invalidate_bdev(inode->i_bdev, 0);
 		down(&mtdblk->cache_sem);
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 97e8437a75d5..1878f540f3b6 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -201,8 +201,6 @@ static int mtdblock_ioctl(struct inode * inode, struct file * file,
 	if (!mtd || cmd != BLKFLSBUF)
 		return -EINVAL;
 
-	if(!capable(CAP_SYS_ADMIN))
-		return -EACCES;
 	fsync_bdev(inode->i_bdev);
 	invalidate_bdev(inode->i_bdev, 0);
 	if (mtd->sync)
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 155aa92a9429..292894af8252 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -770,7 +770,6 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd
 		return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0;
 	}
 	case BLKFLSBUF:
-		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
 		fsync_bdev(inode->i_bdev);
 		invalidate_bdev(inode->i_bdev, 0);
 		if (nftl->mtd->sync)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 47db3ea5e63b..dff0244e63a6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -793,25 +793,6 @@ int blkdev_close(struct inode * inode, struct file * filp)
 	return blkdev_put(inode->i_bdev, BDEV_FILE);
 }
 
-static int blkdev_reread_part(struct block_device *bdev)
-{
-	int part;
-	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
-	int res = 0;
-
-	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
-		return -EINVAL;
-	if (part)
-		BUG();
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (down_trylock(&bdev->bd_sem))
-		return -EBUSY;
-	res = rescan_partitions(disk, bdev);
-	up(&bdev->bd_sem);
-	return res;
-}
-
 static ssize_t blkdev_file_write(struct file *file, const char *buf,
 				   size_t count, loff_t *ppos)
 {
@@ -820,16 +801,6 @@ static ssize_t blkdev_file_write(struct file *file, const char *buf,
 	return generic_file_write_nolock(file, &local_iov, 1, ppos);
 }
 
-static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
-{
-	struct block_device *bdev = inode->i_bdev;
-	int ret = blk_ioctl(bdev, cmd, arg);
-	if (ret == -ENOTTY && bdev->bd_op->ioctl)
-		ret = bdev->bd_op->ioctl(inode, file, cmd, arg);
-	return ret;
-}
-
 struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
index 3cfedb07f803..571618972e30 100644
--- a/include/linux/blkpg.h
+++ b/include/linux/blkpg.h
@@ -57,7 +57,6 @@ struct blkpg_partition {
 #ifdef __KERNEL__
 
 extern char * partition_name(dev_t dev);
-extern int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg);
 
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 93148f1659b0..cac13f931cec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1087,6 +1087,7 @@ extern struct file_operations def_blk_fops;
 extern struct address_space_operations def_blk_aops;
 extern struct file_operations def_fifo_fops;
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
+extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern int blkdev_get(struct block_device *, mode_t, unsigned, int);
 extern int blkdev_put(struct block_device *, int);
 extern int bd_claim(struct block_device *, void *);
-- 
cgit v1.2.3


From afae25b7c8d594f6349e81dce2b16ce44aa9f0ed Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:18 -0700
Subject: [PATCH] preparation to use of driverfs refcounts, part 1 - partitions

	* update_partition() split into add_partition() and delete_partition().
	* all updating of ->part[] is switched to these two (including initial
filling/final cleaning).
	* per-partition devices are allocated on-demand and never reused.
We allocate struct device in add_partition() and put reference to it into
hd_struct.  ->release() for that struct device frees it.  delete_partition()
removes reference from hd_struct and does put_device() on it.  Basically,
we get rid of problems with reused struct device by never reusing them...
	At that point devices for partitions are nice and sane.
---
 drivers/block/ioctl.c |   8 +-
 fs/partitions/check.c | 289 ++++++++++++++++++++++----------------------------
 include/linux/genhd.h |   5 +-
 3 files changed, 134 insertions(+), 168 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index fb6a8edb8e21..4af05bc32db2 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -58,9 +58,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 					return -EBUSY;
 			}
 			/* all seems OK */
-			disk->part[part - 1].start_sect = start;
-			disk->part[part - 1].nr_sects = length;
-			update_partition(disk, part);
+			add_partition(disk, part, start, length);
 			return 0;
 		case BLKPG_DEL_PARTITION:
 			if (disk->part[part - 1].nr_sects == 0)
@@ -79,9 +77,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			fsync_bdev(bdevp);
 			invalidate_bdev(bdevp, 0);
 
-			disk->part[part].start_sect = 0;
-			disk->part[part].nr_sects = 0;
-			update_partition(disk, part);
+			delete_partition(disk, part);
 			bd_release(bdevp);
 			bdput(bdevp);
 			return 0;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 72e71ea060e7..a61a83ded312 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -130,96 +130,49 @@ static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
 
 static void driverfs_create_partitions(struct gendisk *hd)
 {
-	int max_p = hd->minors;
-	struct hd_struct *p = hd->part;
-	char name[DEVICE_NAME_SIZE];
-	char bus_id[BUS_ID_SIZE];
-	struct device *dev, *parent;
-	int part;
+	struct device *parent = hd->driverfs_dev;
+	struct device *dev = &hd->disk_dev;
 
 	/* if driverfs not supported by subsystem, skip partitions */
 	if (!(hd->flags & GENHD_FL_DRIVERFS))
 		return;
 
-	parent = hd->driverfs_dev;
-
 	if (parent)  {
-		sprintf(name, "%s", parent->name);
-		sprintf(bus_id, "%s:", parent->bus_id);
+		sprintf(dev->name, "%sdisc", parent->name);
+		sprintf(dev->bus_id, "%sdisc", parent->bus_id);
+		dev->parent = parent;
+		dev->bus = parent->bus;
 	} else {
-		*name = *bus_id = '\0';
+		sprintf(dev->name, "disc");
+		sprintf(dev->bus_id, "disc");
 	}
-
-	dev = &hd->disk_dev;
 	dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor);
-	sprintf(dev->name, "%sdisc", name);
-	sprintf(dev->bus_id, "%sdisc", bus_id);
-	for (part=1; part < max_p; part++) {
-		dev = &p[part-1].hd_driverfs_dev;
-		sprintf(dev->name, "%spart%d", name, part);
-		sprintf(dev->bus_id, "%s:p%d", bus_id, part);
-		if (!p[part-1].nr_sects)
-			continue;
-		dev->driver_data =
-				(void *)(long)__mkdev(hd->major, hd->first_minor+part);
-	}
-
-	dev = &hd->disk_dev;
-	dev->parent = parent;
-	if (parent)
-		dev->bus = parent->bus;
 	device_register(dev);
 	device_create_file(dev, &dev_attr_type);
 	device_create_file(dev, &dev_attr_kdev);
-
-	for (part=0; part < max_p-1; part++) {
-		dev = &p[part].hd_driverfs_dev;
-		dev->parent = parent;
-		if (parent)
-			dev->bus = parent->bus;
-		if (!dev->driver_data)
-			continue;
-		device_register(dev);
-		device_create_file(dev, &dev_attr_type);
-		device_create_file(dev, &dev_attr_kdev);
-	}
 }
 
 static void driverfs_remove_partitions(struct gendisk *hd)
 {
-	int max_p = hd->minors;
-	struct device *dev;
-	struct hd_struct *p;
-	int part;
-
-	for (part=1, p = hd->part; part < max_p; part++, p++) {
-		dev = &p->hd_driverfs_dev;
-		if (dev->driver_data) {
-			device_remove_file(dev, &dev_attr_type);
-			device_remove_file(dev, &dev_attr_kdev);
-			put_device(dev);	
-			dev->driver_data = NULL;
-		}
-	}
-	dev = &hd->disk_dev;
-	if (dev->driver_data) {
-		device_remove_file(dev, &dev_attr_type);
-		device_remove_file(dev, &dev_attr_kdev);
-		put_device(dev);	
-		dev->driver_data = NULL;
-	}
+	struct device *dev = &hd->disk_dev;
+	if (!(hd->flags & GENHD_FL_DRIVERFS))
+		return;
+	device_remove_file(dev, &dev_attr_type);
+	device_remove_file(dev, &dev_attr_kdev);
+	put_device(dev);	
 }
 
-static void check_partition(struct gendisk *hd, struct block_device *bdev)
+static struct parsed_partitions *
+check_partition(struct gendisk *hd, struct block_device *bdev)
 {
+	struct parsed_partitions *state;
 	devfs_handle_t de = NULL;
 	char buf[64];
-	struct parsed_partitions *state;
-	int i;
+	int i, res;
 
 	state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
 	if (!state)
-		return;
+		return NULL;
 
 	if (hd->flags & GENHD_FL_DEVFS)
 		de = hd->de;
@@ -234,31 +187,19 @@ static void check_partition(struct gendisk *hd, struct block_device *bdev)
 			sprintf(state->name, "p");
 	}
 	state->limit = hd->minors;
-	for (i = 0; check_part[i]; i++) {
-		int res, j;
-		struct hd_struct *p;
+	i = res = 0;
+	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
-		res = check_part[i](state, bdev);
-		if (!res)
-			continue;
-		if (res < 0) {
-			if (warn_no_part)
-				printk(" unable to read partition table\n");
-			return;
-		} 
-		p = hd->part;
-		for (j = 1; j < state->limit; j++) {
-			p[j-1].start_sect = state->parts[j].from;
-			p[j-1].nr_sects = state->parts[j].size;
-#if CONFIG_BLK_DEV_MD
-			if (!state->parts[j].flags)
-				continue;
-			md_autodetect_dev(bdev->bd_dev+j);
-#endif
-		}
-		return;
+		res = check_part[i++](state, bdev);
 	}
-	printk(" unknown partition table\n");
+	if (res > 0)
+		return state;
+	if (!res)
+		printk(" unknown partition table\n");
+	else if (warn_no_part)
+		printk(" unable to read partition table\n");
+	kfree(state);
+	return NULL;
 }
 
 static void devfs_register_partition(struct gendisk *dev, int part)
@@ -329,9 +270,6 @@ static void devfs_create_partitions(struct gendisk *dev)
 	devfs_auto_unregister(dev->disk_de, slave);
 	if (!(dev->flags & GENHD_FL_DEVFS))
 		devfs_auto_unregister (slave, dir);
-	for (part = 1; part < max_p; part++, p++)
-		if (p->nr_sects)
-			devfs_register_partition(dev, part);
 #endif
 }
 
@@ -379,11 +317,6 @@ static void devfs_create_cdrom(struct gendisk *dev)
 static void devfs_remove_partitions(struct gendisk *dev)
 {
 #ifdef CONFIG_DEVFS_FS
-	int part;
-	for (part = dev->minors-1; part--; ) {
-		devfs_unregister(dev->part[part].de);
-		dev->part[part].de = NULL;
-	}
 	devfs_unregister(dev->disk_de);
 	dev->disk_de = NULL;
 	if (dev->flags & GENHD_FL_CD)
@@ -393,10 +326,69 @@ static void devfs_remove_partitions(struct gendisk *dev)
 #endif
 }
 
+void delete_partition(struct gendisk *disk, int part)
+{
+	struct hd_struct *p = disk->part + part - 1;
+	struct device *dev;
+	if (!p->nr_sects)
+		return;
+	p->start_sect = 0;
+	p->nr_sects = 0;
+	devfs_unregister(p->de);
+	dev = p->hd_driverfs_dev;
+	p->hd_driverfs_dev = NULL;
+	if (dev) {
+		device_remove_file(dev, &dev_attr_type);
+		device_remove_file(dev, &dev_attr_kdev);
+		device_unregister(dev);	
+	}
+}
+
+static void part_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
+{
+	struct hd_struct *p = disk->part + part - 1;
+	struct device *parent = disk->disk_dev.parent;
+	struct device *dev;
+
+	p->start_sect = start;
+	p->nr_sects = len;
+	devfs_register_partition(disk, part);
+	if (!(disk->flags & GENHD_FL_DRIVERFS))
+		return;
+	dev = kmalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev)
+		return;
+	memset(dev, 0, sizeof(struct device));
+	if (parent)  {
+		sprintf(dev->name, "%spart%d", parent->name, part);
+		sprintf(dev->bus_id, "%s:p%d", parent->bus_id, part);
+		dev->parent = parent;
+		dev->bus = parent->bus;
+	} else {
+		sprintf(dev->name, "part%d", part);
+		sprintf(dev->bus_id, "p%d", part);
+	}
+	dev->release = part_release;
+	dev->driver_data =
+		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
+	device_register(dev);
+	device_create_file(dev, &dev_attr_type);
+	device_create_file(dev, &dev_attr_kdev);
+	p->hd_driverfs_dev = dev;
+}
+
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct parsed_partitions *state;
 	struct block_device *bdev;
+	int j;
+
 	if (disk->flags & GENHD_FL_CD)
 		devfs_create_cdrom(disk);
 
@@ -411,45 +403,33 @@ void register_disk(struct gendisk *disk)
 	bdev = bdget(MKDEV(disk->major, disk->first_minor));
 	if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0)
 		return;
-	check_partition(disk, bdev);
+	state = check_partition(disk, bdev);
 	driverfs_create_partitions(disk);
 	devfs_create_partitions(disk);
-	blkdev_put(bdev, BDEV_RAW);
-}
-
-void update_partition(struct gendisk *disk, int part)
-{
-	struct hd_struct *p = disk->part + part - 1;
-	struct device *dev = &p->hd_driverfs_dev;
-
-	if (!p->nr_sects) {
-		if (p->de) {
-			devfs_unregister(p->de);
-			p->de = NULL;
-		}
-		if (dev->driver_data) {
-			device_remove_file(dev, &dev_attr_type);
-			device_remove_file(dev, &dev_attr_kdev);
-			put_device(dev);	
-			dev->driver_data = NULL;
+	if (state) {
+		for (j = 1; j < state->limit; j++) {
+			sector_t size = state->parts[j].size;
+			sector_t from = state->parts[j].from;
+			if (!size)
+				continue;
+			add_partition(disk, j, from, size);
+#if CONFIG_BLK_DEV_MD
+			if (!state->parts[j].flags)
+				continue;
+			md_autodetect_dev(bdev->bd_dev+j);
+#endif
 		}
-		return;
+		kfree(state);
 	}
-	if (!p->de)
-		devfs_register_partition(disk, part);
-	if (dev->driver_data || !(disk->flags & GENHD_FL_DRIVERFS))
-		return;
-	dev->driver_data =
-		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
-	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
+	blkdev_put(bdev, BDEV_RAW);
 }
 
 int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 {
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
+	struct parsed_partitions *state;
 	int p, res;
+
 	if (!bdev->bd_invalidated)
 		return 0;
 	if (bdev->bd_part_count)
@@ -458,16 +438,25 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (res)
 		return res;
 	bdev->bd_invalidated = 0;
-	for (p = 0; p < disk->minors - 1; p++) {
-		disk->part[p].start_sect = 0;
-		disk->part[p].nr_sects = 0;
-	}
+	for (p = 1; p < disk->minors; p++)
+		delete_partition(disk, p);
 	if (bdev->bd_op->revalidate)
 		bdev->bd_op->revalidate(dev);
-	if (get_capacity(disk))
-		check_partition(disk, bdev);
-	for (p = 1; p < disk->minors; p++)
-		update_partition(disk, p);
+	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
+		return res;
+	for (p = 1; p < state->limit; p++) {
+		sector_t size = state->parts[p].size;
+		sector_t from = state->parts[p].from;
+		if (!size)	/* slot left empty by the parser */
+			continue;
+		add_partition(disk, p, from, size);
+#if CONFIG_BLK_DEV_MD
+		if (!state->parts[p].flags)	/* index is p; no 'j' in this function */
+			continue;
+		md_autodetect_dev(bdev->bd_dev+p);
+#endif
+	}
+	kfree(state);
 	return res;
 }
 
@@ -493,45 +482,25 @@ fail:
 	return NULL;
 }
 
-static int wipe_partitions(struct gendisk *disk)
+void del_gendisk(struct gendisk *disk)
 {
 	int max_p = disk->minors;
 	kdev_t devp;
-	int res;
 	int p;
 
 	/* invalidate stuff */
 	for (p = max_p - 1; p > 0; p--) {
 		devp = mk_kdev(disk->major,disk->first_minor + p);
-#if 0					/* %%% superfluous? */
-		if (disk->part[p-1].nr_sects == 0)
-			continue;
-#endif
-		res = invalidate_device(devp, 1);
-		if (res)
-			return res;
-		disk->part[p-1].start_sect = 0;
-		disk->part[p-1].nr_sects = 0;
+		invalidate_device(devp, 1);
+		delete_partition(disk, p);
 	}
 	devp = mk_kdev(disk->major,disk->first_minor);
-#if 0					/* %%% superfluous? */
-	if (disk->part[p].nr_sects == 0)
-		continue;
-#endif
-	res = invalidate_device(devp, 1);
-	if (res)
-		return res;
+	invalidate_device(devp, 1);
 	disk->capacity = 0;
-	return 0;
-}
-
-void del_gendisk(struct gendisk *disk)
-{
-	driverfs_remove_partitions(disk);
-	wipe_partitions(disk);
+	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
+	driverfs_remove_partitions(disk);
 	devfs_remove_partitions(disk);
-	disk->flags &= ~GENHD_FL_UP;
 }
 
 struct dev_name {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6e1f68900bba..6b859fad6a8a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -62,7 +62,7 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	devfs_handle_t de;              /* primary (master) devfs entry  */
-	struct device hd_driverfs_dev;  /* support driverfs hiearchy     */
+	struct device *hd_driverfs_dev;  /* support driverfs hiearchy     */
 };
 
 #define GENHD_FL_REMOVABLE  1
@@ -262,7 +262,8 @@ struct unixware_disklabel {
 char *disk_name (struct gendisk *hd, int part, char *buf);
 
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void update_partition(struct gendisk *disk, int part);
+extern void add_partition(struct gendisk *, int, sector_t, sector_t);
+extern void delete_partition(struct gendisk *, int);
 
 extern struct gendisk *alloc_disk(int minors);
 extern void put_disk(struct gendisk *disk);
-- 
cgit v1.2.3


From b288f6add39cf474fc2ec8087d32d3e1d4c1c6d0 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:24 -0700
Subject: [PATCH] preparation to use of driverfs refcounts, part 2 - disk

	* disk->disk_dev is initialized in alloc_disk(), device_add()'d in
	  add_disk(), device_del()'d in del_gendisk() and put_device()'d in
	  put_disk().
	* devices of partitions are made its children.
	* attributes of disk one: dev (dev_t of the thing), range (number of
	  minors) and size (in sectors).
	* attributes of partition ones: dev (ditto), start (in sectors) and
	  size (in sectors).
	* disk devices are put on a new bus - "block"
	* if caller of add_disk() had set disk->driverfs_dev, we set symlinks:
	  "device" from disk to underlying device and "block" from underlying
	  device to disk.
	* ->release() of disk_dev frees disk and disk->part.
	At that point we have sane driverfs subtree for each gendisk and
refcount of its root (disk->disk_dev) can act as gendisk refcount.
---
 drivers/block/genhd.c    |  23 ++++-
 drivers/ide/ide-cd.c     |   1 +
 drivers/ide/ide-disk.c   |   1 +
 drivers/ide/ide-floppy.c |   1 +
 drivers/scsi/sr.c        |  34 +-------
 fs/partitions/check.c    | 221 ++++++++++++++++++++++++++++++++---------------
 include/linux/cdrom.h    |   1 -
 7 files changed, 173 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 8ecb1461f43e..ecb2dcdf214d 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -192,6 +192,10 @@ struct device_class disk_devclass = {
 	.name		= "disk",
 };
 
+static struct bus_type disk_bus = {
+	name:		"block",
+};
+
 int __init device_init(void)
 {
 	int i;
@@ -200,6 +204,7 @@ int __init device_init(void)
 		INIT_LIST_HEAD(&gendisks[i].list);
 	blk_dev_init();
 	devclass_register(&disk_devclass);
+	bus_register(&disk_bus);
 	return 0;
 }
 
@@ -207,6 +212,13 @@ __initcall(device_init);
 
 EXPORT_SYMBOL(disk_devclass);
 
+static void disk_release(struct device *dev)
+{
+	struct gendisk *disk = dev->driver_data;
+	kfree(disk->part);
+	kfree(disk);
+}
+
 struct gendisk *alloc_disk(int minors)
 {
 	struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
@@ -224,16 +236,19 @@ struct gendisk *alloc_disk(int minors)
 		disk->minors = minors;
 		while (minors >>= 1)
 			disk->minor_shift++;
+		disk->disk_dev.bus = &disk_bus;
+		disk->disk_dev.release = disk_release;
+		disk->disk_dev.driver_data = disk;
+		device_initialize(&disk->disk_dev);
 	}
 	return disk;
 }
 
 void put_disk(struct gendisk *disk)
 {
-	if (disk) {
-		kfree(disk->part);
-		kfree(disk);
-	}
+	if (disk)
+		put_device(&disk->disk_dev);
 }
+
 EXPORT_SYMBOL(alloc_disk);
 EXPORT_SYMBOL(put_disk);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3471aba90f64..8fffe423ab14 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -3196,6 +3196,7 @@ static int ide_cdrom_attach (ide_drive_t *drive)
 	g->minors = 1;
 	g->minor_shift = 0;
 	g->de = drive->de;
+	g->driverfs_dev = &drive->gendev;
 	g->flags = GENHD_FL_CD;
 	if (ide_cdrom_setup(drive)) {
 		struct cdrom_device_info *devinfo = &info->devinfo;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 5b0c1ca8e75d..aecd9a7de7ed 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1874,6 +1874,7 @@ static int idedisk_attach(ide_drive_t *drive)
 	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
 	g->de = drive->de;
+	g->driverfs_dev = &drive->gendev;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
 	g->flags |= GENHD_FL_DEVFS;
 	set_capacity(g, current_capacity(drive));
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index fca1f92f896d..f10543ba3d8f 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -2110,6 +2110,7 @@ static int idefloppy_attach (ide_drive_t *drive)
 	DRIVER(drive)->busy--;
 	g->minors = 1 << PARTN_BITS;
 	g->minor_shift = PARTN_BITS;
+	g->driverfs_dev = &drive->gendev;
 	g->de = drive->de;
 	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
 	g->flags |= GENHD_FL_DEVFS;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 05fe1b938eb4..39af5cce16f0 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -726,24 +726,6 @@ cleanup_dev:
 	return 1;
 }
 
-/* Driverfs file support */
-static ssize_t sr_device_kdev_read(struct device *driverfs_dev, 
-				   char *page, size_t count, loff_t off)
-{
-	kdev_t kdev; 
-	kdev.value=(int)(long)driverfs_dev->driver_data;
-	return off ? 0 : sprintf(page, "%x\n",kdev.value);
-}
-static DEVICE_ATTR(kdev,S_IRUGO,sr_device_kdev_read,NULL);
-
-static ssize_t sr_device_type_read(struct device *driverfs_dev, 
-				   char *page, size_t count, loff_t off) 
-{
-	return off ? 0 : sprintf (page, "CHR\n");
-}
-static DEVICE_ATTR(type,S_IRUGO,sr_device_type_read,NULL);
-
-
 void sr_finish()
 {
 	int i;
@@ -797,22 +779,8 @@ void sr_finish()
 		 */
 		get_capabilities(cd);
 		sr_vendor_init(cd);
-
-		sprintf(cd->cdi.cdrom_driverfs_dev.bus_id, "%s:cd",
-			cd->device->sdev_driverfs_dev.bus_id);
-		sprintf(cd->cdi.cdrom_driverfs_dev.name, "%scdrom",
-			cd->device->sdev_driverfs_dev.name);
-		cd->cdi.cdrom_driverfs_dev.parent = 
-			&cd->device->sdev_driverfs_dev;
-		cd->cdi.cdrom_driverfs_dev.bus = &scsi_driverfs_bus_type;
-		cd->cdi.cdrom_driverfs_dev.driver_data = 
-			(void *)(long)__mkdev(MAJOR_NR, i);
-		device_register(&cd->cdi.cdrom_driverfs_dev);
-		device_create_file(&cd->cdi.cdrom_driverfs_dev,
-				   &dev_attr_type);
-		device_create_file(&cd->cdi.cdrom_driverfs_dev,
-				   &dev_attr_kdev);
 		disk->de = cd->device->de;
+		disk->driverfs_dev = &cd->device->sdev_driverfs_dev;
 		register_cdrom(&cd->cdi);
 		set_capacity(disk, cd->capacity);
 		add_disk(disk);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index a61a83ded312..5fc23d047567 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -18,6 +18,7 @@
 #include <linux/blk.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
+#include <../drivers/base/fs/fs.h>	/* Eeeeewwwww */
 
 #include "check.h"
 
@@ -111,57 +112,6 @@ char *disk_name(struct gendisk *hd, int part, char *buf)
 	return buf;
 }
 
-/* Driverfs file support */
-static ssize_t partition_device_kdev_read(struct device *driverfs_dev, 
-			char *page, size_t count, loff_t off)
-{
-	kdev_t kdev; 
-	kdev.value=(int)(long)driverfs_dev->driver_data;
-	return off ? 0 : sprintf (page, "%x\n",kdev.value);
-}
-static DEVICE_ATTR(kdev,S_IRUGO,partition_device_kdev_read,NULL);
-
-static ssize_t partition_device_type_read(struct device *driverfs_dev, 
-			char *page, size_t count, loff_t off) 
-{
-	return off ? 0 : sprintf (page, "BLK\n");
-}
-static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL);
-
-static void driverfs_create_partitions(struct gendisk *hd)
-{
-	struct device *parent = hd->driverfs_dev;
-	struct device *dev = &hd->disk_dev;
-
-	/* if driverfs not supported by subsystem, skip partitions */
-	if (!(hd->flags & GENHD_FL_DRIVERFS))
-		return;
-
-	if (parent)  {
-		sprintf(dev->name, "%sdisc", parent->name);
-		sprintf(dev->bus_id, "%sdisc", parent->bus_id);
-		dev->parent = parent;
-		dev->bus = parent->bus;
-	} else {
-		sprintf(dev->name, "disc");
-		sprintf(dev->bus_id, "disc");
-	}
-	dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor);
-	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
-}
-
-static void driverfs_remove_partitions(struct gendisk *hd)
-{
-	struct device *dev = &hd->disk_dev;
-	if (!(hd->flags & GENHD_FL_DRIVERFS))
-		return;
-	device_remove_file(dev, &dev_attr_type);
-	device_remove_file(dev, &dev_attr_kdev);
-	put_device(dev);	
-}
-
 static struct parsed_partitions *
 check_partition(struct gendisk *hd, struct block_device *bdev)
 {
@@ -326,6 +276,40 @@ static void devfs_remove_partitions(struct gendisk *dev)
 #endif
 }
 
+static ssize_t part_dev_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->parent->driver_data;
+	struct hd_struct *p = dev->driver_data;
+	int part = p - disk->part + 1;
+	dev_t base = MKDEV(disk->major, disk->first_minor); 
+	return off ? 0 : sprintf(page, "%04x\n",base + part);
+}
+static ssize_t part_start_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct hd_struct *p = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)p->start_sect);
+}
+static ssize_t part_size_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct hd_struct *p = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)p->nr_sects);
+}
+static struct device_attribute part_attr_dev = {
+	.attr = {.name = "dev", .mode = S_IRUGO },
+	.show	= part_dev_read
+};
+static struct device_attribute part_attr_start = {
+	.attr = {.name = "start", .mode = S_IRUGO },
+	.show	= part_start_read
+};
+static struct device_attribute part_attr_size = {
+	.attr = {.name = "size", .mode = S_IRUGO },
+	.show	= part_size_read
+};
+
 void delete_partition(struct gendisk *disk, int part)
 {
 	struct hd_struct *p = disk->part + part - 1;
@@ -338,8 +322,9 @@ void delete_partition(struct gendisk *disk, int part)
 	dev = p->hd_driverfs_dev;
 	p->hd_driverfs_dev = NULL;
 	if (dev) {
-		device_remove_file(dev, &dev_attr_type);
-		device_remove_file(dev, &dev_attr_kdev);
+		device_remove_file(dev, &part_attr_size);
+		device_remove_file(dev, &part_attr_start);
+		device_remove_file(dev, &part_attr_dev);
 		device_unregister(dev);	
 	}
 }
@@ -352,43 +337,130 @@ static void part_release(struct device *dev)
 void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
 {
 	struct hd_struct *p = disk->part + part - 1;
-	struct device *parent = disk->disk_dev.parent;
+	struct device *parent = &disk->disk_dev;
 	struct device *dev;
 
 	p->start_sect = start;
 	p->nr_sects = len;
 	devfs_register_partition(disk, part);
-	if (!(disk->flags & GENHD_FL_DRIVERFS))
-		return;
 	dev = kmalloc(sizeof(struct device), GFP_KERNEL);
 	if (!dev)
 		return;
 	memset(dev, 0, sizeof(struct device));
-	if (parent)  {
-		sprintf(dev->name, "%spart%d", parent->name, part);
-		sprintf(dev->bus_id, "%s:p%d", parent->bus_id, part);
-		dev->parent = parent;
-		dev->bus = parent->bus;
-	} else {
-		sprintf(dev->name, "part%d", part);
-		sprintf(dev->bus_id, "p%d", part);
-	}
+	dev->parent = parent;
+	sprintf(dev->bus_id, "p%d", part);
 	dev->release = part_release;
-	dev->driver_data =
-		(void *)(long)__mkdev(disk->major, disk->first_minor+part);
+	dev->driver_data = p;
 	device_register(dev);
-	device_create_file(dev, &dev_attr_type);
-	device_create_file(dev, &dev_attr_kdev);
+	device_create_file(dev, &part_attr_dev);
+	device_create_file(dev, &part_attr_start);
+	device_create_file(dev, &part_attr_size);
 	p->hd_driverfs_dev = dev;
 }
 
+static ssize_t disk_dev_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	dev_t base = MKDEV(disk->major, disk->first_minor); 
+	return off ? 0 : sprintf(page, "%04x\n",base);
+}
+static ssize_t disk_range_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	return off ? 0 : sprintf(page, "%d\n",disk->minors);
+}
+static ssize_t disk_size_read(struct device *dev,
+			char *page, size_t count, loff_t off)
+{
+	struct gendisk *disk = dev->driver_data;
+	return off ? 0 : sprintf(page, "%llu\n",(u64)get_capacity(disk));
+}
+static struct device_attribute disk_attr_dev = {
+	.attr = {.name = "dev", .mode = S_IRUGO },
+	.show	= disk_dev_read
+};
+static struct device_attribute disk_attr_range = {
+	.attr = {.name = "range", .mode = S_IRUGO },
+	.show	= disk_range_read
+};
+static struct device_attribute disk_attr_size = {
+	.attr = {.name = "size", .mode = S_IRUGO },
+	.show	= disk_size_read
+};
+
+static void disk_driverfs_symlinks(struct gendisk *disk)
+{
+	struct device *target = disk->driverfs_dev;
+	struct device *dev = &disk->disk_dev;
+	struct device *p;
+	char *path;
+	char *s;
+	int length;
+	int depth;
+
+	if (!target)
+		return;
+
+	get_device(target);
+
+	length = get_devpath_length(target);
+	length += strlen("..");
+
+	if (length > PATH_MAX)
+		return;
+
+	if (!(path = kmalloc(length,GFP_KERNEL)))
+		return;
+	memset(path,0,length);
+
+	/* our relative position */
+	strcpy(path,"..");
+
+	fill_devpath(target, path, length);
+	driverfs_create_symlink(&dev->dir, "device", path);
+	kfree(path);
+
+	for (p = target, depth = 0; p; p = p->parent, depth++)
+		;
+	length = get_devpath_length(dev);
+	length += 3 * depth - 1;
+
+	if (length > PATH_MAX)
+		return;
+
+	if (!(path = kmalloc(length,GFP_KERNEL)))
+		return;
+	memset(path,0,length);
+	for (s = path; depth--; s += 3)
+		strcpy(s, "../");
+
+	fill_devpath(dev, path, length);
+	driverfs_create_symlink(&target->dir, "block", path);
+	kfree(path);
+}
+
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct device *dev = &disk->disk_dev;
 	struct parsed_partitions *state;
 	struct block_device *bdev;
+	char *s;
 	int j;
 
+	strcpy(dev->bus_id, disk->disk_name);
+	/* ewww... some of these buggers have / in name... */
+	s = strchr(dev->bus_id, '/');
+	if (s)
+		*s = '!';
+	device_add(dev);
+	device_create_file(dev, &disk_attr_dev);
+	device_create_file(dev, &disk_attr_range);
+	device_create_file(dev, &disk_attr_size);
+	disk_driverfs_symlinks(disk);
+
 	if (disk->flags & GENHD_FL_CD)
 		devfs_create_cdrom(disk);
 
@@ -404,7 +476,6 @@ void register_disk(struct gendisk *disk)
 	if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0)
 		return;
 	state = check_partition(disk, bdev);
-	driverfs_create_partitions(disk);
 	devfs_create_partitions(disk);
 	if (state) {
 		for (j = 1; j < state->limit; j++) {
@@ -499,8 +570,16 @@ void del_gendisk(struct gendisk *disk)
 	disk->capacity = 0;
 	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
-	driverfs_remove_partitions(disk);
 	devfs_remove_partitions(disk);
+	device_remove_file(&disk->disk_dev, &disk_attr_dev);
+	device_remove_file(&disk->disk_dev, &disk_attr_range);
+	device_remove_file(&disk->disk_dev, &disk_attr_size);
+	driverfs_remove_file(&disk->disk_dev.dir, "device");
+	if (disk->driverfs_dev) {
+		driverfs_remove_file(&disk->driverfs_dev->dir, "block");
+		put_device(disk->driverfs_dev);
+	}
+	device_del(&disk->disk_dev);
 }
 
 struct dev_name {
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index b287b7a24b11..4387203c95b7 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -730,7 +730,6 @@ struct cdrom_device_info {
 	struct cdrom_device_ops  *ops;  /* link to device_ops */
 	struct cdrom_device_info *next; /* next device_info for this major */
 	void *handle;		        /* driver-dependent data */
-	struct device cdrom_driverfs_dev; /* driverfs implementation */
 /* specifications */
         kdev_t dev;	                /* device number */
 	int mask;                       /* mask of capability: disables them */
-- 
cgit v1.2.3


From 68c16870dcfaba7c9e2dd5055a2caf4edcf42e87 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:32 -0700
Subject: [PATCH] refcounts for gendisks

Finally.  We use disk->disk_dev.refcount as the gendisk refcount.  New helper -
get_disk(): atomic_inc on that refcount.  get_gendisk() takes a reference
before returning; callers of get_gendisk() do put_disk() when they are done.
---
 drivers/block/genhd.c | 10 ++++++++++
 drivers/block/ioctl.c | 47 +++++++++++++++++++++++++++++++++++++----------
 fs/block_dev.c        | 23 ++++++++++++++++++-----
 fs/partitions/check.c |  1 +
 include/linux/genhd.h |  7 +++++--
 5 files changed, 71 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index ecb2dcdf214d..1cc4655c04c9 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -100,6 +100,8 @@ get_gendisk(dev_t dev, int *part)
 	read_lock(&gendisk_lock);
 	if (gendisks[major].get) {
 		disk = gendisks[major].get(minor);
+		if (disk)
+			get_disk(disk);
 		read_unlock(&gendisk_lock);
 		return disk;
 	}
@@ -109,6 +111,7 @@ get_gendisk(dev_t dev, int *part)
 			continue;
 		if (disk->first_minor + disk->minors <= minor)
 			continue;
+		get_disk(disk);
 		read_unlock(&gendisk_lock);
 		*part = minor - disk->first_minor;
 		return disk;
@@ -244,6 +247,12 @@ struct gendisk *alloc_disk(int minors)
 	return disk;
 }
 
+struct gendisk *get_disk(struct gendisk *disk)
+{
+	atomic_inc(&disk->disk_dev.refcount);
+	return disk;
+}
+
 void put_disk(struct gendisk *disk)
 {
 	if (disk)
@@ -251,4 +260,5 @@ void put_disk(struct gendisk *disk)
 }
 
 EXPORT_SYMBOL(alloc_disk);
+EXPORT_SYMBOL(get_disk);
 EXPORT_SYMBOL(put_disk);
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index 4af05bc32db2..de2da2b44cad 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -25,13 +25,17 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 	disk = get_gendisk(bdev->bd_dev, &part);
 	if (!disk)
 		return -ENXIO;
-	if (bdev != bdev->bd_contains)
+	if (bdev != bdev->bd_contains) {
+		put_disk(disk);
 		return -EINVAL;
+	}
 	if (part)
 		BUG();
 	part = p.pno;
-	if (part <= 0 || part >= disk->minors)
+	if (part <= 0 || part >= disk->minors) {
+		put_disk(disk);
 		return -EINVAL;
+	}
 
 	switch (a.op) {
 		case BLKPG_ADD_PARTITION:
@@ -42,34 +46,46 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			    sizeof(long long) > sizeof(long)) {
 				long pstart = start, plength = length;
 				if (pstart != start || plength != length
-				    || pstart < 0 || plength < 0)
+				    || pstart < 0 || plength < 0) {
+					put_disk(disk);
 					return -EINVAL;
+				}
 			}
 
 			/* partition number in use? */
-			if (disk->part[part - 1].nr_sects != 0)
+			if (disk->part[part - 1].nr_sects != 0) {
+				put_disk(disk);
 				return -EBUSY;
+			}
 
 			/* overlap? */
 			for (i = 0; i < disk->minors - 1; i++) {
 				struct hd_struct *s = &disk->part[i];
 				if (!(start+length <= s->start_sect ||
-				      start >= s->start_sect + s->nr_sects))
+				      start >= s->start_sect + s->nr_sects)) {
+					put_disk(disk);
 					return -EBUSY;
+				}
 			}
 			/* all seems OK */
 			add_partition(disk, part, start, length);
+			put_disk(disk);
 			return 0;
 		case BLKPG_DEL_PARTITION:
-			if (disk->part[part - 1].nr_sects == 0)
+			if (disk->part[part - 1].nr_sects == 0) {
+				put_disk(disk);
 				return -ENXIO;
+			}
 
 			/* partition in use? Incomplete check for now. */
 			bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part);
-			if (!bdevp)
+			if (!bdevp) {
+				put_disk(disk);
 				return -ENOMEM;
+			}
 			if (bd_claim(bdevp, &holder) < 0) {
 				bdput(bdevp);
+				put_disk(disk);
 				return -EBUSY;
 			}
 
@@ -80,8 +96,10 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			delete_partition(disk, part);
 			bd_release(bdevp);
 			bdput(bdevp);
+			put_disk(disk);
 			return 0;
 		default:
+			put_disk(disk);
 			return -EINVAL;
 	}
 }
@@ -92,16 +110,25 @@ static int blkdev_reread_part(struct block_device *bdev)
 	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
 	int res = 0;
 
-	if (!disk || disk->minors == 1 || bdev != bdev->bd_contains)
+	if (!disk)
 		return -EINVAL;
+	if (disk->minors == 1 || bdev != bdev->bd_contains) {
+		put_disk(disk);
+		return -EINVAL;
+	}
 	if (part)
 		BUG();
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN)) {
+		put_disk(disk);
 		return -EACCES;
-	if (down_trylock(&bdev->bd_sem))
+	}
+	if (down_trylock(&bdev->bd_sem)) {
+		put_disk(disk);
 		return -EBUSY;
+	}
 	res = rescan_partitions(disk, bdev);
 	up(&bdev->bd_sem);
+	put_disk(disk);
 	return res;
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dff0244e63a6..d029636b07e6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -542,6 +542,7 @@ int check_disk_change(struct block_device *bdev)
 		bdops->revalidate(dev);
 	if (disk && disk->minors > 1)
 		bdev->bd_invalidated = 1;
+	put_disk(disk);
 	return 1;
 }
 
@@ -553,7 +554,9 @@ int full_check_disk_change(struct block_device *bdev)
 		BUG();
 	down(&bdev->bd_sem);
 	if (check_disk_change(bdev)) {
-		rescan_partitions(get_gendisk(bdev->bd_dev, &n), bdev);
+		struct gendisk *disk = get_gendisk(bdev->bd_dev, &n);
+		rescan_partitions(disk, bdev);
+		put_disk(disk);
 		res = 1;
 	}
 	up(&bdev->bd_sem);
@@ -622,13 +625,18 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			struct block_device *disk;
 			disk = bdget(MKDEV(g->major, g->first_minor));
 			ret = -ENOMEM;
-			if (!disk)
+			if (!disk) {
+				put_disk(g);
 				goto out1;
+			}
 			ret = blkdev_get(disk, file->f_mode, file->f_flags, BDEV_RAW);
-			if (ret)
+			if (ret) {
+				put_disk(g);
 				goto out1;
+			}
 			bdev->bd_contains = disk;
 		}
+		put_disk(g);
 	}
 	if (bdev->bd_contains == bdev) {
 		int part;
@@ -643,8 +651,10 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 
 		if (bdev->bd_op->open) {
 			ret = bdev->bd_op->open(inode, file);
-			if (ret)
+			if (ret) {
+				put_disk(g);
 				goto out2;
+			}
 		}
 		if (!bdev->bd_openers) {
 			struct backing_dev_info *bdi;
@@ -662,6 +672,7 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 		}
 		if (bdev->bd_invalidated)
 			rescan_partitions(g, bdev);
+		put_disk(g);
 	} else {
 		down(&bdev->bd_contains->bd_sem);
 		bdev->bd_contains->bd_part_count++;
@@ -673,15 +684,17 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			inode->i_data.backing_dev_info =
 			   bdev->bd_inode->i_data.backing_dev_info =
 			   bdev->bd_contains->bd_inode->i_data.backing_dev_info;
-			if (!p->nr_sects) {
+			if (!(g->flags & GENHD_FL_UP) || !p->nr_sects) {
 				bdev->bd_contains->bd_part_count--;
 				up(&bdev->bd_contains->bd_sem);
+				put_disk(g);
 				ret = -ENXIO;
 				goto out2;
 			}
 			bdev->bd_queue = bdev->bd_contains->bd_queue;
 			bdev->bd_offset = p->start_sect;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+			put_disk(g);
 		}
 		up(&bdev->bd_contains->bd_sem);
 	}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5fc23d047567..e6ed1a443116 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -616,6 +616,7 @@ char *partition_name(dev_t dev)
 	dname->name = NULL;
 	if (hd)
 		dname->name = disk_name(hd, part, dname->namebuf);
+	put_disk(hd);
 	if (!dname->name) {
 		sprintf(dname->namebuf, "[dev %s]", kdevname(to_kdev_t(dev)));
 		dname->name = dname->namebuf;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6b859fad6a8a..030ee2f87891 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -266,6 +266,7 @@ extern void add_partition(struct gendisk *, int, sector_t, sector_t);
 extern void delete_partition(struct gendisk *, int);
 
 extern struct gendisk *alloc_disk(int minors);
+extern struct gendisk *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
 
 /* will go away */
@@ -273,9 +274,11 @@ extern void blk_set_probe(int major, struct gendisk *(p)(int));
 
 static inline unsigned int disk_index (kdev_t dev)
 {
-	int part;
+	int part, res;
 	struct gendisk *g = get_gendisk(kdev_t_to_nr(dev), &part);
-	return g ? (minor(dev) >> g->minor_shift) : 0;
+	res = g ? (minor(dev) >> g->minor_shift) : 0;
+	put_disk(g);
+	return res;
 }
 
 #endif
-- 
cgit v1.2.3


From 5682bcc620dbee99319997718c8929ec0d797854 Mon Sep 17 00:00:00 2001
From: Alexander Viro <viro@math.psu.edu>
Date: Tue, 15 Oct 2002 04:25:37 -0700
Subject: [PATCH] bdev->bd_disk introduced

There we go - now we can put a reference to the gendisk into block_device, which
we do in do_open().  Most of the callers of get_gendisk() simply use
bdev->bd_disk now (and most of the put_disk() calls introduced in the previous
step disappear).  We also put that pointer into struct request - ->rq_disk.
That lets us get rid of the disk_index() kludges in md.c (we simply count
relevant IO in the struct gendisk fields) and kill the export of get_gendisk().
	Notice that by now we can move _all_ IO counters into gendisk.  That
will kill a bunch of per-major arrays and, more importantly, allow sard to be
merged in a clean way.  FWIW, we could probably show them as disk/partition
attributes in driverfs...
---
 drivers/block/genhd.c     | 10 ++-----
 drivers/block/ioctl.c     | 65 +++++++++---------------------------------
 drivers/block/ll_rw_blk.c | 15 +++++++++-
 drivers/block/rd.c        |  1 +
 drivers/md/md.c           | 23 ++-------------
 fs/block_dev.c            | 72 ++++++++++++++++++-----------------------------
 include/linux/blkdev.h    |  1 +
 include/linux/fs.h        |  1 +
 include/linux/genhd.h     | 13 +++------
 9 files changed, 68 insertions(+), 133 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 1cc4655c04c9..449e69061bbc 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -61,10 +61,7 @@ void add_disk(struct gendisk *disk)
 {
 	write_lock(&gendisk_lock);
 	list_add(&disk->list, &gendisks[disk->major].list);
-	if (disk->minors > 1)
-		list_add_tail(&disk->full_list, &gendisk_list);
-	else
-		INIT_LIST_HEAD(&disk->full_list);
+	list_add_tail(&disk->full_list, &gendisk_list);
 	write_unlock(&gendisk_lock);
 	disk->flags |= GENHD_FL_UP;
 	register_disk(disk);
@@ -120,8 +117,6 @@ get_gendisk(dev_t dev, int *part)
 	return NULL;
 }
 
-EXPORT_SYMBOL(get_gendisk);
-
 #ifdef CONFIG_PROC_FS
 /* iterator */
 static void *part_start(struct seq_file *part, loff_t *pos)
@@ -158,7 +153,7 @@ static int show_partition(struct seq_file *part, void *v)
 		seq_puts(part, "major minor  #blocks  name\n\n");
 
 	/* Don't show non-partitionable devices or empty devices */
-	if (!get_capacity(sgp))
+	if (!get_capacity(sgp) || sgp->minors == 1)
 		return 0;
 
 	/* show the full disk and all non-0 size partitions of it */
@@ -239,6 +234,7 @@ struct gendisk *alloc_disk(int minors)
 		disk->minors = minors;
 		while (minors >>= 1)
 			disk->minor_shift++;
+		INIT_LIST_HEAD(&disk->full_list);
 		disk->disk_dev.bus = &disk_bus;
 		disk->disk_dev.release = disk_release;
 		disk->disk_dev.driver_data = disk;
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index de2da2b44cad..e420c691763d 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -22,21 +22,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 		return -EFAULT;
 	if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
 		return -EFAULT;
-	disk = get_gendisk(bdev->bd_dev, &part);
-	if (!disk)
-		return -ENXIO;
-	if (bdev != bdev->bd_contains) {
-		put_disk(disk);
+	disk = bdev->bd_disk;
+	if (bdev != bdev->bd_contains)
 		return -EINVAL;
-	}
-	if (part)
-		BUG();
 	part = p.pno;
-	if (part <= 0 || part >= disk->minors) {
-		put_disk(disk);
+	if (part <= 0 || part >= disk->minors)
 		return -EINVAL;
-	}
-
 	switch (a.op) {
 		case BLKPG_ADD_PARTITION:
 			start = p.start >> 9;
@@ -46,49 +37,33 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			    sizeof(long long) > sizeof(long)) {
 				long pstart = start, plength = length;
 				if (pstart != start || plength != length
-				    || pstart < 0 || plength < 0) {
-					put_disk(disk);
+				    || pstart < 0 || plength < 0)
 					return -EINVAL;
-				}
 			}
-
 			/* partition number in use? */
-			if (disk->part[part - 1].nr_sects != 0) {
-				put_disk(disk);
+			if (disk->part[part - 1].nr_sects != 0)
 				return -EBUSY;
-			}
-
 			/* overlap? */
 			for (i = 0; i < disk->minors - 1; i++) {
 				struct hd_struct *s = &disk->part[i];
 				if (!(start+length <= s->start_sect ||
-				      start >= s->start_sect + s->nr_sects)) {
-					put_disk(disk);
+				      start >= s->start_sect + s->nr_sects))
 					return -EBUSY;
-				}
 			}
 			/* all seems OK */
 			add_partition(disk, part, start, length);
-			put_disk(disk);
 			return 0;
 		case BLKPG_DEL_PARTITION:
-			if (disk->part[part - 1].nr_sects == 0) {
-				put_disk(disk);
+			if (disk->part[part - 1].nr_sects == 0)
 				return -ENXIO;
-			}
-
 			/* partition in use? Incomplete check for now. */
 			bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part);
-			if (!bdevp) {
-				put_disk(disk);
+			if (!bdevp)
 				return -ENOMEM;
-			}
 			if (bd_claim(bdevp, &holder) < 0) {
 				bdput(bdevp);
-				put_disk(disk);
 				return -EBUSY;
 			}
-
 			/* all seems OK */
 			fsync_bdev(bdevp);
 			invalidate_bdev(bdevp, 0);
@@ -96,39 +71,25 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg)
 			delete_partition(disk, part);
 			bd_release(bdevp);
 			bdput(bdevp);
-			put_disk(disk);
 			return 0;
 		default:
-			put_disk(disk);
 			return -EINVAL;
 	}
 }
 
 static int blkdev_reread_part(struct block_device *bdev)
 {
-	int part;
-	struct gendisk *disk = get_gendisk(bdev->bd_dev, &part);
-	int res = 0;
+	struct gendisk *disk = bdev->bd_disk;
+	int res;
 
-	if (!disk)
-		return -EINVAL;
-	if (disk->minors == 1 || bdev != bdev->bd_contains) {
-		put_disk(disk);
+	if (disk->minors == 1 || bdev != bdev->bd_contains)
 		return -EINVAL;
-	}
-	if (part)
-		BUG();
-	if (!capable(CAP_SYS_ADMIN)) {
-		put_disk(disk);
+	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	}
-	if (down_trylock(&bdev->bd_sem)) {
-		put_disk(disk);
+	if (down_trylock(&bdev->bd_sem))
 		return -EBUSY;
-	}
 	res = rescan_partitions(disk, bdev);
 	up(&bdev->bd_sem);
-	put_disk(disk);
 	return res;
 }
 
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index ea56c1d8456c..eb877e50a8d1 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1427,7 +1427,19 @@ void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
 	int rw = rq_data_dir(rq);
 	unsigned int index;
 
-	index = disk_index(rq->rq_dev);
+	if (!rq->rq_disk)
+		return;
+
+	if (rw == READ) {
+		rq->rq_disk->rio += new_io;
+		rq->rq_disk->reads += nr_sectors;
+	} else if (rw == WRITE) {
+		rq->rq_disk->wio += new_io;
+		rq->rq_disk->writes += nr_sectors;
+	}
+
+	index = rq->rq_disk->first_minor >> rq->rq_disk->minor_shift;
+
 	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
 		return;
 
@@ -1747,6 +1759,7 @@ get_rq:
 	req->waiting = NULL;
 	req->bio = req->biotail = bio;
 	req->rq_dev = to_kdev_t(bio->bi_bdev->bd_dev);
+	req->rq_disk = bio->bi_bdev->bd_disk;
 	add_request(q, req, insert_here);
 out:
 	if (freereq)
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 7d72b786080c..bbd247fa29dc 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -381,6 +381,7 @@ static int rd_open(struct inode * inode, struct file * filp)
 		rd_bdev[unit]->bd_inode->i_mapping->a_ops = &ramdisk_aops;
 		rd_bdev[unit]->bd_inode->i_size = rd_length[unit];
 		rd_bdev[unit]->bd_queue = &blk_dev[MAJOR_NR].request_queue;
+		rd_bdev[unit]->bd_disk = get_disk(rd_disks[unit]);
 	}
 
 	return 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 205bb0fdeee0..784e3b69213e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2731,18 +2731,9 @@ int unregister_md_personality(int pnum)
 	return 0;
 }
 
-static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK];
 void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors)
 {
-	kdev_t dev = to_kdev_t(rdev->bdev->bd_dev);
-	unsigned int major = major(dev);
-	unsigned int index;
-
-	index = disk_index(dev);
-	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
-		return;
-
-	sync_io[major][index] += nr_sectors;
+	rdev->bdev->bd_disk->sync_io += nr_sectors;
 }
 
 static int is_mddev_idle(mddev_t *mddev)
@@ -2754,16 +2745,8 @@ static int is_mddev_idle(mddev_t *mddev)
 
 	idle = 1;
 	ITERATE_RDEV(mddev,rdev,tmp) {
-		kdev_t dev = to_kdev_t(rdev->bdev->bd_dev);
-		int major = major(dev);
-		int idx = disk_index(dev);
-
-		if ((idx >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
-			continue;
-
-		curr_events = kstat.dk_drive_rblk[major][idx] +
-						kstat.dk_drive_wblk[major][idx] ;
-		curr_events -= sync_io[major][idx];
+		struct gendisk *disk = rdev->bdev->bd_disk;
+		curr_events = disk->reads + disk->writes - disk->sync_io;
 		if ((curr_events - rdev->last_events) > 32) {
 			rdev->last_events = curr_events;
 			idle = 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d029636b07e6..1ad7f467993b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -526,8 +526,6 @@ int check_disk_change(struct block_device *bdev)
 {
 	struct block_device_operations * bdops = bdev->bd_op;
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
-	struct gendisk *disk;
-	int part;
 
 	if (bdops->check_media_change == NULL)
 		return 0;
@@ -537,26 +535,21 @@ int check_disk_change(struct block_device *bdev)
 	if (invalidate_device(dev, 0))
 		printk("VFS: busy inodes on changed media.\n");
 
-	disk = get_gendisk(bdev->bd_dev, &part);
 	if (bdops->revalidate)
 		bdops->revalidate(dev);
-	if (disk && disk->minors > 1)
+	if (bdev->bd_disk->minors > 1)
 		bdev->bd_invalidated = 1;
-	put_disk(disk);
 	return 1;
 }
 
 int full_check_disk_change(struct block_device *bdev)
 {
 	int res = 0;
-	int n;
 	if (bdev->bd_contains != bdev)
 		BUG();
 	down(&bdev->bd_sem);
 	if (check_disk_change(bdev)) {
-		struct gendisk *disk = get_gendisk(bdev->bd_dev, &n);
-		rescan_partitions(disk, bdev);
-		put_disk(disk);
+		rescan_partitions(bdev->bd_disk, bdev);
 		res = 1;
 	}
 	up(&bdev->bd_sem);
@@ -598,6 +591,8 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 	kdev_t dev = to_kdev_t(bdev->bd_dev);
 	struct module *owner = NULL;
 	struct block_device_operations *ops, *old;
+	struct gendisk *disk;
+	int part;
 
 	lock_kernel();
 	ops = get_blkfops(major(dev));
@@ -617,53 +612,41 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 		if (owner)
 			__MOD_DEC_USE_COUNT(owner);
 	}
+	disk = get_gendisk(bdev->bd_dev, &part);
+	if (!disk)
+		goto out1;
 	if (!bdev->bd_contains) {
-		int part;
-		struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
 		bdev->bd_contains = bdev;
-		if (g && part) {
-			struct block_device *disk;
-			disk = bdget(MKDEV(g->major, g->first_minor));
+		if (part) {
+			struct block_device *whole;
+			whole = bdget(MKDEV(disk->major, disk->first_minor));
 			ret = -ENOMEM;
-			if (!disk) {
-				put_disk(g);
+			if (!whole)
 				goto out1;
-			}
-			ret = blkdev_get(disk, file->f_mode, file->f_flags, BDEV_RAW);
-			if (ret) {
-				put_disk(g);
+			ret = blkdev_get(whole, file->f_mode, file->f_flags, BDEV_RAW);
+			if (ret)
 				goto out1;
-			}
-			bdev->bd_contains = disk;
+			bdev->bd_contains = whole;
 		}
-		put_disk(g);
 	}
 	if (bdev->bd_contains == bdev) {
-		int part;
-		struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
-
+		if (!bdev->bd_openers)
+			bdev->bd_disk = disk;
 		if (!bdev->bd_queue) {
 			struct blk_dev_struct *p = blk_dev + major(dev);
 			bdev->bd_queue = &p->request_queue;
 			if (p->queue)
 				bdev->bd_queue =  p->queue(dev);
 		}
-
 		if (bdev->bd_op->open) {
 			ret = bdev->bd_op->open(inode, file);
-			if (ret) {
-				put_disk(g);
+			if (ret)
 				goto out2;
-			}
 		}
 		if (!bdev->bd_openers) {
 			struct backing_dev_info *bdi;
-			sector_t sect = 0;
-
 			bdev->bd_offset = 0;
-			if (g)
-				sect = get_capacity(g);
-			bd_set_size(bdev, (loff_t)sect << 9);
+			bd_set_size(bdev, (loff_t)get_capacity(disk) << 9);
 			bdi = blk_get_backing_dev_info(bdev);
 			if (bdi == NULL)
 				bdi = &default_backing_dev_info;
@@ -671,34 +654,31 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file *
 			bdev->bd_inode->i_data.backing_dev_info = bdi;
 		}
 		if (bdev->bd_invalidated)
-			rescan_partitions(g, bdev);
-		put_disk(g);
+			rescan_partitions(disk, bdev);
 	} else {
 		down(&bdev->bd_contains->bd_sem);
 		bdev->bd_contains->bd_part_count++;
 		if (!bdev->bd_openers) {
-			int part;
-			struct gendisk *g = get_gendisk(bdev->bd_dev, &part);
 			struct hd_struct *p;
-			p = g->part + part - 1;
+			p = disk->part + part - 1;
 			inode->i_data.backing_dev_info =
 			   bdev->bd_inode->i_data.backing_dev_info =
 			   bdev->bd_contains->bd_inode->i_data.backing_dev_info;
-			if (!(g->flags & GENHD_FL_UP) || !p->nr_sects) {
+			if (!(disk->flags & GENHD_FL_UP) || !p->nr_sects) {
 				bdev->bd_contains->bd_part_count--;
 				up(&bdev->bd_contains->bd_sem);
-				put_disk(g);
 				ret = -ENXIO;
 				goto out2;
 			}
 			bdev->bd_queue = bdev->bd_contains->bd_queue;
 			bdev->bd_offset = p->start_sect;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
-			put_disk(g);
+			bdev->bd_disk = disk;
 		}
 		up(&bdev->bd_contains->bd_sem);
 	}
-	bdev->bd_openers++;
+	if (bdev->bd_openers++)
+		put_disk(disk);
 	up(&bdev->bd_sem);
 	unlock_kernel();
 	return 0;
@@ -712,6 +692,7 @@ out2:
 		}
 	}
 out1:
+	put_disk(disk);
 	if (!old) {
 		bdev->bd_op = NULL;
 		if (owner)
@@ -785,15 +766,18 @@ int blkdev_put(struct block_device *bdev, int kind)
 		up(&bdev->bd_contains->bd_sem);
 	}
 	if (!bdev->bd_openers) {
+		struct gendisk *disk = bdev->bd_disk;
 		if (bdev->bd_op->owner)
 			__MOD_DEC_USE_COUNT(bdev->bd_op->owner);
 		bdev->bd_op = NULL;
 		bdev->bd_queue = NULL;
+		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 		if (bdev != bdev->bd_contains) {
 			blkdev_put(bdev->bd_contains, BDEV_RAW);
 			bdev->bd_contains = NULL;
 		}
+		put_disk(disk);
 	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 607641c6cfb1..ccb56d58de6a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,6 +34,7 @@ struct request {
 
 	int rq_status;	/* should split this into a few status bits */
 	kdev_t rq_dev;
+	struct gendisk *rq_disk;
 	int errors;
 	sector_t sector;
 	unsigned long nr_sectors;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cac13f931cec..bca164f4265a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -359,6 +359,7 @@ struct block_device {
 	sector_t		bd_offset;
 	unsigned		bd_part_count;
 	int			bd_invalidated;
+	struct gendisk *	bd_disk;
 };
 
 struct inode {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 030ee2f87891..9de2f51ae935 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -90,6 +90,10 @@ struct gendisk {
 	devfs_handle_t disk_de;		/* piled higher and deeper */
 	struct device *driverfs_dev;
 	struct device disk_dev;
+
+	unsigned sync_io;		/* RAID */
+	unsigned reads, writes;
+	unsigned rio, wio;
 };
 
 /* drivers/block/genhd.c */
@@ -272,15 +276,6 @@ extern void put_disk(struct gendisk *disk);
 /* will go away */
 extern void blk_set_probe(int major, struct gendisk *(p)(int));
 
-static inline unsigned int disk_index (kdev_t dev)
-{
-	int part, res;
-	struct gendisk *g = get_gendisk(kdev_t_to_nr(dev), &part);
-	res = g ? (minor(dev) >> g->minor_shift) : 0;
-	put_disk(g);
-	return res;
-}
-
 #endif
 
 #endif
-- 
cgit v1.2.3


From 19bb2ab92d37ae85fc6d4cb6ae1fea6a8de027b1 Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:30:26 -0700
Subject: [PATCH] oprofile - hooks

This implements the simple hooks we need to catch unmappings, and to
make sure no stale task_struct*'s are ever used by the main oprofile
core mechanism.  If disabled, it compiles to nothing.
---
 arch/i386/Config.help   |  5 +++
 arch/i386/config.in     |  7 ++++
 include/linux/profile.h | 56 ++++++++++++++++++++++++++++++
 kernel/Makefile         |  5 +--
 kernel/exit.c           | 10 ++++--
 kernel/profile.c        | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
 mm/mmap.c               |  8 +++++
 7 files changed, 177 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/profile.h
 create mode 100644 kernel/profile.c

(limited to 'include/linux')

diff --git a/arch/i386/Config.help b/arch/i386/Config.help
index 299dea0c6536..d6f3cdc95f05 100644
--- a/arch/i386/Config.help
+++ b/arch/i386/Config.help
@@ -1048,6 +1048,11 @@ CONFIG_DEBUG_OBSOLETE
   Say Y here if you want to reduce the chances of the tree compiling,
   and are prepared to dig into driver internals to fix compile errors.
 
+Profiling support
+CONFIG_PROFILING
+  Say Y here to enable the extended profiling support mechanisms used
+  by profilers such as OProfile.
+ 
 Software Suspend
 CONFIG_SOFTWARE_SUSPEND
   Enable the possibilty of suspendig machine. It doesn't need APM.
diff --git a/arch/i386/config.in b/arch/i386/config.in
index 784e35d23bce..97a9b862d72f 100644
--- a/arch/i386/config.in
+++ b/arch/i386/config.in
@@ -442,6 +442,13 @@ source drivers/usb/Config.in
 
 source net/bluetooth/Config.in
 
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'Profiling support'
+   bool 'Profiling support (EXPERIMENTAL)' CONFIG_PROFILING
+   endmenu
+fi
+ 
 mainmenu_option next_comment
 comment 'Kernel hacking'
 if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
diff --git a/include/linux/profile.h b/include/linux/profile.h
new file mode 100644
index 000000000000..15c1e91198b0
--- /dev/null
+++ b/include/linux/profile.h
@@ -0,0 +1,56 @@
+#ifndef _LINUX_PROFILE_H
+#define _LINUX_PROFILE_H
+
+#ifdef __KERNEL__
+ 
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <asm/errno.h>
+ 
+enum profile_type {
+	EXIT_TASK,
+	EXIT_MMAP,
+	EXEC_UNMAP
+};
+
+#ifdef CONFIG_PROFILING
+ 
+struct notifier_block;
+struct task_struct;
+struct mm_struct;
+ 
+/* called from do_exit() for the exiting task */
+void profile_exit_task(struct task_struct * task);
+ 
+/* called from do_munmap() when a file-backed executable vma may be unmapped */
+void profile_exec_unmap(struct mm_struct * mm);
+
+/* called at the start of exit_mmap() for the mm being torn down */
+void profile_exit_mmap(struct mm_struct * mm);
+
+int profile_event_register(enum profile_type, struct notifier_block * n);
+
+int profile_event_unregister(enum profile_type, struct notifier_block * n);
+ 
+#else
+
+static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
+{
+	return -ENOSYS;
+}
+ 
+static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
+{
+	return -ENOSYS;
+}
+ 
+#define profile_exit_task(a) do { } while (0)
+#define profile_exec_unmap(a) do { } while (0)
+#define profile_exit_mmap(a) do { } while (0)
+ 
+#endif /* CONFIG_PROFILING */
+ 
+#endif /* __KERNEL__ */
+ 
+#endif /* _LINUX_PROFILE_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index b3fce6d3ac9c..8e18771791de 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,9 +3,10 @@
 #
 
 export-objs = signal.o sys.o kmod.o workqueue.o ksyms.o pm.o exec_domain.o \
-	      printk.o platform.o suspend.o dma.o module.o cpufreq.o
+		printk.o platform.o suspend.o dma.o module.o cpufreq.o \
+		profile.o
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    module.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 6ed07def4c62..c2b0f6eeff0f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -19,6 +19,7 @@
 #include <linux/file.h>
 #include <linux/binfmts.h>
 #include <linux/ptrace.h>
+#include <linux/profile.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -59,11 +60,12 @@ void release_task(struct task_struct * p)
 {
 	struct dentry *proc_dentry;
 	task_t *leader;
-
-	if (p->state < TASK_ZOMBIE)
-		BUG();
+ 
+	BUG_ON(p->state < TASK_ZOMBIE);
+ 
 	if (p != current)
 		wait_task_inactive(p);
+
 	atomic_dec(&p->user->processes);
 	security_ops->task_free_security(p);
 	free_uid(p->user);
@@ -635,6 +637,8 @@ NORET_TYPE void do_exit(long code)
 				current->comm, current->pid,
 				preempt_count());
 
+	profile_exit_task(tsk);
+ 
 fake_volatile:
 	acct_process(code);
 	__exit_mm(tsk);
diff --git a/kernel/profile.c b/kernel/profile.c
new file mode 100644
index 000000000000..7ebffe971ca8
--- /dev/null
+++ b/kernel/profile.c
@@ -0,0 +1,91 @@
+/*
+ *  linux/kernel/profile.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+
+/* Profile event notifications */
+ 
+#ifdef CONFIG_PROFILING
+ 
+static DECLARE_RWSEM(profile_rwsem);
+static struct notifier_block * exit_task_notifier;
+static struct notifier_block * exit_mmap_notifier;
+static struct notifier_block * exec_unmap_notifier;
+ 
+void profile_exit_task(struct task_struct * task)
+{
+	down_read(&profile_rwsem);
+	notifier_call_chain(&exit_task_notifier, 0, task);
+	up_read(&profile_rwsem);
+}
+ 
+void profile_exit_mmap(struct mm_struct * mm)
+{
+	down_read(&profile_rwsem);
+	notifier_call_chain(&exit_mmap_notifier, 0, mm);
+	up_read(&profile_rwsem);
+}
+
+void profile_exec_unmap(struct mm_struct * mm)
+{
+	down_read(&profile_rwsem);
+	notifier_call_chain(&exec_unmap_notifier, 0, mm);
+	up_read(&profile_rwsem);
+}
+
+int profile_event_register(enum profile_type type, struct notifier_block * n)
+{
+	int err = -EINVAL;
+ 
+	down_write(&profile_rwsem);
+ 
+	switch (type) {
+		case EXIT_TASK:
+			err = notifier_chain_register(&exit_task_notifier, n);
+			break;
+		case EXIT_MMAP:
+			err = notifier_chain_register(&exit_mmap_notifier, n);
+			break;
+		case EXEC_UNMAP:
+			err = notifier_chain_register(&exec_unmap_notifier, n);
+			break;
+	}
+ 
+	up_write(&profile_rwsem);
+ 
+	return err;
+}
+
+ 
+int profile_event_unregister(enum profile_type type, struct notifier_block * n)
+{
+	int err = -EINVAL;
+ 
+	down_write(&profile_rwsem);
+ 
+	switch (type) {
+		case EXIT_TASK:
+			err = notifier_chain_unregister(&exit_task_notifier, n);
+			break;
+		case EXIT_MMAP:
+			err = notifier_chain_unregister(&exit_mmap_notifier, n);
+			break;
+		case EXEC_UNMAP:
+			err = notifier_chain_unregister(&exec_unmap_notifier, n);
+			break;
+	}
+
+	up_write(&profile_rwsem);
+	return err;
+}
+
+#endif /* CONFIG_PROFILING */
+
+EXPORT_SYMBOL_GPL(profile_event_register);
+EXPORT_SYMBOL_GPL(profile_event_unregister);
diff --git a/mm/mmap.c b/mm/mmap.c
index 5d43e84413b1..90ae8b22ab96 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/security.h>
+#include <linux/profile.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -1104,6 +1105,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	if (mpnt->vm_start >= end)
 		return 0;
 
+	/* Something will probably happen, so notify. */
+	if (mpnt->vm_file && (mpnt->vm_flags & VM_EXEC))
+		profile_exec_unmap(mm);
+ 
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 */
@@ -1253,7 +1258,10 @@ void exit_mmap(struct mm_struct * mm)
 	mmu_gather_t *tlb;
 	struct vm_area_struct * mpnt;
 
+	profile_exit_mmap(mm);
+ 
 	release_segments(mm);
+ 
 	spin_lock(&mm->page_table_lock);
 
 	tlb = tlb_gather_mmu(mm, 1);
-- 
cgit v1.2.3


From 7e1aee05c99cfbb7e5cf33bae11ab9fa8df6c57c Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:30:32 -0700
Subject: [PATCH] oprofile - dcookies

This implements the persistent path-to-dcookies mapping, and adds a
system call for the user-space profiler to look up the profile data, so
it can tag profiles to specific binaries.
---
 arch/i386/kernel/entry.S  |   1 +
 fs/Makefile               |   4 +-
 fs/dcache.c               |   1 +
 fs/dcookies.c             | 323 ++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/unistd.h |   2 +
 include/linux/dcache.h    |   3 +
 include/linux/dcookies.h  |  69 ++++++++++
 kernel/sys.c              |   2 +
 8 files changed, 404 insertions(+), 1 deletion(-)
 create mode 100644 fs/dcookies.c
 create mode 100644 include/linux/dcookies.h

(limited to 'include/linux')

diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 557b684431c5..e873703e0c34 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -736,6 +736,7 @@ ENTRY(sys_call_table)
 	.long sys_alloc_hugepages /* 250 */
 	.long sys_free_hugepages
 	.long sys_exit_group
+	.long sys_lookup_dcookie
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
diff --git a/fs/Makefile b/fs/Makefile
index d902bdd8bda3..a4320cf860ac 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,7 +6,7 @@
 # 
 
 export-objs :=	open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \
-                fcntl.o read_write.o
+                fcntl.o read_write.o dcookies.o
 
 obj-y :=	open.o read_write.o devices.o file_table.o buffer.o \
 		bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \
@@ -40,6 +40,8 @@ obj-y				+= partitions/
 obj-y				+= driverfs/
 obj-y				+= devpts/
 
+obj-$(CONFIG_PROFILING)		+= dcookies.o
+ 
 # Do not add any filesystems before this line
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
 obj-$(CONFIG_JBD)		+= jbd/
diff --git a/fs/dcache.c b/fs/dcache.c
index ef0871dbcdb2..d0fcfeba16ee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -637,6 +637,7 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
+	dentry->d_cookie = NULL;
 	INIT_LIST_HEAD(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/dcookies.c b/fs/dcookies.c
new file mode 100644
index 000000000000..0236c146b451
--- /dev/null
+++ b/fs/dcookies.c
@@ -0,0 +1,323 @@
+/*
+ * dcookies.c
+ *
+ * Copyright 2002 John Levon <levon@movementarian.org>
+ *
+ * Persistent cookie-path mappings. These are used by
+ * profilers to convert a per-task EIP value into something
+ * non-transitory that can be processed at a later date.
+ * This is done by locking the dentry/vfsmnt pair in the
+ * kernel until released by the tasks needing the persistent
+ * objects. The tag is simply an unsigned long that refers
+ * to the pair and can be looked up from userspace.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/dcache.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/dcookies.h>
+#include <asm/uaccess.h>
+
+/* The dcookies are allocated from a kmem_cache and
+ * hashed onto a small number of lists. None of the
+ * code here is particularly performance critical
+ */
+struct dcookie_struct {
+	struct dentry * dentry;		/* referenced by alloc_dcookie(); its address is the cookie */
+	struct vfsmount * vfsmnt;	/* mount paired with dentry, referenced likewise */
+	struct list_head hash_list;	/* link in one dcookie_hashtable bucket */
+};
+
+static LIST_HEAD(dcookie_users);		/* registered users; non-empty means live */
+static DECLARE_MUTEX(dcookie_sem);		/* held by get/lookup/register/unregister */
+static kmem_cache_t * dcookie_cache;		/* slab for struct dcookie_struct */
+static struct list_head * dcookie_hashtable;	/* bucket array, built by dcookie_init() */
+static size_t hash_size;			/* bucket count, a power of two */
+
+static inline int is_live(void)
+{
+	return !(list_empty(&dcookie_users));	/* live == at least one registered user */
+}
+
+
+/* The dentry stays referenced while its cookie exists, so its address serves as the cookie */
+static inline unsigned long dcookie_value(struct dcookie_struct * dcs)
+{
+	return (unsigned long)dcs->dentry;
+}
+
+
+static size_t dcookie_hash(unsigned long dcookie)
+{
+	return (dcookie >> 2) & (hash_size - 1);	/* mask is valid: hash_size is 2^n */
+}
+
+
+/* Return the hashed entry whose cookie value is @dcookie,
+ * or NULL when no such entry exists.
+ */
+static struct dcookie_struct * find_dcookie(unsigned long dcookie)
+{
+	struct list_head * bucket = dcookie_hashtable + dcookie_hash(dcookie);
+	struct list_head * node;
+
+	/* Only the one bucket the cookie hashes to needs walking. */
+	list_for_each(node, bucket) {
+		struct dcookie_struct * candidate =
+			list_entry(node, struct dcookie_struct, hash_list);
+
+		if (dcookie_value(candidate) == dcookie)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+
+/* Add @dcs to the hash bucket selected by its own cookie value. */
+static void hash_dcookie(struct dcookie_struct * dcs)
+{
+	list_add(&dcs->hash_list, dcookie_hashtable + dcookie_hash(dcookie_value(dcs)));
+}
+
+
+/* Create and hash the cookie for @dentry/@vfsmnt; returns NULL if the allocation fails. */
+static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
+	struct vfsmount * vfsmnt)
+{
+	struct dcookie_struct * fresh = kmem_cache_alloc(dcookie_cache, GFP_KERNEL);
+
+	if (fresh) {
+		/* Take the references that free_dcookie() hands back. */
+		atomic_inc(&dentry->d_count);
+		atomic_inc(&vfsmnt->mnt_count);
+		dentry->d_cookie = fresh;
+		fresh->dentry = dentry;
+		fresh->vfsmnt = vfsmnt;
+		hash_dcookie(fresh);
+	}
+	return fresh;
+}
+
+
+/* This is the main kernel-side routine that retrieves the cookie
+ * value for a dentry/vfsmnt pair.
+ */
+int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
+	unsigned long * cookie)
+{
+	struct dcookie_struct * entry;
+	int ret;
+
+	down(&dcookie_sem);
+
+	/* Cookies are only handed out while somebody is registered. */
+	if (!is_live()) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* Reuse the cookie cached on the dentry, creating it on first use. */
+	entry = dentry->d_cookie;
+	if (entry == NULL)
+		entry = alloc_dcookie(dentry, vfsmnt);
+	if (entry != NULL) {
+		*cookie = dcookie_value(entry);
+		ret = 0;
+	} else {
+		ret = -ENOMEM;
+	}
+
+unlock:
+	up(&dcookie_sem);
+	return ret;
+}
+
+
+/* And here is where the userspace process can look up the cookie value
+ * to retrieve the path.
+ */
+asmlinkage int sys_lookup_dcookie(unsigned long cookie, char * buf, size_t len)
+{
+	struct dcookie_struct * entry;
+	char * page;
+	char * name;
+	size_t avail;
+	int ret;
+
+	/* we could leak path information to users
+	 * without dir read permission without this
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	down(&dcookie_sem);
+
+	/* Both "nobody registered" and "unknown cookie" report -EINVAL. */
+	ret = -EINVAL;
+	if (!is_live())
+		goto unlock;
+
+	entry = find_dcookie(cookie);
+	if (entry == NULL)
+		goto unlock;
+
+	/* Zeroed scratch page that d_path() builds the path in. */
+	ret = -ENOMEM;
+	page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (page == NULL)
+		goto unlock;
+	memset(page, 0, PAGE_SIZE);
+
+	/* FIXME: (deleted) ? */
+	name = d_path(entry->dentry, entry->vfsmnt, page, PAGE_SIZE);
+
+	/* Clamp the copy to the bytes between the returned path and the page end. */
+	avail = page + PAGE_SIZE - name;
+	if (len > avail)
+		len = avail;
+
+	ret = copy_to_user(buf, name, len) ? -EFAULT : 0;
+
+	kfree(page);
+unlock:
+	up(&dcookie_sem);
+	return ret;
+}
+
+
+/*
+ * Set up the dcookie slab cache and the hash table (one page of
+ * list heads). Called from dcookie_register() for the first user.
+ * Returns 0 on success or -ENOMEM if either allocation fails.
+ */
+static int dcookie_init(void)
+{
+	unsigned int shift, bucket;
+	int ret = -ENOMEM;
+
+	dcookie_cache = kmem_cache_create("dcookie_cache",
+		sizeof(struct dcookie_struct),
+		0, 0, NULL, NULL);
+
+	if (!dcookie_cache)
+		goto done;
+
+	/* a single page holds all the bucket heads */
+	dcookie_hashtable = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dcookie_hashtable)
+		goto drop_cache;
+
+	/*
+	 * Work out how many list heads fit into the page and round
+	 * that down to a power of two, because dcookie_hash() masks
+	 * with (hash_size - 1). sizeof(struct list_head) itself is
+	 * not guaranteed to be a power of two.
+	 */
+	hash_size = PAGE_SIZE / sizeof(struct list_head);
+
+	/* shift ends up as the index of the highest set bit */
+	shift = 0;
+	while ((hash_size >> (shift + 1)) != 0)
+		shift++;
+
+	/* the real bucket count, which is also mask + 1 */
+	hash_size = 1UL << shift;
+
+	/* every bucket starts out as an empty list */
+	for (bucket = 0; bucket < hash_size; bucket++)
+		INIT_LIST_HEAD(dcookie_hashtable + bucket);
+
+	ret = 0;
+
+done:
+	return ret;
+
+drop_cache:
+	/* the table allocation failed: do not keep the cache around */
+	kmem_cache_destroy(dcookie_cache);
+	goto done;
+}
+
+
+static void free_dcookie(struct dcookie_struct * dcs)
+{
+	dcs->dentry->d_cookie = NULL;	/* the dentry no longer carries a cookie */
+	dput(dcs->dentry);		/* undoes the d_count bump from alloc_dcookie() */
+	mntput(dcs->vfsmnt);		/* undoes the mnt_count bump from alloc_dcookie() */
+	kmem_cache_free(dcookie_cache, dcs);
+}
+
+
+/* Free every hashed cookie, then the table and the cache; run when the last user leaves. */
+static void dcookie_exit(void)
+{
+	struct list_head * head;
+	struct list_head * cur, * tmp;
+	size_t bucket;
+
+	for (bucket = 0; bucket < hash_size; bucket++) {
+		head = dcookie_hashtable + bucket;
+		list_for_each_safe(cur, tmp, head) {
+			struct dcookie_struct * victim =
+				list_entry(cur, struct dcookie_struct, hash_list);
+			list_del(&victim->hash_list);
+			free_dcookie(victim);
+		}
+	}
+
+	kfree(dcookie_hashtable);
+	kmem_cache_destroy(dcookie_cache);
+}
+
+
+struct dcookie_user {
+	struct list_head next;	/* link in the dcookie_users list */
+};
+ 
+struct dcookie_user * dcookie_register(void)
+{
+	struct dcookie_user * handle;
+
+	down(&dcookie_sem);
+
+	handle = kmalloc(sizeof(struct dcookie_user), GFP_KERNEL);
+	if (handle == NULL)
+		goto unlock;
+
+	/* The first user brings up the cache and hash table; fail if that fails. */
+	if (!is_live() && dcookie_init() != 0) {
+		kfree(handle);
+		handle = NULL;
+		goto unlock;
+	}
+
+	list_add(&handle->next, &dcookie_users);
+
+unlock:
+	up(&dcookie_sem);
+	return handle;
+}
+
+
+void dcookie_unregister(struct dcookie_user * user)
+{
+	down(&dcookie_sem);
+
+	list_del(&user->next);	/* from here on this user no longer counts as live */
+	kfree(user);
+
+	if (!is_live())		/* that was the last user: tear everything down */
+		dcookie_exit();
+
+	up(&dcookie_sem);
+}
+
+EXPORT_SYMBOL_GPL(dcookie_register);
+EXPORT_SYMBOL_GPL(dcookie_unregister);
+EXPORT_SYMBOL_GPL(get_dcookie);
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 8765a0f82aff..159dfa7fefe1 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -257,6 +257,8 @@
 #define __NR_alloc_hugepages	250
 #define __NR_free_hugepages	251
 #define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+  
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 71708edafce9..76a5085043e1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -66,6 +66,8 @@ static __inline__ unsigned int full_name_hash(const unsigned char * name, unsign
 
 #define DNAME_INLINE_LEN 16
 
+struct dcookie_struct;
+ 
 struct dentry {
 	atomic_t d_count;
 	unsigned int d_flags;
@@ -84,6 +86,7 @@ struct dentry {
 	unsigned long d_vfs_flags;
 	void * d_fsdata;		/* fs-specific data */
 	unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+	struct dcookie_struct * d_cookie; /* cookie, if any */
 };
 
 struct dentry_operations {
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
new file mode 100644
index 000000000000..b2ae9692dc05
--- /dev/null
+++ b/include/linux/dcookies.h
@@ -0,0 +1,69 @@
+/*
+ * dcookies.h
+ *
+ * Persistent cookie-path mappings
+ *
+ * Copyright 2002 John Levon <levon@movementarian.org>
+ */
+
+#ifndef DCOOKIES_H
+#define DCOOKIES_H
+ 
+#include <linux/config.h>
+
+#ifdef CONFIG_PROFILING
+ 
+#include <linux/types.h>
+ 
+struct dcookie_user;
+ 
+/**
+ * dcookie_register - register a user of dcookies
+ *
+ * Register as a dcookie user. Returns %NULL on failure.
+ */
+struct dcookie_user * dcookie_register(void);
+
+/**
+ * dcookie_unregister - unregister a user of dcookies
+ *
+ * Unregister as a dcookie user. This may invalidate
+ * any dcookie values returned from get_dcookie().
+ */
+void dcookie_unregister(struct dcookie_user * user);
+  
+/**
+ * get_dcookie - acquire a dcookie
+ *
+ * Convert the given dentry/vfsmount pair into
+ * a cookie value.
+ *
+ * Returns -EINVAL if no living task has registered as a
+ * dcookie user.
+ *
+ * Returns 0 on success, with *cookie filled in
+ */
+int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
+	unsigned long * cookie);
+
+#else
+
+static inline struct dcookie_user * dcookie_register(void)
+{
+	return 0;	/* stub (static inline, as it lives in a header): always fails */
+}
+
+static inline void dcookie_unregister(struct dcookie_user * user)
+{
+	return;	/* stub (static inline, as it lives in a header): nothing to undo */
+}
+ 
+static inline int get_dcookie(struct dentry * dentry,
+	struct vfsmount * vfsmnt, unsigned long * cookie)
+{
+	return -ENOSYS;	/* !CONFIG_PROFILING: no cookie is produced, *cookie is untouched */
+} 
+ 
+#endif /* CONFIG_PROFILING */
+ 
+#endif /* DCOOKIES_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 5b7e84384cfa..3c2992ac68f2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <linux/times.h>
 #include <linux/security.h>
+#include <linux/dcookies.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -202,6 +203,7 @@ asmlinkage long sys_ni_syscall(void)
 cond_syscall(sys_nfsservctl)
 cond_syscall(sys_quotactl)
 cond_syscall(sys_acct)
+cond_syscall(sys_lookup_dcookie)
 
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
-- 
cgit v1.2.3


From 120790b8fe2d901d99f459a567fefbb35c2d15e1 Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:30:38 -0700
Subject: [PATCH] oprofile - timer hook

This implements a simple hook into the profiling timer for x86 so that
non-perfctr machines can still use oprofile.  This has proven useful for
laptops and the like.

It also reduces header dependencies a bit by centralising readprofile
code.
---
 arch/i386/kernel/Makefile         |  1 +
 arch/i386/kernel/apic.c           | 12 ++--------
 arch/i386/kernel/i386_ksyms.c     |  3 +++
 arch/i386/kernel/profile.c        | 45 +++++++++++++++++++++++++++++++++++
 arch/i386/kernel/time.c           |  5 ----
 arch/i386/mach-generic/do_timer.h |  3 +--
 arch/i386/mach-visws/do_timer.h   |  3 +--
 fs/proc/proc_misc.c               |  1 +
 include/asm-i386/hw_irq.h         | 49 ++++++++++++++++++++++++++++++++-------
 include/linux/profile.h           | 11 +++++++++
 include/linux/sched.h             |  4 ----
 init/main.c                       | 20 ++--------------
 kernel/profile.c                  | 30 ++++++++++++++++++++++++
 kernel/timer.c                    |  4 ----
 14 files changed, 138 insertions(+), 53 deletions(-)
 create mode 100644 arch/i386/kernel/profile.c

(limited to 'include/linux')

diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index d201c60ac5c2..55f9312b7f39 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= suspend.o
 obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
+obj-$(CONFIG_PROFILING)		+= profile.o
 
 EXTRA_AFLAGS   := -traditional
 
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index c2f56438f749..bff34a4d1dcf 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -1008,17 +1008,9 @@ int setup_profiling_timer(unsigned int multiplier)
 
 inline void smp_local_timer_interrupt(struct pt_regs * regs)
 {
-	int user = user_mode(regs);
 	int cpu = smp_processor_id();
 
-	/*
-	 * The profiling function is SMP safe. (nothing can mess
-	 * around with "current", and the profiling counters are
-	 * updated with atomic operations). This is especially
-	 * useful with a profiling multiplier != 1
-	 */
-	if (!user)
-		x86_do_profile(regs->eip);
+	x86_do_profile(regs);
 
 	if (--prof_counter[cpu] <= 0) {
 		/*
@@ -1036,7 +1028,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs)
 		}
 
 #ifdef CONFIG_SMP
-		update_process_times(user);
+		update_process_times(user_mode(regs));
 #endif
 	}
 
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 79c204a1f476..9314e0b9f880 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -167,6 +167,9 @@ EXPORT_SYMBOL(get_wchan);
 
 EXPORT_SYMBOL(rtc_lock);
 
+EXPORT_SYMBOL_GPL(register_profile_notifier);
+EXPORT_SYMBOL_GPL(unregister_profile_notifier);
+ 
 #undef memcpy
 #undef memset
 extern void * memset(void *,int,__kernel_size_t);
diff --git a/arch/i386/kernel/profile.c b/arch/i386/kernel/profile.c
new file mode 100644
index 000000000000..334af20585cb
--- /dev/null
+++ b/arch/i386/kernel/profile.c
@@ -0,0 +1,45 @@
+/*
+ *	linux/arch/i386/kernel/profile.c
+ *
+ *	(C) 2002 John Levon <levon@movementarian.org>
+ *
+ */
+
+#include <linux/profile.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/irq.h>
+#include <asm/hw_irq.h> 
+ 
+static struct notifier_block * profile_listeners;	/* chain run by x86_profile_hook() */
+static rwlock_t profile_lock = RW_LOCK_UNLOCKED;	/* guards profile_listeners */
+ 
+int register_profile_notifier(struct notifier_block * nb)
+{
+	int err;
+	write_lock_irq(&profile_lock);	/* irqs off: x86_profile_hook() read-locks from the timer path */
+	err = notifier_chain_register(&profile_listeners, nb);
+	write_unlock_irq(&profile_lock);
+	return err;	/* whatever notifier_chain_register() reported */
+}
+
+
+int unregister_profile_notifier(struct notifier_block * nb)
+{
+	int err;
+	write_lock_irq(&profile_lock);	/* irqs off: x86_profile_hook() read-locks from the timer path */
+	err = notifier_chain_unregister(&profile_listeners, nb);
+	write_unlock_irq(&profile_lock);
+	return err;	/* whatever notifier_chain_unregister() reported */
+}
+
+
+void x86_profile_hook(struct pt_regs * regs)
+{
+	/* we would not even need this lock if
+	 * we had a global cli() on register/unregister
+	 */ 
+	read_lock(&profile_lock);	/* keeps (un)register out while the chain runs */
+	notifier_call_chain(&profile_listeners, 0, regs);	/* val 0, data = regs */
+	read_unlock(&profile_lock);
+}
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 4e3b4f1cb4b3..cf53d2c1d50a 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -64,11 +64,6 @@ extern spinlock_t i8259A_lock;
 
 #include "do_timer.h"
 
-/*
- * for x86_do_profile()
- */
-#include <linux/irq.h>
-
 u64 jiffies_64;
 
 unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
diff --git a/arch/i386/mach-generic/do_timer.h b/arch/i386/mach-generic/do_timer.h
index 7ee964b2ebf2..4a24f8ad0635 100644
--- a/arch/i386/mach-generic/do_timer.h
+++ b/arch/i386/mach-generic/do_timer.h
@@ -20,8 +20,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs)
  * system, in that case we have to call the local interrupt handler.
  */
 #ifndef CONFIG_X86_LOCAL_APIC
-	if (!user_mode(regs))
-		x86_do_profile(regs->eip);
+	x86_do_profile(regs);
 #else
 	if (!using_apic_timer)
 		smp_local_timer_interrupt(regs);
diff --git a/arch/i386/mach-visws/do_timer.h b/arch/i386/mach-visws/do_timer.h
index b2c1cbed5cb9..d19c7063e17d 100644
--- a/arch/i386/mach-visws/do_timer.h
+++ b/arch/i386/mach-visws/do_timer.h
@@ -15,8 +15,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs)
  * system, in that case we have to call the local interrupt handler.
  */
 #ifndef CONFIG_X86_LOCAL_APIC
-	if (!user_mode(regs))
-		x86_do_profile(regs->eip);
+	x86_do_profile(regs);
 #else
 	if (!using_apic_timer)
 		smp_local_timer_interrupt(regs);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7bdea5bbe922..cbafa4129498 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -38,6 +38,7 @@
 #include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/times.h>
+#include <linux/profile.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h
index f23f4f75ce65..1a60daa9172e 100644
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
@@ -13,6 +13,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/profile.h>
 #include <asm/atomic.h>
 #include <asm/irq.h>
 
@@ -65,20 +66,31 @@ extern char _stext, _etext;
 
 #define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
 
-extern unsigned long prof_cpu_mask;
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
 /*
- * x86 profiling function, SMP safe. We might want to do this in
- * assembly totally?
+ * The profiling function is SMP safe. (nothing can mess
+ * around with "current", and the profiling counters are
+ * updated with atomic operations). This is especially
+ * useful with a profiling multiplier != 1
  */
-static inline void x86_do_profile (unsigned long eip)
+static inline void x86_do_profile(struct pt_regs * regs)
 {
+	unsigned long eip;
+	extern unsigned long prof_cpu_mask;
+	extern char _stext;
+#ifdef CONFIG_PROFILING
+	extern void x86_profile_hook(struct pt_regs *);
+ 
+	x86_profile_hook(regs);
+#endif
+ 
+	if (user_mode(regs))
+		return;
+ 
 	if (!prof_buffer)
 		return;
 
+	eip = regs->eip;
+ 
 	/*
 	 * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
 	 * (default is all CPUs.)
@@ -97,7 +109,28 @@ static inline void x86_do_profile (unsigned long eip)
 		eip = prof_len-1;
 	atomic_inc((atomic_t *)&prof_buffer[eip]);
 }
+ 
+struct notifier_block;
+ 
+#ifdef CONFIG_PROFILING
+ 
+int register_profile_notifier(struct notifier_block * nb);
+int unregister_profile_notifier(struct notifier_block * nb);
+
+#else
+
+static inline int register_profile_notifier(struct notifier_block * nb)
+{
+	return -ENOSYS;	/* !CONFIG_PROFILING: there is no hook to attach to */
+}
+
+static inline int unregister_profile_notifier(struct notifier_block * nb)
+{
+	return -ENOSYS;	/* !CONFIG_PROFILING: nothing can have been registered */
+}
 
+#endif /* CONFIG_PROFILING */
+ 
 #ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */
 static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
 	if (IO_APIC_IRQ(i))
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 15c1e91198b0..11fbe9cec572 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,6 +8,17 @@
 #include <linux/init.h>
 #include <asm/errno.h>
  
+/* parse command line */
+int __init profile_setup(char * str);
+ 
+/* init basic kernel profiler */
+void __init profile_init(void);
+
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+
 enum profile_type {
 	EXIT_TASK,
 	EXIT_MMAP,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89c4ead4cf4b..764a3ebf3c24 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -492,10 +492,6 @@ extern unsigned long itimer_ticks;
 extern unsigned long itimer_next;
 extern void do_timer(struct pt_regs *);
 
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
 extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
 extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
 extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
diff --git a/init/main.c b/init/main.c
index c6023edc03f3..1850a1c3686d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -30,6 +30,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/security.h>
 #include <linux/workqueue.h>
+#include <linux/profile.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -52,7 +53,6 @@
 #error Sorry, your GCC is too old. It builds incorrect kernels.
 #endif
 
-extern char _stext, _etext;
 extern char *linux_banner;
 
 static int init(void *);
@@ -130,13 +130,6 @@ __setup("maxcpus=", maxcpus);
 static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 
-static int __init profile_setup(char *str)
-{
-    int par;
-    if (get_option(&str,&par)) prof_shift = par;
-	return 1;
-}
-
 __setup("profile=", profile_setup);
 
 static int __init checksetup(char *line)
@@ -411,16 +404,7 @@ asmlinkage void __init start_kernel(void)
 #ifdef CONFIG_MODULES
 	init_modules();
 #endif
-	if (prof_shift) {
-		unsigned int size;
-		/* only text is profiled */
-		prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
-		prof_len >>= prof_shift;
-		
-		size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
-		prof_buffer = (unsigned int *) alloc_bootmem(size);
-	}
-
+	profile_init();
 	kmem_cache_init();
 	local_irq_enable();
 	calibrate_delay();
diff --git a/kernel/profile.c b/kernel/profile.c
index 7ebffe971ca8..756f142b1f35 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -9,6 +9,36 @@
 #include <linux/notifier.h>
 #include <linux/mm.h>
 
+extern char _stext, _etext;	/* bounds of the profiled text, see profile_init() */
+
+unsigned int * prof_buffer;	/* hit counters, allocated by profile_init() */
+unsigned long prof_len;		/* number of counters in prof_buffer */
+unsigned long prof_shift;	/* set from "profile="; 0 leaves the buffer unallocated */
+
+int __init profile_setup(char * str)
+{
+	int par;
+	if (get_option(&str,&par))	/* prof_shift only changes if get_option() succeeds */
+		prof_shift = par;
+	return 1;			/* always 1, whether or not a value was taken */
+}
+
+
+/* Allocate the boot-time hit buffer; does nothing while prof_shift is still 0. */
+void __init profile_init(void)
+{
+	unsigned int bytes;
+
+	if (prof_shift == 0)
+		return;
+
+	/* only text is profiled: one counter per 2^prof_shift bytes of it */
+	prof_len = ((unsigned long) &_etext - (unsigned long) &_stext) >> prof_shift;
+
+	bytes = prof_len * sizeof(unsigned int) + PAGE_SIZE - 1;
+	prof_buffer = (unsigned int *) alloc_bootmem(bytes);
+}
+
 /* Profile event notifications */
  
 #ifdef CONFIG_PROFILING
diff --git a/kernel/timer.c b/kernel/timer.c
index bf0077634c93..2d30f7fd0ecb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -406,10 +406,6 @@ long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
 long time_reftime;			/* time at last adjustment (s)	*/
 long time_adjust;
 
-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-
 /*
  * this routine handles the overflow of the microsecond field
  *
-- 
cgit v1.2.3


From 99ee21eefb9e88a850072e7dab6f0f078f560ece Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:30:56 -0700
Subject: [PATCH] oprofile - core

Add the oprofile core.  The core design is very similar to that we
discussed in private mail.  The nasty details should be documented in
the patch below.
---
 drivers/oprofile/buffer_sync.c    | 394 ++++++++++++++++++++++++++++++++++++++
 drivers/oprofile/buffer_sync.h    |  19 ++
 drivers/oprofile/cpu_buffer.c     | 135 +++++++++++++
 drivers/oprofile/cpu_buffer.h     |  45 +++++
 drivers/oprofile/event_buffer.c   | 186 ++++++++++++++++++
 drivers/oprofile/event_buffer.h   |  42 ++++
 drivers/oprofile/oprof.c          | 154 +++++++++++++++
 drivers/oprofile/oprof.h          |  34 ++++
 drivers/oprofile/oprofile_files.c |  91 +++++++++
 drivers/oprofile/oprofile_stats.c |  77 ++++++++
 drivers/oprofile/oprofile_stats.h |  31 +++
 drivers/oprofile/oprofilefs.c     | 306 +++++++++++++++++++++++++++++
 include/linux/oprofile.h          |  98 ++++++++++
 13 files changed, 1612 insertions(+)
 create mode 100644 drivers/oprofile/buffer_sync.c
 create mode 100644 drivers/oprofile/buffer_sync.h
 create mode 100644 drivers/oprofile/cpu_buffer.c
 create mode 100644 drivers/oprofile/cpu_buffer.h
 create mode 100644 drivers/oprofile/event_buffer.c
 create mode 100644 drivers/oprofile/event_buffer.h
 create mode 100644 drivers/oprofile/oprof.c
 create mode 100644 drivers/oprofile/oprof.h
 create mode 100644 drivers/oprofile/oprofile_files.c
 create mode 100644 drivers/oprofile/oprofile_stats.c
 create mode 100644 drivers/oprofile/oprofile_stats.h
 create mode 100644 drivers/oprofile/oprofilefs.c
 create mode 100644 include/linux/oprofile.h

(limited to 'include/linux')

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
new file mode 100644
index 000000000000..46360ee22da2
--- /dev/null
+++ b/drivers/oprofile/buffer_sync.c
@@ -0,0 +1,394 @@
+/**
+ * @file buffer_sync.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * This is the core of the buffer management. Each
+ * CPU buffer is processed and entered into the
+ * global event buffer. Such processing is necessary
+ * in several circumstances, mentioned below.
+ *
+ * The processing does the job of converting the
+ * transitory EIP value into a persistent dentry/offset
+ * value that the profiler can record at its leisure.
+ *
+ * See fs/dcookies.c for a description of the dentry/offset
+ * objects.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+#include <linux/dcookies.h>
+#include <linux/notifier.h>
+#include <linux/profile.h>
+#include <linux/workqueue.h>
+
+#include "event_buffer.h"
+#include "cpu_buffer.h"
+#include "oprofile_stats.h"
+ 
+#define DEFAULT_EXPIRE (HZ / 4)
+ 
+static void wq_sync_buffers(void *);
+static DECLARE_WORK(sync_wq, wq_sync_buffers, 0);
+ 
+static struct timer_list sync_timer;
+static void timer_ping(unsigned long data);
+static void sync_cpu_buffers(void);
+
+ 
+/* We must make sure to process every entry in the CPU buffers
+ * before a task gets the PF_EXITING flag, otherwise we will hold
+ * references to a possibly freed task_struct. We are safe with
+ * samples past the PF_EXITING point in do_exit(), because we
+ * explicitly check for that in cpu_buffer.c.
+ */
+static int exit_task_notify(struct notifier_block * self, unsigned long val, void * data)
+{
+	sync_cpu_buffers();
+	return 0;
+}
+ 
+/* There are two cases of tasks modifying task->mm->mmap list we
+ * must concern ourselves with. First, when a task is about to
+ * exit (exit_mmap()), we should process the buffer to deal with
+ * any samples in the CPU buffer, before we lose the ->mmap information
+ * we need. Second, a task may unmap (part of) an executable mmap,
+ * so we want to process samples before that happens too
+ */
+static int mm_notify(struct notifier_block * self, unsigned long val, void * data)
+{
+	sync_cpu_buffers();
+	return 0;
+}
+
+ 
+static struct notifier_block exit_task_nb = {
+	.notifier_call	= exit_task_notify,
+};
+
+static struct notifier_block exec_unmap_nb = {
+	.notifier_call	= mm_notify,
+};
+
+static struct notifier_block exit_mmap_nb = {
+	.notifier_call	= mm_notify,
+};
+ 
+ 
+/* Register the profile event hooks, then arm the sync timer; 0 or the register error. */
+int sync_start(void)
+{
+	int err = profile_event_register(EXIT_TASK, &exit_task_nb);
+	if (err)
+		return err;
+	err = profile_event_register(EXIT_MMAP, &exit_mmap_nb);
+	if (err)
+		goto out2;
+	err = profile_event_register(EXEC_UNMAP, &exec_unmap_nb);
+	if (err)
+		goto out3;
+	init_timer(&sync_timer);	/* do not rely on static zeroing before add_timer() */
+	sync_timer.function = timer_ping;
+	sync_timer.expires = jiffies + DEFAULT_EXPIRE;
+	add_timer(&sync_timer);
+	return 0;
+out3:
+	profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
+out2:
+	profile_event_unregister(EXIT_TASK, &exit_task_nb);
+	return err;
+}
+
+
+void sync_stop(void)
+{
+	profile_event_unregister(EXIT_TASK, &exit_task_nb);
+	profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
+	profile_event_unregister(EXEC_UNMAP, &exec_unmap_nb);
+	del_timer_sync(&sync_timer);
+}
+
+ 
+/* Optimisation. We can manage without taking the dcookie sem
+ * because we cannot reach this code without at least one
+ * dcookie user still being registered (namely, the reader
+ * of the event buffer). */
+static inline unsigned long fast_get_dcookie(struct dentry * dentry,
+	struct vfsmount * vfsmnt)
+{
+	unsigned long cookie;
+ 
+	if (dentry->d_cookie)
+		return (unsigned long)dentry;
+	get_dcookie(dentry, vfsmnt, &cookie);
+	return cookie;
+}
+
+ 
+/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+ * which corresponds loosely to "application name". This is
+ * not strictly necessary but allows oprofile to associate
+ * shared-library samples with particular applications
+ */
+static unsigned long get_exec_dcookie(struct mm_struct * mm)
+{
+	unsigned long cookie = 0;
+	struct vm_area_struct * vma;
+
+	if (!mm)
+		goto out;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!vma->vm_file)
+			continue;
+		/* '!' binds tighter than '&', so the mask test needs its own parentheses */
+		if (!(vma->vm_flags & VM_EXECUTABLE))
+			continue;
+		cookie = fast_get_dcookie(vma->vm_file->f_dentry,
+			vma->vm_file->f_vfsmnt);
+		break;
+	}
+out:
+	return cookie;
+}
+
+
+/* Convert the EIP value of a sample into a persistent dentry/offset
+ * pair that can then be added to the global event buffer. We make
+ * sure to do this lookup before a mm->mmap modification happens so
+ * we don't lose track.
+ */
+static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
+{
+	unsigned long cookie = 0;
+	struct vm_area_struct * vma;
+
+	/* The loop condition already stops on a NULL vma, so the body needs
+	 * no separate NULL test. *offset is only written when a cookie is found.
+	 */
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		/* no backing file means no dentry to build a cookie from */
+		if (!vma->vm_file)
+			continue;
+
+		if (addr < vma->vm_start || addr >= vma->vm_end)
+			continue;
+
+		cookie = fast_get_dcookie(vma->vm_file->f_dentry,
+			vma->vm_file->f_vfsmnt);
+		*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
+		break;
+	}
+	return cookie;
+}
+
+
+static unsigned long last_cookie = ~0UL;
+ 
+static void add_cpu_switch(int i)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(CPU_SWITCH_CODE);
+	add_event_entry(i);
+	last_cookie = ~0UL;
+}
+
+ 
+static void add_ctx_switch(pid_t pid, unsigned long cookie)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(CTX_SWITCH_CODE); 
+	add_event_entry(pid);
+	add_event_entry(cookie);
+}
+
+ 
+static void add_cookie_switch(unsigned long cookie)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(COOKIE_SWITCH_CODE);
+	add_event_entry(cookie);
+}
+
+ 
+static void add_sample_entry(unsigned long offset, unsigned long event)
+{
+	add_event_entry(offset);
+	add_event_entry(event);
+}
+
+
+static void add_us_sample(struct mm_struct * mm, struct op_sample * s)
+{
+	unsigned long cookie;
+	off_t offset;
+ 
+ 	cookie = lookup_dcookie(mm, s->eip, &offset);
+ 
+	if (!cookie)
+		return;
+
+	if (cookie != last_cookie) {
+		add_cookie_switch(cookie);
+		last_cookie = cookie;
+	}
+
+	add_sample_entry(offset, s->event);
+}
+
+ 
+static inline int is_kernel(unsigned long val)
+{
+	return val > __PAGE_OFFSET;
+}
+
+
+/* Add a sample to the global event buffer. If possible the
+ * sample is converted into a persistent dentry/offset pair
+ * for later lookup from userspace.
+ */
+static void add_sample(struct mm_struct * mm, struct op_sample * s)
+{
+	if (is_kernel(s->eip)) {
+		add_sample_entry(s->eip, s->event);
+	} else if (mm) {
+		add_us_sample(mm, s);
+	}
+}
+ 
+ 
+static void release_mm(struct mm_struct * mm)
+{
+	if (mm)
+		up_read(&mm->mmap_sem);
+}
+
+
+/* Take the task's mmap_sem to protect ourselves from
+ * races when we do lookup_dcookie().
+ */
+static struct mm_struct * take_task_mm(struct task_struct * task)
+{
+	struct mm_struct * mm;
+	task_lock(task);
+	mm = task->mm;
+	task_unlock(task);
+ 
+	/* If task->mm is non-NULL, mm_count must be at least 1. It cannot
+	 * drop to 0 without the task exiting, which will have to sleep
+	 * on buffer_sem first. So we do not need to mark mm_count
+	 * ourselves.
+	 */
+	if (mm) {
+		/* More ugliness. If a task took its mmap
+		 * sem then came to sleep on buffer_sem we
+		 * will deadlock waiting for it. So we can
+		 * but try. This will lose samples :/
+		 */
+		if (!down_read_trylock(&mm->mmap_sem)) {
+			/* FIXME: this underestimates samples lost */
+			atomic_inc(&oprofile_stats.sample_lost_mmap_sem);
+			mm = NULL;
+		}
+	}
+ 
+	return mm;
+}
+ 
+ 
+static inline int is_ctx_switch(unsigned long val)
+{
+	return val == ~0UL;
+}
+ 
+
+/* Sync one of the CPU's buffers into the global event buffer.
+ * Here we need to go through each batch of samples punctuated
+ * by context switch notes, taking the task's mmap_sem and doing
+ * lookup in task->mm->mmap to convert EIP into dcookie/offset
+ * value.
+ */
+static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
+{
+	struct mm_struct * mm = 0;
+	struct task_struct * new;
+	unsigned long cookie;
+	int i;
+ 
+	for (i=0; i < cpu_buf->pos; ++i) {
+		struct op_sample * s = &cpu_buf->buffer[i];
+ 
+		if (is_ctx_switch(s->eip)) {
+			new = (struct task_struct *)s->event;
+ 
+			release_mm(mm);
+			mm = take_task_mm(new);
+ 
+			cookie = get_exec_dcookie(mm);
+			add_ctx_switch(new->pid, cookie);
+		} else {
+			add_sample(mm, s);
+		}
+	}
+	release_mm(mm);
+
+	cpu_buf->pos = 0;
+}
+ 
+ 
+/* Process each CPU's local buffer into the global
+ * event buffer.
+ */
+static void sync_cpu_buffers(void)
+{
+	int i;
+
+	down(&buffer_sem);
+ 
+	for (i = 0; i < NR_CPUS; ++i) {
+		struct oprofile_cpu_buffer * cpu_buf;
+ 
+		if (!cpu_possible(i))
+			continue;
+ 
+		cpu_buf = &cpu_buffer[i];
+ 
+		/* We take a spin lock even though we might
+		 * sleep. It's OK because other users are try
+		 * lockers only, and this region is already
+		 * protected by buffer_sem. It's raw to prevent
+		 * the preempt bogometer firing. Fruity, huh ? */
+		_raw_spin_lock(&cpu_buf->int_lock);
+		add_cpu_switch(i);
+		sync_buffer(cpu_buf);
+		_raw_spin_unlock(&cpu_buf->int_lock);
+	}
+
+	up(&buffer_sem);
+ 
+	mod_timer(&sync_timer, jiffies + DEFAULT_EXPIRE);
+}
+ 
+
+static void wq_sync_buffers(void * data)
+{
+	sync_cpu_buffers();
+}
+ 
+ 
+/* It is possible that we could have no munmap() or
+ * other events for a period of time. This will lead
+ * the CPU buffers to overflow and lose samples and
+ * context switches. We try to reduce the problem
+ * by timing out when nothing happens for a while.
+ */
+static void timer_ping(unsigned long data)
+{
+	schedule_work(&sync_wq);
+	/* timer is re-added by the scheduled task */
+}
diff --git a/drivers/oprofile/buffer_sync.h b/drivers/oprofile/buffer_sync.h
new file mode 100644
index 000000000000..a8def27d8502
--- /dev/null
+++ b/drivers/oprofile/buffer_sync.h
@@ -0,0 +1,19 @@
+/**
+ * @file buffer_sync.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROFILE_BUFFER_SYNC_H
+#define OPROFILE_BUFFER_SYNC_H
+ 
+/* add the necessary profiling hooks */
+int sync_start(void);
+
+/* remove the hooks */
+void sync_stop(void);
+ 
+#endif /* OPROFILE_BUFFER_SYNC_H */
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
new file mode 100644
index 000000000000..42af606defd4
--- /dev/null
+++ b/drivers/oprofile/cpu_buffer.c
@@ -0,0 +1,135 @@
+/**
+ * @file cpu_buffer.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Each CPU has a local buffer that stores PC value/event
+ * pairs. We also log context switches when we notice them.
+ * Eventually each CPU's buffer is processed into the global
+ * event buffer by sync_cpu_buffers().
+ *
+ * We use a local buffer for two reasons: an NMI or similar
+ * interrupt cannot synchronise, and high sampling rates
+ * would lead to catastrophic global synchronisation if
+ * a global buffer was used.
+ */
+
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+ 
+#include "cpu_buffer.h"
+#include "oprof.h"
+#include "oprofile_stats.h"
+
+struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;
+
+static unsigned long buffer_size;
+ 
+static void __free_cpu_buffers(int num)
+{
+	int i;
+ 
+	for (i=0; i < num; ++i) {
+		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
+ 
+		if (!cpu_possible(i)) 
+			continue;
+ 
+		vfree(b->buffer);
+	}
+}
+ 
+ 
+/* Allocate and reset one sample buffer per possible CPU; returns 0 or -ENOMEM. */
+int alloc_cpu_buffers(void)
+{
+	int i;
+
+	buffer_size = fs_cpu_buffer_size;
+	for (i=0; i < NR_CPUS; ++i) {
+		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
+
+		if (!cpu_possible(i))
+			continue;
+
+		b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size);
+		if (!b->buffer)
+			goto fail;
+		spin_lock_init(&b->int_lock);
+		b->pos = 0;
+		b->last_task = 0;
+		b->sample_received = 0;
+		b->sample_lost_locked = 0;
+		b->sample_lost_overflow = 0;
+		b->sample_lost_task_exit = 0;
+	}
+	return 0;
+fail:
+	__free_cpu_buffers(i);
+	return -ENOMEM;
+}
+ 
+
+void free_cpu_buffers(void)
+{
+	__free_cpu_buffers(NR_CPUS);
+}
+
+ 
+/* Note we can't use a semaphore here as this is supposed to
+ * be safe from any context. Instead we trylock the CPU's int_lock.
+ * int_lock is taken by the processing code in sync_cpu_buffers()
+ * so we avoid disturbing that.
+ */
+void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu)
+{
+	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
+	struct task_struct * task;
+
+	/* temporary ? */
+	BUG_ON(!oprofile_started);
+
+	cpu_buf->sample_received++;
+
+	if (!spin_trylock(&cpu_buf->int_lock)) {
+		cpu_buf->sample_lost_locked++;
+		return;
+	}
+	/* Need room for a context-switch note plus the sample. Written as an
+	 * addition so that a buffer_size below 2 cannot wrap the comparison. */
+	if (cpu_buf->pos + 2 > buffer_size) {
+		cpu_buf->sample_lost_overflow++;
+		goto out;
+	}
+	task = current;
+
+	/* notice a task switch */
+	if (cpu_buf->last_task != task) {
+		cpu_buf->last_task = task;
+		if (!(task->flags & PF_EXITING)) {
+			cpu_buf->buffer[cpu_buf->pos].eip = ~0UL;
+			cpu_buf->buffer[cpu_buf->pos].event = (unsigned long)task;
+			cpu_buf->pos++;
+		}
+	}
+
+	/* If the task is exiting it's not safe to take a sample
+	 * as the task_struct is about to be freed. We can't just
+	 * notify at release_task() time because of CLONE_DETACHED
+	 * tasks that release_task() themselves.
+	 */
+	if (task->flags & PF_EXITING) {
+		cpu_buf->sample_lost_task_exit++;
+		goto out;
+	}
+
+	cpu_buf->buffer[cpu_buf->pos].eip = eip;
+	cpu_buf->buffer[cpu_buf->pos].event = event;
+	cpu_buf->pos++;
+out:
+	spin_unlock(&cpu_buf->int_lock);
+}
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
new file mode 100644
index 000000000000..87ce0a18550d
--- /dev/null
+++ b/drivers/oprofile/cpu_buffer.h
@@ -0,0 +1,45 @@
+/**
+ * @file cpu_buffer.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROFILE_CPU_BUFFER_H
+#define OPROFILE_CPU_BUFFER_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+ 
+struct task_struct;
+ 
+/* allocate a sample buffer for each CPU */
+int alloc_cpu_buffers(void);
+
+void free_cpu_buffers(void);
+ 
+/* CPU buffer is composed of such entries (which are
+ * also used for context switch notes)
+ */
+struct op_sample {
+	unsigned long eip;
+	unsigned long event;
+};
+ 
+struct oprofile_cpu_buffer {
+	spinlock_t int_lock;
+	/* protected by int_lock */
+	unsigned long pos;
+	struct task_struct * last_task;
+	struct op_sample * buffer;
+	unsigned long sample_received;
+	unsigned long sample_lost_locked;
+	unsigned long sample_lost_overflow;
+	unsigned long sample_lost_task_exit;
+} ____cacheline_aligned;
+
+extern struct oprofile_cpu_buffer cpu_buffer[];
+
+#endif /* OPROFILE_CPU_BUFFER_H */
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
new file mode 100644
index 000000000000..3552be34eca7
--- /dev/null
+++ b/drivers/oprofile/event_buffer.c
@@ -0,0 +1,186 @@
+/**
+ * @file event_buffer.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * This is the global event buffer that the user-space
+ * daemon reads from. The event buffer is an untyped array
+ * of unsigned longs. Entries are prefixed by the
+ * escape value ESCAPE_CODE followed by an identifying code.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/dcookies.h>
+#include <linux/oprofile.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#include "event_buffer.h"
+#include "cpu_buffer.h"
+#include "oprof.h"
+#include "oprofile_stats.h"
+
+DECLARE_MUTEX(buffer_sem);
+ 
+static unsigned long buffer_opened;
+static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
+static unsigned long * event_buffer;
+static unsigned long buffer_size;
+static unsigned long buffer_watershed;
+static size_t buffer_pos;
+/* atomic_t because wait_event checks it outside of buffer_sem */
+static atomic_t buffer_ready = ATOMIC_INIT(0);
+
+/* Add an entry to the event buffer. When we
+ * get near to the end we wake up the process
+ * sleeping on the read() of the file.
+ */
+void add_event_entry(unsigned long value)
+{
+	if (buffer_pos == buffer_size) {
+		atomic_inc(&oprofile_stats.event_lost_overflow);
+		return;
+	}
+
+	event_buffer[buffer_pos] = value;
+	if (++buffer_pos == buffer_size - buffer_watershed) {
+		atomic_set(&buffer_ready, 1);
+		wake_up(&buffer_wait);
+	}
+}
+
+
+/* Wake up the waiting process if any. This happens
+ * on "echo 0 >/dev/oprofile/enable" so the daemon
+ * processes the data remaining in the event buffer.
+ */
+void wake_up_buffer_waiter(void)
+{
+	down(&buffer_sem);
+	atomic_set(&buffer_ready, 1);
+	wake_up(&buffer_wait);
+	up(&buffer_sem);
+}
+
+ 
+int alloc_event_buffer(void)
+{
+	int err = -ENOMEM;
+
+	spin_lock(&oprofilefs_lock);
+	buffer_size = fs_buffer_size;
+	buffer_watershed = fs_buffer_watershed;
+	spin_unlock(&oprofilefs_lock);
+ 
+	if (buffer_watershed >= buffer_size)
+		return -EINVAL;
+ 
+	event_buffer = vmalloc(sizeof(unsigned long) * buffer_size);
+	if (!event_buffer)
+		goto out; 
+
+	err = 0;
+out:
+	return err;
+}
+
+
+void free_event_buffer(void)
+{
+	vfree(event_buffer);
+}
+
+ 
+int event_buffer_open(struct inode * inode, struct file * file)
+{
+	int err = -EPERM;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (test_and_set_bit(0, &buffer_opened))
+		return -EBUSY;
+
+	/* Register as a user of dcookies
+	 * to ensure they persist for the lifetime of
+	 * the open event file
+	 */
+	err = -EINVAL;
+	file->private_data = dcookie_register();
+	if (!file->private_data)
+		goto out;
+		 
+	if ((err = oprofile_setup()))
+		goto fail;
+
+	/* NB: the actual start happens from userspace
+	 * echo 1 >/dev/oprofile/enable
+	 */
+ 
+	return 0;
+
+fail:
+	dcookie_unregister(file->private_data);
+out:
+	clear_bit(0, &buffer_opened);
+	return err;
+}
+
+
+int event_buffer_release(struct inode * inode, struct file * file)
+{
+	oprofile_stop();
+	oprofile_shutdown();
+	dcookie_unregister(file->private_data);
+	buffer_pos = 0;
+	atomic_set(&buffer_ready, 0);
+	clear_bit(0, &buffer_opened);
+	return 0;
+}
+
+
+ssize_t event_buffer_read(struct file * file, char * buf, size_t count, loff_t * offset)
+{
+	int retval = -EINVAL;
+	size_t const max = buffer_size * sizeof(unsigned long);
+
+	/* handling partial reads is more trouble than it's worth */
+	if (count != max || *offset)
+		return -EINVAL;
+
+	/* wait for the event buffer to fill up with some data */
+	wait_event_interruptible(buffer_wait, atomic_read(&buffer_ready));
+	if (signal_pending(current))
+		return -EINTR;
+
+	down(&buffer_sem);
+
+	atomic_set(&buffer_ready, 0);
+
+	retval = -EFAULT;
+
+	count = buffer_pos * sizeof(unsigned long);
+ 
+	if (copy_to_user(buf, event_buffer, count))
+		goto out;
+
+	retval = count;
+	buffer_pos = 0;
+ 
+out:
+	up(&buffer_sem);
+	return retval;
+}
+ 
+struct file_operations event_buffer_fops = {
+	.open		= event_buffer_open,
+	.release	= event_buffer_release,
+	.read		= event_buffer_read,
+};
diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h
new file mode 100644
index 000000000000..11d2ed4dea42
--- /dev/null
+++ b/drivers/oprofile/event_buffer.h
@@ -0,0 +1,42 @@
+/**
+ * @file event_buffer.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef EVENT_BUFFER_H
+#define EVENT_BUFFER_H
+
+#include <linux/types.h> 
+#include <linux/sem.h>
+ 
+int alloc_event_buffer(void);
+
+void free_event_buffer(void);
+ 
+/* wake up the process sleeping on the event file */
+void wake_up_buffer_waiter(void);
+ 
+/* Each escaped entry is prefixed by ESCAPE_CODE
+ * then one of the following codes, then the
+ * relevant data.
+ */
+#define ESCAPE_CODE		~0UL
+#define CTX_SWITCH_CODE 	1
+#define CPU_SWITCH_CODE 	2
+#define COOKIE_SWITCH_CODE 	3
+ 
+/* add data to the event buffer */
+void add_event_entry(unsigned long data);
+ 
+extern struct file_operations event_buffer_fops;
+ 
+/* mutex between sync_cpu_buffers() and the
+ * file reading code.
+ */
+extern struct semaphore buffer_sem;
+ 
+#endif /* EVENT_BUFFER_H */
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
new file mode 100644
index 000000000000..1cae1bc13c8d
--- /dev/null
+++ b/drivers/oprofile/oprof.c
@@ -0,0 +1,154 @@
+/**
+ * @file oprof.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/dcookies.h>
+#include <linux/notifier.h>
+#include <linux/profile.h>
+#include <linux/oprofile.h>
+
+#include "oprof.h"
+#include "event_buffer.h"
+#include "cpu_buffer.h"
+#include "buffer_sync.h"
+#include "oprofile_stats.h"
+ 
+struct oprofile_operations * oprofile_ops;
+enum oprofile_cpu oprofile_cpu_type;
+unsigned long oprofile_started;
+static unsigned long is_setup;
+static DECLARE_MUTEX(start_sem);
+
+int oprofile_setup(void)
+{
+	int err;
+ 
+	if ((err = alloc_cpu_buffers()))
+		goto out;
+
+	if ((err = alloc_event_buffer()))
+		goto out1;
+ 
+	if (oprofile_ops->setup && (err = oprofile_ops->setup()))
+		goto out2;
+ 
+	/* Note even though this starts part of the
+	 * profiling overhead, it's necessary to prevent
+	 * us missing task deaths and eventually oopsing
+	 * when trying to process the event buffer.
+	 */
+	if ((err = sync_start()))
+		goto out3;
+
+	down(&start_sem);
+	is_setup = 1;
+	up(&start_sem);
+	return 0;
+ 
+out3:
+	if (oprofile_ops->shutdown)
+		oprofile_ops->shutdown();
+out2:
+	free_event_buffer();
+out1:
+	free_cpu_buffers();
+out:
+	return err;
+}
+
+
+/* Actually start profiling (echo 1 >/dev/oprofile/enable) */
+int oprofile_start(void)
+{
+	int err = -EINVAL;
+ 
+	down(&start_sem);
+ 
+	if (!is_setup)
+		goto out;
+
+	err = 0; 
+ 
+	if (oprofile_started)
+		goto out;
+ 
+	if ((err = oprofile_ops->start()))
+		goto out;
+
+	oprofile_started = 1;
+	oprofile_reset_stats();
+out:
+	up(&start_sem); 
+	return err;
+}
+
+ 
+/* Stop profiling (echo 0 >/dev/oprofile/enable) */
+void oprofile_stop(void)
+{
+	down(&start_sem);
+	if (!oprofile_started)
+		goto out;
+	oprofile_ops->stop();
+	oprofile_started = 0;
+	/* wake up the daemon to read what remains */
+	wake_up_buffer_waiter();
+out:
+	up(&start_sem);
+}
+
+
+void oprofile_shutdown(void)
+{
+	sync_stop();
+	if (oprofile_ops->shutdown)
+		oprofile_ops->shutdown(); 
+	/* down() is also necessary to synchronise all pending events
+	 * before freeing */
+	down(&buffer_sem);
+	is_setup = 0;
+	up(&buffer_sem);
+	free_event_buffer();
+	free_cpu_buffers();
+}
+
+ 
+static int __init oprofile_init(void)
+{
+	int err;
+
+	/* Architecture must fill in the interrupt ops and the
+	 * logical CPU type.
+	 */
+	err = oprofile_arch_init(&oprofile_ops, &oprofile_cpu_type);
+	if (err)
+		goto out;
+
+	err = oprofilefs_register();
+	if (err)
+		goto out;
+ 
+out:
+	return err;
+}
+
+
+static void __exit oprofile_exit(void)
+{
+	oprofilefs_unregister();
+}
+
+MODULE_LICENSE("GPL");
+module_init(oprofile_init);
+module_exit(oprofile_exit);
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
new file mode 100644
index 000000000000..9f19ba5f39b9
--- /dev/null
+++ b/drivers/oprofile/oprof.h
@@ -0,0 +1,34 @@
+/**
+ * @file oprof.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROF_H
+#define OPROF_H
+
+#include <linux/spinlock.h>
+#include <linux/oprofile.h>
+ 
+int oprofile_setup(void);
+void oprofile_shutdown(void); 
+
+int oprofilefs_register(void);
+void oprofilefs_unregister(void);
+
+int oprofile_start(void);
+void oprofile_stop(void);
+
+extern unsigned long fs_buffer_size;
+extern unsigned long fs_cpu_buffer_size;
+extern unsigned long fs_buffer_watershed;
+extern enum oprofile_cpu oprofile_cpu_type;
+extern struct oprofile_operations * oprofile_ops;
+extern unsigned long oprofile_started;
+ 
+void oprofile_create_files(struct super_block * sb, struct dentry * root);
+ 
+#endif /* OPROF_H */
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
new file mode 100644
index 000000000000..22d8bf5994b6
--- /dev/null
+++ b/drivers/oprofile/oprofile_files.c
@@ -0,0 +1,91 @@
+/**
+ * @file oprofile_files.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+ 
+#include "oprof.h"
+#include "event_buffer.h"
+#include "oprofile_stats.h"
+ 
+unsigned long fs_buffer_size = 131072;
+unsigned long fs_cpu_buffer_size = 8192;
+unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+
+ 
+static int simple_open(struct inode * inode, struct file * filp)
+{
+	return 0;
+}
+
+
+static ssize_t cpu_type_read(struct file * file, char * buf, size_t count, loff_t * offset)
+{
+	unsigned long cpu_type = oprofile_cpu_type;
+
+	return oprofilefs_ulong_to_user(&cpu_type, buf, count, offset);
+}
+ 
+ 
+static struct file_operations cpu_type_fops = {
+	.open		= simple_open,
+	.read		= cpu_type_read,
+};
+ 
+ 
+static ssize_t enable_read(struct file * file, char * buf, size_t count, loff_t * offset)
+{
+	return oprofilefs_ulong_to_user(&oprofile_started, buf, count, offset);
+}
+
+
+/* "enable" write handler: a non-zero value starts profiling, zero stops it. */
+static ssize_t enable_write(struct file *file, char const * buf, size_t count, loff_t * offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+	if (!count)	/* the helper would return 0 and leave val unset */
+		return 0;
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+	if (val)
+		retval = oprofile_start();
+	else
+		oprofile_stop();
+	if (retval)
+		return retval;
+	return count;
+}
+
+ 
+static struct file_operations enable_fops = {
+	.open		= simple_open,
+	.read		= enable_read,
+	.write		= enable_write,
+};
+
+ 
+void oprofile_create_files(struct super_block * sb, struct dentry * root)
+{
+	oprofilefs_create_file(sb, root, "enable", &enable_fops);
+	oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
+	oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
+	oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
+	oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size);
+	oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); 
+	oprofile_create_stats_files(sb, root);
+	if (oprofile_ops->create_files)
+		oprofile_ops->create_files(sb, root);
+}
diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c
new file mode 100644
index 000000000000..479d8315558f
--- /dev/null
+++ b/drivers/oprofile/oprofile_stats.c
@@ -0,0 +1,77 @@
+/**
+ * @file oprofile_stats.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+ 
+#include "oprofile_stats.h"
+#include "cpu_buffer.h"
+ 
+struct oprofile_stat_struct oprofile_stats;
+ 
+void oprofile_reset_stats(void)
+{
+	struct oprofile_cpu_buffer * cpu_buf; 
+	int i;
+ 
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (!cpu_possible(i))
+			continue;
+
+		cpu_buf = &cpu_buffer[i]; 
+		cpu_buf->sample_received = 0;
+		cpu_buf->sample_lost_locked = 0;
+		cpu_buf->sample_lost_overflow = 0;
+		cpu_buf->sample_lost_task_exit = 0;
+	}
+ 
+	atomic_set(&oprofile_stats.sample_lost_mmap_sem, 0);
+	atomic_set(&oprofile_stats.event_lost_overflow, 0);
+}
+
+
+/* Populate stats/ with per-CPU sample counters and the global lost-event counters. */
+void oprofile_create_stats_files(struct super_block * sb, struct dentry * root)
+{
+	struct oprofile_cpu_buffer * cpu_buf;
+	struct dentry * cpudir;
+	struct dentry * dir;
+	char buf[10];
+	int i;
+
+	dir = oprofilefs_mkdir(sb, root, "stats");
+	if (!dir)
+		return;
+
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (!cpu_possible(i))
+			continue;
+		cpu_buf = &cpu_buffer[i];
+		/* bound by the real buffer size so three-digit CPU ids are not truncated */
+		snprintf(buf, sizeof(buf), "cpu%d", i);
+		cpudir = oprofilefs_mkdir(sb, dir, buf);
+		/* Strictly speaking access to these ulongs is racy,
+		 * but we can't simply lock them, and they are
+		 * informational only.
+		 */
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_received",
+			&cpu_buf->sample_received);
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_locked",
+			&cpu_buf->sample_lost_locked);
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow",
+			&cpu_buf->sample_lost_overflow);
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_task_exit",
+			&cpu_buf->sample_lost_task_exit);
+	}
+
+	oprofilefs_create_ro_atomic(sb, dir, "sample_lost_mmap_sem",
+		&oprofile_stats.sample_lost_mmap_sem);
+	oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow",
+		&oprofile_stats.event_lost_overflow);
+}
diff --git a/drivers/oprofile/oprofile_stats.h b/drivers/oprofile/oprofile_stats.h
new file mode 100644
index 000000000000..8ca3596c2bef
--- /dev/null
+++ b/drivers/oprofile/oprofile_stats.h
@@ -0,0 +1,31 @@
+/**
+ * @file oprofile_stats.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OPROFILE_STATS_H
+#define OPROFILE_STATS_H
+
+#include <asm/atomic.h>
+ 
+struct oprofile_stat_struct {
+	atomic_t sample_lost_mmap_sem;
+	atomic_t event_lost_overflow;
+};
+
+extern struct oprofile_stat_struct oprofile_stats;
+ 
+/* reset all stats to zero */
+void oprofile_reset_stats(void);
+ 
+struct super_block;
+struct dentry;
+ 
+/* create the stats/ dir */
+void oprofile_create_stats_files(struct super_block * sb, struct dentry * root);
+
+#endif /* OPROFILE_STATS_H */
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
new file mode 100644
index 000000000000..a86100975cb8
--- /dev/null
+++ b/drivers/oprofile/oprofilefs.c
@@ -0,0 +1,306 @@
+/**
+ * @file oprofilefs.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * A simple filesystem for configuration and
+ * access of oprofile.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/oprofile.h>
+#include <asm/uaccess.h>
+
+#include "oprof.h"
+
+#define OPROFILEFS_MAGIC 0x6f70726f
+/* taken around reads and writes of the unsigned longs behind oprofilefs files */
+spinlock_t oprofilefs_lock = SPIN_LOCK_UNLOCKED;
+
+static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode)
+{
+	struct inode * ino = new_inode(sb);
+
+	/* new inode on sb: uid/gid 0, no data blocks, all timestamps "now" */
+	if (!ino)
+		return NULL;
+	ino->i_mode = mode;
+	ino->i_uid = ino->i_gid = 0;
+	ino->i_blksize = PAGE_CACHE_SIZE;
+	ino->i_blocks = 0;
+	ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;
+	return ino;
+}
+
+/* superblock operations installed by oprofilefs_fill_super() */
+static struct super_operations s_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode 	= generic_delete_inode,
+};
+
+#define TMPBUFSIZE 50
+
+ssize_t oprofilefs_ulong_to_user(unsigned long * val, char * buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t maxlen;
+	/* Format *val as "%lu\n" and copy the part starting at *offset to buf. */
+	if (!count)
+		return 0;
+	/* oprofilefs_lock is held only while *val is read and formatted */
+	spin_lock(&oprofilefs_lock);
+	maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", *val);
+	spin_unlock(&oprofilefs_lock);
+	if (maxlen > TMPBUFSIZE)
+		maxlen = TMPBUFSIZE;
+	/* an offset beyond the formatted text reads as end-of-file */
+	if (*offset > maxlen)
+		return 0;
+	/* clamp the request to what is left of the formatted text */
+	if (count > maxlen - *offset)
+		count = maxlen - *offset;
+
+	if (copy_to_user(buf, tmpbuf + *offset, count))
+		return -EFAULT;
+
+	*offset += count;
+
+	return count;
+}
+
+
+int oprofilefs_ulong_from_user(unsigned long * val, char const * buf, size_t count)
+{
+	char text[TMPBUFSIZE] = { 0 };	/* zero-filled, so always NUL-terminated */
+	unsigned long parsed;
+
+	/* an empty write succeeds and leaves *val untouched */
+	if (count == 0)
+		return 0;
+	/* keep one byte spare for the terminating NUL */
+	if (count >= TMPBUFSIZE)
+		return -EINVAL;
+	if (copy_from_user(text, buf, count) != 0)
+		return -EFAULT;
+	parsed = simple_strtoul(text, NULL, 10);
+
+	spin_lock(&oprofilefs_lock);
+	*val = parsed;
+	spin_unlock(&oprofilefs_lock);
+	return 0;
+}
+
+/* .read handler: formats the unsigned long that file->private_data points to */
+static ssize_t ulong_read_file(struct file * file, char * buf, size_t count, loff_t * offset)
+{
+	return oprofilefs_ulong_to_user(file->private_data, buf, count, offset);
+}
+
+
+static ssize_t ulong_write_file(struct file * file, char const * buf, size_t count, loff_t * offset)
+{
+	unsigned long * target = file->private_data;
+	int err;
+
+	/* writes are only accepted at file offset 0 */
+	if (*offset != 0)
+		return -EINVAL;
+
+	err = oprofilefs_ulong_from_user(target, buf, count);
+	if (err != 0)
+		return err;
+	return count;
+}
+
+/* .open handler: pass the inode's generic_ip pointer (if set) on as private_data */
+static int default_open(struct inode * inode, struct file * filp)
+{
+	if (inode->u.generic_ip)
+		filp->private_data = inode->u.generic_ip;
+	return 0;
+}
+
+/* read/write access to an unsigned long; used by oprofilefs_create_ulong() */
+static struct file_operations ulong_fops = {
+	.read		= ulong_read_file,
+	.write		= ulong_write_file,
+	.open		= default_open,
+};
+
+/* read-only variant (no .write); used by oprofilefs_create_ro_ulong() */
+static struct file_operations ulong_ro_fops = {
+	.read		= ulong_read_file,
+	.open		= default_open,
+};
+
+/* Create regular file "name" (mode 0644) under root; returns NULL on failure. */
+static struct dentry * __oprofilefs_create_file(struct super_block * sb,
+	struct dentry * root, char const * name, struct file_operations * fops)
+{
+	struct qstr q;
+	struct dentry * d;
+	struct inode * ino;
+	q.name = name;
+	q.len = strlen(name);
+	q.hash = full_name_hash(q.name, q.len);
+	d = d_alloc(root, &q);
+	if (d == NULL)
+		return NULL;
+	ino = oprofilefs_get_inode(sb, S_IFREG | 0644);
+	if (ino == NULL) {
+		dput(d);	/* give back the dentry allocated above */
+		return NULL;
+	}
+	ino->i_fop = fops;
+	d_add(d, ino);
+	return d;
+}
+
+
+int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
+	char const * name, unsigned long * val)
+{
+	struct dentry * file = __oprofilefs_create_file(sb, root, name, &ulong_fops);
+	if (file == NULL)
+		return -EFAULT;
+	/* default_open() later passes this pointer on as private_data */
+	file->d_inode->u.generic_ip = val;
+	return 0;
+}
+
+
+int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
+	char const * name, unsigned long * val)
+{
+	struct dentry * file = __oprofilefs_create_file(sb, root, name, &ulong_ro_fops);
+	if (file == NULL)
+		return -EFAULT;
+	/* default_open() later passes this pointer on as private_data */
+	file->d_inode->u.generic_ip = val;
+	return 0;
+}
+
+/* .read handler for atomic_t files: snapshot the counter, then format the copy */
+static ssize_t atomic_read_file(struct file * file, char * buf, size_t count, loff_t * offset)
+{
+	atomic_t * aval = file->private_data;
+	unsigned long val = atomic_read(aval);
+	return oprofilefs_ulong_to_user(&val, buf, count, offset);
+}
+ 
+/* read-only access to an atomic_t; used by oprofilefs_create_ro_atomic() */
+static struct file_operations atomic_ro_fops = {
+	.read		= atomic_read_file,
+	.open		= default_open,
+};
+ 
+
+int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
+	char const * name, atomic_t * val)
+{
+	struct dentry * file = __oprofilefs_create_file(sb, root, name, &atomic_ro_fops);
+	if (file == NULL)
+		return -EFAULT;
+	/* default_open() later passes this pointer on as private_data */
+	file->d_inode->u.generic_ip = val;
+	return 0;
+}
+
+ 
+int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
+	char const * name, struct file_operations * fops)
+{
+	/* the new dentry is not handed back, only 0 or -EFAULT */
+	struct dentry * made = __oprofilefs_create_file(sb, root, name, fops);
+	return made ? 0 : -EFAULT;
+}
+
+/* Create directory "name" (mode 0755) under root; returns its dentry or NULL. */
+struct dentry * oprofilefs_mkdir(struct super_block * sb,
+	struct dentry * root, char const * name)
+{
+	struct qstr q;
+	struct dentry * dir;
+	struct inode * ino;
+	q.name = name;
+	q.len = strlen(name);
+	q.hash = full_name_hash(q.name, q.len);
+	dir = d_alloc(root, &q);
+	if (dir == NULL)
+		return NULL;
+	ino = oprofilefs_get_inode(sb, S_IFDIR | 0755);
+	if (ino == NULL) {
+		dput(dir);	/* give back the dentry allocated above */
+		return NULL;
+	}
+	ino->i_op = &simple_dir_inode_operations;
+	ino->i_fop = &simple_dir_operations;
+	d_add(dir, ino);
+	return dir;
+}
+
+
+static int oprofilefs_fill_super(struct super_block * sb, void * data, int silent)
+{
+	struct inode * ino;
+	struct dentry * top;
+
+	/* superblock geometry, magic number and operations */
+	sb->s_op = &s_ops;
+	sb->s_magic = OPROFILEFS_MAGIC;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+
+	/* root directory: the inode first, then its dentry */
+	ino = oprofilefs_get_inode(sb, S_IFDIR | 0755);
+	if (ino == NULL)
+		return -ENOMEM;
+	ino->i_fop = &simple_dir_operations;
+	ino->i_op = &simple_dir_inode_operations;
+	top = d_alloc_root(ino);
+	if (top == NULL) {
+		iput(ino);	/* no dentry took the inode, so release it here */
+		return -ENOMEM;
+	}
+	sb->s_root = top;
+	/* populate the tree below the new root */
+	oprofile_create_files(sb, top);
+	// FIXME: verify kill_litter_super removes our dentries
+	return 0;
+}
+
+/* .get_sb hook: defers to get_sb_single() with oprofilefs_fill_super as callback */
+static struct super_block * oprofilefs_get_sb(struct file_system_type * fs_type,
+	int flags, char * dev_name, void * data)
+{
+	return get_sb_single(fs_type, flags, data, oprofilefs_fill_super);
+}
+
+/* filesystem type handed to register_filesystem()/unregister_filesystem() below */
+static struct file_system_type oprofilefs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "oprofilefs",
+	.get_sb		= oprofilefs_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+/* register the "oprofilefs" type; returns register_filesystem()'s result */
+int __init oprofilefs_register(void)
+{
+	return register_filesystem(&oprofilefs_type);
+}
+
+/* unregister the "oprofilefs" type; the result of the call is not reported */
+void __exit oprofilefs_unregister(void)
+{
+	unregister_filesystem(&oprofilefs_type);
+}
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
new file mode 100644
index 000000000000..982b64e0518a
--- /dev/null
+++ b/include/linux/oprofile.h
@@ -0,0 +1,98 @@
+/**
+ * @file oprofile.h
+ *
+ * API for machine-specific interrupts to interface
+ * to oprofile.
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROFILE_H
+#define OPROFILE_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+ 
+struct super_block;
+struct dentry;
+struct file_operations;
+ 
+enum oprofile_cpu {	/* CPU type; oprofile_arch_init() takes a pointer to one */
+	OPROFILE_CPU_PPRO,
+	OPROFILE_CPU_PII,
+	OPROFILE_CPU_PIII,
+	OPROFILE_CPU_ATHLON,
+	OPROFILE_CPU_TIMER
+};
+
+/* Operations structure to be filled in; handed back through oprofile_arch_init()'s ops argument */
+struct oprofile_operations {
+	/* create any necessary configuration files in the oprofile fs.
+	 * Optional. */
+	int (*create_files)(struct super_block * sb, struct dentry * root);
+	/* Do any necessary interrupt setup. Optional. */
+	int (*setup)(void);
+	/* Do any necessary interrupt shutdown. Optional. */
+	void (*shutdown)(void);
+	/* Start delivering interrupts. */
+	int (*start)(void);
+	/* Stop delivering interrupts. */
+	void (*stop)(void);
+};
+
+/**
+ * One-time initialisation. *ops must be set to a filled-in
+ * operations structure. *cpu must be set to the CPU type.
+ * Return 0 on success.
+ */
+int oprofile_arch_init(struct oprofile_operations ** ops, enum oprofile_cpu * cpu);
+ 
+/**
+ * Add a sample. This may be called from any context. Pass
+ * smp_processor_id() as cpu.
+ */
+extern void FASTCALL(oprofile_add_sample(unsigned long eip, unsigned long event, int cpu));
+
+/**
+ * Create a file of the given name as a child of the given root, with
+ * the specified file operations.
+ */
+int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
+	char const * name, struct file_operations * fops);
+ 
+/** Create a file for read/write access to an unsigned long. */
+int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
+	char const * name, ulong * val);
+ 
+/** Create a file for read-only access to an unsigned long. */
+int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
+	char const * name, ulong * val);
+ 
+/** Create a file for read-only access to an atomic_t. */
+int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
+	char const * name, atomic_t * val);
+ 
+/** create a directory */
+struct dentry * oprofilefs_mkdir(struct super_block * sb, struct dentry * root,
+	char const * name);
+
+/**
+ * Convert an unsigned long value into ASCII and copy it to the user buffer @buf,
+ * updating *offset appropriately. Returns bytes written or -EFAULT.
+ */
+ssize_t oprofilefs_ulong_to_user(unsigned long * val, char * buf, size_t count, loff_t * offset);
+
+/**
+ * Read an ASCII string for a number from a userspace buffer and fill *val on success.
+ * Returns 0 on success, < 0 on error.
+ */
+int oprofilefs_ulong_from_user(unsigned long * val, char const * buf, size_t count);
+
+/** lock for read/write safety */
+extern spinlock_t oprofilefs_lock;
+ 
+#endif /* OPROFILE_H */
-- 
cgit v1.2.3


From f35e65513f6bd0a346c8e51e78c8893bb3143c9f Mon Sep 17 00:00:00 2001
From: John Levon <levon@movementarian.org>
Date: Tue, 15 Oct 2002 04:31:08 -0700
Subject: [PATCH] oprofile - dcookies need to use u32

Make dcookies use a stable size regardless of whether we're
on a 32-bit or 64-bit platform.
---
 drivers/oprofile/buffer_sync.c | 24 ++++++++++++------------
 drivers/oprofile/oprof.c       |  1 -
 fs/dcookies.c                  | 14 +++++++-------
 include/linux/dcookies.h       |  4 ++--
 4 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 46360ee22da2..79b92c1c7965 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -118,13 +118,13 @@ void sync_stop(void)
  * because we cannot reach this code without at least one
  * dcookie user still being registered (namely, the reader
  * of the event buffer). */
-static inline unsigned long fast_get_dcookie(struct dentry * dentry,
+static inline u32 fast_get_dcookie(struct dentry * dentry,
 	struct vfsmount * vfsmnt)
 {
-	unsigned long cookie;
+	u32 cookie;
  
 	if (dentry->d_cookie)
-		return (unsigned long)dentry;
+		return (u32)dentry;
 	get_dcookie(dentry, vfsmnt, &cookie);
 	return cookie;
 }
@@ -135,9 +135,9 @@ static inline unsigned long fast_get_dcookie(struct dentry * dentry,
  * not strictly necessary but allows oprofile to associate
  * shared-library samples with particular applications
  */
-static unsigned long get_exec_dcookie(struct mm_struct * mm)
+static u32 get_exec_dcookie(struct mm_struct * mm)
 {
-	unsigned long cookie = 0;
+	u32 cookie = 0;
 	struct vm_area_struct * vma;
  
 	if (!mm)
@@ -163,9 +163,9 @@ out:
  * sure to do this lookup before a mm->mmap modification happens so
  * we don't lose track.
  */
-static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
+static u32 lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
 {
-	unsigned long cookie = 0;
+	u32 cookie = 0;
 	struct vm_area_struct * vma;
 
 	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
@@ -188,7 +188,7 @@ out:
 }
 
 
-static unsigned long last_cookie = ~0UL;
+static u32 last_cookie = ~0UL;
  
 static void add_cpu_switch(int i)
 {
@@ -199,7 +199,7 @@ static void add_cpu_switch(int i)
 }
 
  
-static void add_ctx_switch(pid_t pid, unsigned long cookie)
+static void add_ctx_switch(pid_t pid, u32 cookie)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(CTX_SWITCH_CODE); 
@@ -208,7 +208,7 @@ static void add_ctx_switch(pid_t pid, unsigned long cookie)
 }
 
  
-static void add_cookie_switch(unsigned long cookie)
+static void add_cookie_switch(u32 cookie)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(COOKIE_SWITCH_CODE);
@@ -225,7 +225,7 @@ static void add_sample_entry(unsigned long offset, unsigned long event)
 
 static void add_us_sample(struct mm_struct * mm, struct op_sample * s)
 {
-	unsigned long cookie;
+	u32 cookie;
 	off_t offset;
  
  	cookie = lookup_dcookie(mm, s->eip, &offset);
@@ -317,7 +317,7 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
 {
 	struct mm_struct * mm = 0;
 	struct task_struct * new;
-	unsigned long cookie;
+	u32 cookie;
 	int i;
  
 	for (i=0; i < cpu_buf->pos; ++i) {
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index 1cae1bc13c8d..91e120f1ac75 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/dcookies.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
 #include <linux/oprofile.h>
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0236c146b451..d589103eb820 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -8,7 +8,7 @@
  * non-transitory that can be processed at a later date.
  * This is done by locking the dentry/vfsmnt pair in the
  * kernel until released by the tasks needing the persistent
- * objects. The tag is simply an unsigned long that refers
+ * objects. The tag is simply a u32 that refers
  * to the pair and can be looked up from userspace.
  */
 
@@ -46,19 +46,19 @@ static inline int is_live(void)
 
 
 /* The dentry is locked, its address will do for the cookie */
-static inline unsigned long dcookie_value(struct dcookie_struct * dcs)
+static inline u32 dcookie_value(struct dcookie_struct * dcs)
 {
-	return (unsigned long)dcs->dentry;
+	return (u32)dcs->dentry;
 }
 
 
-static size_t dcookie_hash(unsigned long dcookie)
+static size_t dcookie_hash(u32 dcookie)
 {
 	return (dcookie >> 2) & (hash_size - 1);
 }
 
 
-static struct dcookie_struct * find_dcookie(unsigned long dcookie)
+static struct dcookie_struct * find_dcookie(u32 dcookie)
 {
 	struct dcookie_struct * found = 0;
 	struct dcookie_struct * dcs;
@@ -109,7 +109,7 @@ static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
  * value for a dentry/vfsmnt pair.
  */
 int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
-	unsigned long * cookie)
+	u32 * cookie)
 {
 	int err = 0;
 	struct dcookie_struct * dcs;
@@ -142,7 +142,7 @@ out:
 /* And here is where the userspace process can look up the cookie value
  * to retrieve the path.
  */
-asmlinkage int sys_lookup_dcookie(unsigned long cookie, char * buf, size_t len)
+asmlinkage int sys_lookup_dcookie(u32 cookie, char * buf, size_t len)
 {
 	char * kbuf;
 	char * path;
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index b2ae9692dc05..7c4d3319e7d0 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -44,7 +44,7 @@ void dcookie_unregister(struct dcookie_user * user);
  * Returns 0 on success, with *cookie filled in
  */
 int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
-	unsigned long * cookie);
+	u32 * cookie);
 
 #else
 
@@ -59,7 +59,7 @@ void dcookie_unregister(struct dcookie_user * user)
 }
  
 static inline int get_dcookie(struct dentry * dentry,
-	struct vfsmount * vfsmnt, unsigned long * cookie)
+	struct vfsmount * vfsmnt, u32 * cookie)
 {
 	return -ENOSYS;
 } 
-- 
cgit v1.2.3


From 5d6af116099e9ba443b2c24e18cfdb0bd2f749f2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 15 Oct 2002 04:35:16 -0700
Subject: [PATCH] futex-2.5.42-A2

This is my current futex patchset against BK-curr.  It mostly includes
must-have crash/correctness fixes from Martin Wirth, tested and reworked
somewhat by myself:

 - crash fix: futex_close did not detach from the vcache. Detach cleanups.
   (Martin Wirth)

 - memory leak fix: forgotten put_page() in a rare path in __pin_page().
   (Martin Wirth)

 - crash fix: do not do any quickcheck in unqueue_me(). (Martin, me)

 - correctness fix: the fastpath in __pin_page() now handles reserved
   pages the same way get_user_pages() does. (Martin Wirth)

 - queueing improvement: __attach_vcache() now uses list_add_tail() to
   avoid the reversal of the futex queue if a COW happens. (Martin Wirth)

 - simplified alignment check in sys_futex. (Martin Wirth)

 - comment fix: make it clear how the vcache hash quickcheck works. (me)
---
 include/linux/vcache.h |  2 +-
 kernel/futex.c         | 29 +++++++++++++----------------
 mm/vcache.c            | 19 ++++++++-----------
 3 files changed, 22 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vcache.h b/include/linux/vcache.h
index d5756643332c..5708fe6a908a 100644
--- a/include/linux/vcache.h
+++ b/include/linux/vcache.h
@@ -18,7 +18,7 @@ extern void __attach_vcache(vcache_t *vcache,
 		struct mm_struct *mm,
 		void (*callback)(struct vcache_s *data, struct page *new_page));
 
-extern void detach_vcache(vcache_t *vcache);
+extern void __detach_vcache(vcache_t *vcache);
 
 extern void invalidate_vcache(unsigned long address, struct mm_struct *mm,
 				struct page *new_page);
diff --git a/kernel/futex.c b/kernel/futex.c
index d268c3c1b758..4aa2115c4d66 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -115,8 +115,9 @@ static struct page *__pin_page(unsigned long addr)
 	 * Do a quick atomic lookup first - this is the fastpath.
 	 */
 	page = follow_page(mm, addr, 0);
-	if (likely(page != NULL)) {
-		get_page(page);
+	if (likely(page != NULL)) {	
+		if (!PageReserved(page))
+			get_page(page);
 		return page;
 	}
 
@@ -140,8 +141,10 @@ repeat_lookup:
 	 * check for races:
 	 */
 	tmp = follow_page(mm, addr, 0);
-	if (tmp != page)
+	if (tmp != page) {
+		put_page(page);
 		goto repeat_lookup;
+	}
 
 	return page;
 }
@@ -176,6 +179,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
 
 		if (this->page == page && this->offset == offset) {
 			list_del_init(i);
+			__detach_vcache(&this->vcache);
 			tell_waiter(this);
 			ret++;
 			if (ret >= num)
@@ -235,15 +239,15 @@ static inline int unqueue_me(struct futex_q *q)
 {
 	int ret = 0;
 
-	detach_vcache(&q->vcache);
-
+	spin_lock(&vcache_lock);
 	spin_lock(&futex_lock);
 	if (!list_empty(&q->list)) {
 		list_del(&q->list);
+		__detach_vcache(&q->vcache);
 		ret = 1;
 	}
 	spin_unlock(&futex_lock);
-
+	spin_unlock(&vcache_lock);
 	return ret;
 }
 
@@ -314,13 +318,7 @@ static int futex_close(struct inode *inode, struct file *filp)
 {
 	struct futex_q *q = filp->private_data;
 
-	spin_lock(&futex_lock);
-	if (!list_empty(&q->list)) {
-		list_del(&q->list);
-		/* Noone can be polling on us now. */
-		BUG_ON(waitqueue_active(&q->waiters));
-	}
-	spin_unlock(&futex_lock);
+	unqueue_me(q);
 	unpin_page(q->page);
 	kfree(filp->private_data);
 	return 0;
@@ -436,9 +434,8 @@ asmlinkage int sys_futex(unsigned long uaddr, int op, int val, struct timespec *
 
 	pos_in_page = uaddr % PAGE_SIZE;
 
-	/* Must be "naturally" aligned, and not on page boundary. */
-	if ((pos_in_page % __alignof__(int)) != 0
-	    || pos_in_page + sizeof(int) > PAGE_SIZE)
+	/* Must be "naturally" aligned */
+	if (pos_in_page % sizeof(int))
 		return -EINVAL;
 
 	switch (op) {
diff --git a/mm/vcache.c b/mm/vcache.c
index ea6bc9d2b259..599e0f25490d 100644
--- a/mm/vcache.c
+++ b/mm/vcache.c
@@ -41,14 +41,12 @@ void __attach_vcache(vcache_t *vcache,
 
 	hash_head = hash_vcache(address, mm);
 
-	list_add(&vcache->hash_entry, hash_head);
+	list_add_tail(&vcache->hash_entry, hash_head);
 }
 
-void detach_vcache(vcache_t *vcache)
+void __detach_vcache(vcache_t *vcache)
 {
-	spin_lock(&vcache_lock);
-	list_del(&vcache->hash_entry);
-	spin_unlock(&vcache_lock);
+	list_del_init(&vcache->hash_entry);
 }
 
 void invalidate_vcache(unsigned long address, struct mm_struct *mm,
@@ -61,12 +59,11 @@ void invalidate_vcache(unsigned long address, struct mm_struct *mm,
 
 	hash_head = hash_vcache(address, mm);
 	/*
-	 * This is safe, because this path is called with the mm
-	 * semaphore read-held, and the add/remove path calls with the
-	 * mm semaphore write-held. So while other mm's might add new
-	 * entries in parallel, and *this* mm is locked out, so if the
-	 * list is empty now then we do not have to take the vcache
-	 * lock to see it's really empty.
+	 * This is safe, because this path is called with the pagetable
+	 * lock held. So while other mm's might add new entries in
+	 * parallel, *this* mm is locked out, so if the list is empty
+	 * now then we do not have to take the vcache lock to see it's
+	 * really empty.
 	 */
 	if (likely(list_empty(hash_head)))
 		return;
-- 
cgit v1.2.3


From b354d9171b5c04720daa2c1f993a10f15e372286 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:42 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Further cleanups

Separate the static and dynamic filesystem data retrieval calls as per the
NFSv3 spec. This also simplifies things for NFSv4, since many of the
attributes in the fsinfo+fstat combined call are not mandatory to
implement.
---
 fs/nfs/inode.c          | 55 ++++++++++++++++++++++++++-----------------------
 fs/nfs/nfs2xdr.c        | 29 +++++---------------------
 fs/nfs/nfs3proc.c       | 40 ++++++++++++++++++++++++++---------
 fs/nfs/nfs3xdr.c        | 18 +++++++---------
 fs/nfs/proc.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/nfs_xdr.h | 28 +++++++++++++++++++++----
 6 files changed, 146 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f6cfe092fb13..940173f909b8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -240,7 +240,13 @@ int nfs_sb_init(struct super_block *sb)
 {
 	struct nfs_server	*server;
 	struct inode		*root_inode = NULL;
-	struct nfs_fsinfo	fsinfo;
+	struct nfs_fattr	fattr;
+	struct nfs_fsinfo	fsinfo = {
+					.fattr = &fattr,
+				};
+	struct nfs_pathconf pathinfo = {
+			.fattr = &fattr,
+	};
 
 	/* We probably want something more informative here */
 	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
@@ -265,31 +271,27 @@ int nfs_sb_init(struct super_block *sb)
 	sb->s_root->d_op = &nfs_dentry_operations;
 
 	/* Get some general file system info */
-        if (server->rpc_ops->statfs(server, &server->fh, &fsinfo) >= 0) {
-		if (server->namelen == 0)
-			server->namelen = fsinfo.namelen;
-	} else {
+        if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) {
 		printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n");
 		goto out_no_root;
         }
-
+	if (server->namelen == 0 &&
+	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+		server->namelen = pathinfo.max_namelen;
 	/* Work out a lot of parameters */
 	if (server->rsize == 0)
 		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
 	if (server->wsize == 0)
 		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-	/* NFSv3: we don't have bsize, but rather rtmult and wtmult... */
-	if (!fsinfo.bsize)
-		fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult;
-	/* Also make sure we don't go below rsize/wsize since
-	 * RPC calls are expensive */
-	if (fsinfo.bsize < server->rsize)
-		fsinfo.bsize = server->rsize;
-	if (fsinfo.bsize < server->wsize)
-		fsinfo.bsize = server->wsize;
-
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits);
+	if (sb->s_blocksize == 0) {
+		if (fsinfo.wtmult == 0) {
+			sb->s_blocksize = 512;
+			sb->s_blocksize_bits = 9;
+		} else
+			sb->s_blocksize = nfs_block_bits(fsinfo.wtmult,
+							 &sb->s_blocksize_bits);
+	}
+
 	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
 		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
 	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
@@ -472,29 +474,30 @@ nfs_statfs(struct super_block *sb, struct statfs *buf)
 	struct nfs_server *server = NFS_SB(sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fsinfo res;
+	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fattr fattr;
+	struct nfs_fsstat res = {
+			.fattr = &fattr,
+	};
 	int error;
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root->d_inode), &res);
+	error = server->rpc_ops->statfs(server, rootfh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
 
-	if (res.bsize == 0)
-		res.bsize = sb->s_blocksize;
-	buf->f_bsize = nfs_block_bits(res.bsize, &blockbits);
+	buf->f_bsize = sb->s_blocksize;
+	blockbits = sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
 	buf->f_bavail = (res.abytes + blockres) >> blockbits;
 	buf->f_files = res.tfiles;
 	buf->f_ffree = res.afiles;
-	if (res.namelen == 0 || res.namelen > server->namelen)
-		res.namelen = server->namelen;
-	buf->f_namelen = res.namelen;
 
+	buf->f_namelen = server->namelen;
  out:
 	unlock_kernel();
 
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8dc92b8b3a1c..8e652afdfea4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -596,37 +596,18 @@ nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
  * Decode STATFS reply
  */
 static int
-nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
 {
 	int	status;
-	u32	xfer_size;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 
-	/* For NFSv2, we more or less have to guess the preferred
-	 * read/write/readdir sizes from the single 'transfer size'
-	 * value.
-	 */
-	xfer_size = ntohl(*p++);	/* tsize */
-	res->rtmax  = 8 * 1024;
-	res->rtpref = xfer_size;
-	res->rtmult = xfer_size;
-	res->wtmax  = 8 * 1024;
-	res->wtpref = xfer_size;
-	res->wtmult = xfer_size;
-	res->dtpref = PAGE_CACHE_SIZE;
-	res->maxfilesize = 0x7FFFFFFF;	/* just a guess */
+	res->tsize  = ntohl(*p++);
 	res->bsize  = ntohl(*p++);
-
-	res->tbytes = ntohl(*p++) * res->bsize;
-	res->fbytes = ntohl(*p++) * res->bsize;
-	res->abytes = ntohl(*p++) * res->bsize;
-	res->tfiles = 0;
-	res->ffiles = 0;
-	res->afiles = 0;
-	res->namelen = 0;
-
+	res->blocks = ntohl(*p++);
+	res->bfree  = ntohl(*p++);
+	res->bavail = ntohl(*p++);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1ddb51374cba..790c27ead44f 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -639,24 +639,42 @@ nfs3_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
 	return status;
 }
 
-/*
- * This is a combo call of fsstat and fsinfo
- */
 static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-		 struct nfs_fsinfo *info)
+		 struct nfs_fsstat *stat)
 {
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
-	memset((char *)info, 0, sizeof(*info));
-	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0);
-	if (status < 0)
-		goto error;
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
+{
+	int	status;
+	/* issue NFS3PROC_FSINFO; info->fattr is marked invalid before the call */
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
 	status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	return status;
+}
 
-error:
-	dprintk("NFS reply statfs: %d\n", status);
+static int
+nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *info)
+{
+	int	status;
+	/* issue NFS3PROC_PATHCONF; info->fattr is marked invalid before the call */
+	dprintk("NFS call  pathconf\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
 }
 
@@ -824,6 +842,8 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.readdir	= nfs3_proc_readdir,
 	.mknod		= nfs3_proc_mknod,
 	.statfs		= nfs3_proc_statfs,
+	.fsinfo		= nfs3_proc_fsinfo,
+	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
 	.write_setup	= nfs3_proc_write_setup,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b0c77b19fff9..2a813fb65365 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -912,14 +912,13 @@ nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
  * Decode FSSTAT reply
  */
 static int
-nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
@@ -940,12 +939,11 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 static int
 nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
@@ -959,6 +957,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 	p = xdr_decode_hyper(p, &res->maxfilesize);
 
 	/* ignore time_delta and properties */
+	res->lease_time = 0;
 	return 0;
 }
 
@@ -966,18 +965,17 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
  * Decode PATHCONF reply
  */
 static int
-nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
 {
-	struct nfs_fattr dummy;
 	int		status;
 
 	status = ntohl(*p++);
 
-	p = xdr_decode_post_op_attr(p, &dummy);
+	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
-	res->linkmax = ntohl(*p++);
-	res->namelen = ntohl(*p++);
+	res->max_link = ntohl(*p++);
+	res->max_namelen = ntohl(*p++);
 
 	/* ignore remaining fields */
 	return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 2ad13ec4cd27..a5a1c373444d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -460,17 +460,62 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 
 static int
 nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fsinfo *info)
+			struct nfs_fsstat *stat)
 {
+	struct nfs2_fsstat fsinfo;	/* receives the STATFS reply (32-bit block counts) */
 	int	status;
 
 	dprintk("NFS call  statfs\n");
-	memset((char *)info, 0, sizeof(*info));
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0);
+	stat->fattr->valid = 0;		/* flag the attached fattr as holding nothing valid */
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply statfs: %d\n", status);
+	if (status)
+		goto out;
+	stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;	/* widen first: blocks * bsize may exceed 32 bits */
+	stat->fbytes = (u64)fsinfo.bfree  * fsinfo.bsize;
+	stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
+	stat->tfiles = 0;	/* struct nfs2_fsstat carries no file counts */
+	stat->ffiles = 0;
+	stat->afiles = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsinfo *info)
+{
+	struct nfs2_fsstat fsinfo;	/* STATFS reply is decoded here, then mapped onto *info */
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	if (status)
+		goto out;
+	info->rtmax  = NFS_MAXDATA;
+	info->rtpref = fsinfo.tsize;	/* preferred sizes come from the server transfer size */
+	info->rtmult = fsinfo.bsize;	/* multiples come from the filesystem block size */
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;	/* fixed limit of 2^31 - 1 bytes */
+	info->lease_time = 0;
+out:
 	return status;
 }
 
+static int
+nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_pathconf *info)
+{
+	info->max_namelen = NFS2_MAXNAMLEN;	/* fixed values: no RPC is issued */
+	info->max_link = 0;
+	return 0;				/* cannot fail */
+}
+
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
 static void
@@ -590,6 +635,8 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.readdir	= nfs_proc_readdir,
 	.mknod		= nfs_proc_mknod,
 	.statfs		= nfs_proc_statfs,
+	.fsinfo		= nfs_proc_fsinfo,
+	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
 	.write_setup	= nfs_proc_write_setup,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b71b1b217c70..e542fe6982c5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -50,6 +50,7 @@ struct nfs_fattr {
  * Info on the file system
  */
 struct nfs_fsinfo {
+	struct nfs_fattr	*fattr; /* Post-op attributes */
 	__u32			rtmax;	/* max.  read transfer size */
 	__u32			rtpref;	/* pref. read transfer size */
 	__u32			rtmult;	/* reads should be multiple of this */
@@ -58,16 +59,31 @@ struct nfs_fsinfo {
 	__u32			wtmult;	/* writes should be multiple of this */
 	__u32			dtpref;	/* pref. readdir transfer size */
 	__u64			maxfilesize;
-	__u64			bsize;	/* block size */
+	__u32			lease_time; /* in seconds */
+};
+
+struct nfs_fsstat {
+	struct nfs_fattr	*fattr; /* Post-op attributes */
 	__u64			tbytes;	/* total size in bytes */
 	__u64			fbytes;	/* # of free bytes */
 	__u64			abytes;	/* # of bytes available to user */
 	__u64			tfiles;	/* # of files */
 	__u64			ffiles;	/* # of free files */
 	__u64			afiles;	/* # of files available to user */
-	__u32			linkmax;/* max # of hard links */
-	__u32			namelen;/* max name length */
-	__u32			lease_time; /* in seconds */
+};
+
+struct nfs2_fsstat {
+	__u32			tsize;  /* Server transfer size */
+	__u32			bsize;  /* Filesystem block size */
+	__u32			blocks; /* No. of "bsize" blocks on filesystem */
+	__u32			bfree;  /* No. of free "bsize" blocks */
+	__u32			bavail; /* No. of available "bsize" blocks */
+};
+
+struct nfs_pathconf {
+	struct nfs_fattr	*fattr; /* Post-op attributes */
+	__u32			max_link; /* max # of hard links */
+	__u32			max_namelen; /* max name length */
 };
 
 /*
@@ -391,7 +407,11 @@ struct nfs_rpc_ops {
 	int	(*mknod)   (struct inode *, struct qstr *, struct iattr *,
 			    dev_t, struct nfs_fh *, struct nfs_fattr *);
 	int	(*statfs)  (struct nfs_server *, struct nfs_fh *,
+			    struct nfs_fsstat *);
+	int	(*fsinfo)  (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
+	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
+			     struct nfs_pathconf *);
 	u32 *	(*decode_dirent)(u32 *, struct nfs_entry *, int plus);
 	void	(*read_setup)   (struct nfs_read_data *, unsigned int count);
 	void	(*write_setup)  (struct nfs_write_data *, unsigned int count, int how);
-- 
cgit v1.2.3


From ee17e0d649279f5cc07ed24506b0fcea53dbe899 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:48 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Define the new NFSv4 data structure for passing user information
from the 'mount' program in nfs4_mount.h.

If CONFIG_NFS_V4 is defined
        Add code to parse the mount structure into the superblock.
        Declare the NFSv4 filesystem to the VFS.
---
 fs/nfs/inode.c             | 238 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs.h        |   2 +-
 include/linux/nfs3.h       |   5 +
 include/linux/nfs4_mount.h |  70 +++++++++++++
 include/linux/nfs_fs.h     |  30 ++++++
 include/linux/nfs_fs_sb.h  |  10 ++
 include/linux/nfs_mount.h  |   8 +-
 7 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/nfs4_mount.h

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 940173f909b8..685d9eb076cb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -28,6 +28,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
 #include <linux/nfs_flushd.h>
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
@@ -157,6 +158,7 @@ nfs_put_super(struct super_block *sb)
 		lockd_down();	/* release rpc.lockd */
 	rpciod_down();		/* release rpciod */
 
+	destroy_nfsv4_state(server);
 	kfree(server->hostname);
 }
 
@@ -1283,6 +1285,239 @@ static struct file_system_type nfs_fs_type = {
 	.fs_flags	= FS_ODD_RENAME,
 };
 
+#ifdef CONFIG_NFS_V4
+
+static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+{
+	struct nfs_server *server;
+	struct rpc_xprt *xprt = NULL;
+	struct rpc_clnt *clnt = NULL;
+	struct rpc_timeout timeparms;
+	rpc_authflavor_t authflavour;
+	int proto, err = -EIO;
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	server = NFS_SB(sb);
+	if (data->rsize != 0)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize != 0)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	/* NFSv4 doesn't use NLM locking */
+	server->flags |= NFS_MOUNT_NONLM;
+
+	server->acregmin = data->acregmin*HZ;
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
+
+	server->rpc_ops = &nfs_v4_clientops;
+
+	/* Initialize timeout values */
+	timeparms.to_initval = data->timeo * HZ / 10;
+	timeparms.to_retries = data->retrans;
+	timeparms.to_exponential = 1;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 5;
+
+	proto = data->proto;
+	/* Which IP protocol do we use? */
+	switch (proto) {
+	case IPPROTO_TCP:
+		timeparms.to_maxval  = RPC_MAX_TCP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 600 * HZ / 10;
+		break;
+	case IPPROTO_UDP:
+		timeparms.to_maxval  = RPC_MAX_UDP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 11 * HZ / 10;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen > 1)
+			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour)))
+			return -EFAULT;	/* nothing has been allocated yet, so nothing to undo */
+	}
+
+	/* Now create transport and client */
+	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+	if (xprt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		goto out_fail;
+	}
+
+	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				 server->rpc_ops->version, authflavour);
+	if (clnt == NULL) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		xprt_destroy(xprt);
+		goto out_fail;
+	}
+
+	clnt->cl_intr     = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0;
+	clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0;
+	clnt->cl_chatty   = 1;
+	server->client    = clnt;
+
+	/* Fire up rpciod if not yet running */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		goto out_shutdown;
+	}
+
+	if (create_nfsv4_state(server, data))
+		goto out_rpciod;	/* rpciod_up() succeeded, so it must be balanced */
+
+	err = nfs_sb_init(sb);
+	if (err == 0)
+		return 0;
+	destroy_nfsv4_state(server);
+out_rpciod:
+	rpciod_down();
+out_shutdown:
+	rpc_shutdown_client(server->client);
+out_fail:
+	return err;
+}
+
+static int nfs4_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *wanted = data;
+	struct nfs_server *existing = NFS_SB(sb);
+
+	/* Report a match (1) only if server name and mount path both agree. */
+	if (strcmp(wanted->hostname, existing->hostname) != 0)
+		return 0;
+	/* strcmp() == 0 means the paths are equal */
+	return strcmp(wanted->mnt_path, existing->mnt_path) == 0;
+}
+
+/* Copy src into dst (kmalloc'ed when dst is NULL); dst must hold maxlen + 1 bytes. */
+static void *
+nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+{
+	void *allocated = NULL;
+
+	if (src->len == 0)
+		return ERR_PTR(-EINVAL);
+	if (maxlen > src->len)
+		maxlen = src->len;
+	if (dst == NULL) {
+		dst = allocated = kmalloc(maxlen + 1, GFP_KERNEL);
+		if (allocated == NULL)
+			return ERR_PTR(-ENOMEM);
+	}
+	if (copy_from_user(dst, src->data, maxlen) != 0) {
+		kfree(allocated);	/* NULL (a no-op) when the caller owns dst */
+		return ERR_PTR(-EFAULT);
+	}
+	dst[maxlen] = '\0';
+	return dst;
+}
+
+/* Parse the user-supplied nfs4_mount_data into a new nfs_server and obtain a
+ * superblock for it via sget(); returns the superblock or an ERR_PTR(). */
+static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs4_mount_data *data = raw_data;
+	void *p;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));
+
+	if (data->version != NFS4_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
+	}
+
+	p = nfs_copy_user_string(NULL, &data->hostname, 256);
+	if (IS_ERR(p))
+		goto out_err;
+	server->hostname = p;
+
+	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
+	if (IS_ERR(p))
+		goto out_err;
+	server->mnt_path = p;
+
+	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
+			sizeof(server->ip_addr) - 1);	/* keep room for the NUL */
+	if (IS_ERR(p))
+		goto out_err;
+
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(server->addr)) {
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
+		s = ERR_PTR(-EFAULT);
+		goto out_free;
+	}
+	if (server->addr.sin_family != AF_INET ||
+	    server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote IP address!\n");
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+
+	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+
+	if (IS_ERR(s) || s->s_root)
+		goto out_free;
+
+	s->s_flags = flags;
+
+	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+out_err:
+	s = (struct super_block *)p;
+out_free:
+	kfree(server->mnt_path);	/* kfree(NULL) is a no-op for fields not yet set */
+	kfree(server->hostname);
+	kfree(server);
+	return s;
+}
+
+static struct file_system_type nfs4_fs_type = {
+	.name		= "nfs4",
+	.owner		= THIS_MODULE,
+	.fs_flags	= FS_ODD_RENAME,
+	.get_sb		= nfs4_get_sb,
+	.kill_sb	= nfs_kill_super,
+};
+#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
+#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+#else
+#define register_nfs4fs() (0)
+#define unregister_nfs4fs()
+#endif
+
 extern int nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
 extern int nfs_init_readpagecache(void);
@@ -1374,6 +1609,8 @@ static int __init init_nfs_fs(void)
         err = register_filesystem(&nfs_fs_type);
 	if (err)
 		goto out;
+	if ((err = register_nfs4fs()) != 0)
+		goto out;
 	return 0;
 out:
 	rpc_proc_unregister("nfs");
@@ -1398,6 +1635,7 @@ static void __exit exit_nfs_fs(void)
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_filesystem(&nfs_fs_type);
+	unregister_nfs4fs();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 181e8decebfc..d99650a19b55 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -120,7 +120,7 @@ enum nfs_ftype {
 /*
  * This is the kernel NFS client file handle representation
  */
-#define NFS_MAXFHSIZE		64
+#define NFS_MAXFHSIZE		128
 struct nfs_fh {
 	unsigned short		size;
 	unsigned char		data[NFS_MAXFHSIZE];
diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h
index 359c73e00841..7f11fa589207 100644
--- a/include/linux/nfs3.h
+++ b/include/linux/nfs3.h
@@ -59,6 +59,11 @@ enum nfs3_ftype {
 	NF3BAD  = 8
 };
 
+struct nfs3_fh {	/* handle layout used for the 'root' field of struct nfs_mount_data */
+	unsigned short size;
+	unsigned char  data[NFS3_FHSIZE];	/* storage bounded by NFS3_FHSIZE */
+};
+
 #define NFS3_VERSION		3
 #define NFS3PROC_NULL		0
 #define NFS3PROC_GETATTR	1
diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
new file mode 100644
index 000000000000..9a782c2bbdd3
--- /dev/null
+++ b/include/linux/nfs4_mount.h
@@ -0,0 +1,70 @@
+#ifndef _LINUX_NFS4_MOUNT_H
+#define _LINUX_NFS4_MOUNT_H
+
+/*
+ *  linux/include/linux/nfs4_mount.h
+ *
+ *  Copyright (C) 2002  Trond Myklebust
+ *
+ *  structure passed from user-space to kernel-space during an nfsv4 mount
+ */
+
+/*
+ * WARNING!  Do not delete or change the order of these fields.  If
+ * a new field is required then add it to the end.  The version field
+ * tracks which fields are present.  This will ensure some measure of
+ * mount-to-kernel version compatibility.  Some of these aren't used yet
+ * but here they are anyway.
+ */
+#define NFS4_MOUNT_VERSION	1
+
+struct nfs_string {
+	unsigned int len;	/* number of bytes available at 'data' */
+	const char* data;	/* user-space pointer; the kernel reads it with copy_from_user() */
+};
+
+struct nfs4_mount_data {
+	int version;				/* 1 */
+	int flags;				/* 1 */
+	int rsize;				/* 1 */
+	int wsize;				/* 1 */
+	int timeo;				/* 1: tenths of a second */
+	int retrans;				/* 1 */
+	int acregmin;				/* 1: seconds */
+	int acregmax;				/* 1: seconds */
+	int acdirmin;				/* 1: seconds */
+	int acdirmax;				/* 1: seconds */
+
+	/* see the definition of 'struct clientaddr4' in RFC3010 */
+	struct nfs_string client_addr;		/* 1 */
+
+	/* Mount path */
+	struct nfs_string mnt_path;		/* 1 */
+
+	/* Server details */
+	struct nfs_string hostname;		/* 1 */
+	/* Server IP address: user-space pointer; length must equal sizeof(struct sockaddr_in) */
+	unsigned int host_addrlen;		/* 1 */
+	struct sockaddr* host_addr;		/* 1 */
+
+	/* Transport protocol to use (IPPROTO_TCP or IPPROTO_UDP) */
+	int proto;				/* 1 */
+
+	/* Pseudo-flavours to use for authentication (user-space array, first entry used). See RFC2623 */
+	int auth_flavourlen;			/* 1 */
+	int *auth_flavours;			/* 1 */
+};
+
+/* bits in the flags field */
+/* Note: the fields that correspond to existing NFSv2/v3 mount options
+ * 	 should mirror the values from include/linux/nfs_mount.h
+ */
+
+#define NFS4_MOUNT_SOFT		0x0001	/* 1 */
+#define NFS4_MOUNT_INTR		0x0002	/* 1 */
+#define NFS4_MOUNT_NOCTO	0x0010	/* 1 */
+#define NFS4_MOUNT_NOAC		0x0020	/* 1 */
+#define NFS4_MOUNT_STRICTLOCK	0x1000	/* 1 */
+#define NFS4_MOUNT_FLAGMASK	0xFFFF
+
+#endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4c35f7cbb97c..76ab4ecc3ea8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -472,6 +472,36 @@ extern void * nfs_root_data(void);
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
+#ifdef CONFIG_NFS_V4
+
+extern struct nfs4_client *nfs4_get_client(void);
+extern void nfs4_put_client(struct nfs4_client *clp);
+
+struct nfs4_mount_data;
+static inline int
+create_nfsv4_state(struct nfs_server *server, struct nfs4_mount_data *data)
+{
+	server->nfs4_state = NULL;	/* stub: no client state is attached; 'data' is unused */
+	return 0;			/* always reports success */
+}
+
+static inline void
+destroy_nfsv4_state(struct nfs_server *server)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+
+	/* Release the mount path and the client reference; both end up NULL. */
+	kfree(server->mnt_path);
+	server->mnt_path = NULL;
+	if (clp != NULL)
+		nfs4_put_client(clp);
+	server->nfs4_state = NULL;
+}
+#else
+#define create_nfsv4_state(server, data)  (0)	/* no-op stubs when NFSv4 is not configured */
+#define destroy_nfsv4_state(server)       do { } while (0)
+#endif
+
 #endif /* __KERNEL__ */
 
 /*
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 080c98fed1dd..d171608d7105 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -30,6 +30,16 @@ struct nfs_server {
 				lru_busy;
 	struct nfs_fh		fh;
 	struct sockaddr_in	addr;
+#if CONFIG_NFS_V4
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			ip_addr[16];
+	char *			mnt_path;
+	struct nfs4_client *	nfs4_state;	/* all NFSv4 state starts here */
+	unsigned long		lease_time;	/* in jiffies */
+	unsigned long		last_renewal;	/* in jiffies */
+#endif
 };
 
 /* Server capabilities */
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 2b552936eeca..223ed3462064 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -10,6 +10,8 @@
  */
 #include <linux/in.h>
 #include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 
 /*
  * WARNING!  Do not delete or change the order of these fields.  If
@@ -37,7 +39,7 @@ struct nfs_mount_data {
 	char		hostname[256];		/* 1 */
 	int		namlen;			/* 2 */
 	unsigned int	bsize;			/* 3 */
-	struct nfs_fh	root;			/* 4 */
+	struct nfs3_fh	root;			/* 4 */
 };
 
 /* bits in the flags field */
@@ -53,6 +55,10 @@ struct nfs_mount_data {
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
 #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 4 */
+#if 0
+#define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
+#define NFS_MOUNT_SECFLAVOUR	0x2000	/* reserved */
+#endif
 #define NFS_MOUNT_FLAGMASK	0xFFFF
 
 #endif
-- 
cgit v1.2.3


From bf5344dc4c1c97bd8c837f8fa62315f13784f94d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Tue, 15 Oct 2002 05:30:53 -0700
Subject: [PATCH] A basic NFSv4 client for 2.5.x

Now that all the hooks are in place, this large patch imports all
of the new code for the NFSv4 client.
  nfs4proc.c   - procedure vectors
  nfs4xdr.c    - XDR
  nfs4state.c  - state bookkeeping (very minimal for now)
  nfs4renewd.c - a daemon (implemented as an rpc_task) to keep
                 state from expiring on the server

Note: The RPCSEC_GSS authentication code is not yet included here.
  For the moment we make do with AUTH_UNIX aka. AUTH_SYS.

  Neither is the code to do upcalls to userland in order to do
  uid/gid <-> name mappings. Instead, stubs have been added to
  translate everything to 'nobody:nobody' == '-2:-2'
---
 fs/nfs/Makefile         |    1 +
 fs/nfs/file.c           |    6 +
 fs/nfs/inode.c          |    7 +-
 fs/nfs/nfs4proc.c       | 1577 +++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4renewd.c     |  110 +++
 fs/nfs/nfs4state.c      |   81 +++
 fs/nfs/nfs4xdr.c        | 1777 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h  |   19 +
 include/linux/nfs_xdr.h |  234 ++++++-
 9 files changed, 3807 insertions(+), 5 deletions(-)
 create mode 100644 fs/nfs/nfs4proc.c
 create mode 100644 fs/nfs/nfs4renewd.c
 create mode 100644 fs/nfs/nfs4state.c
 create mode 100644 fs/nfs/nfs4xdr.c

(limited to 'include/linux')

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 836322c2be06..c098a522553b 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -8,6 +8,7 @@ nfs-y 			:= dir.o file.o flushd.o inode.o nfs2xdr.o pagelist.o \
 			   proc.o read.o symlink.o unlink.o write.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-objs		:= $(nfs-y)
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3443f647ed2f..f02b7c9c7f36 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -259,6 +259,12 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 	if (!inode)
 		return -EINVAL;
 
+	/* This will be in a forthcoming patch. */
+	if (NFS_PROTO(inode)->version == 4) {
+		printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n");
+		return -EIO;
+	}
+
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 685d9eb076cb..39027f2af310 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -77,8 +77,13 @@ static struct rpc_version *	nfs_version[] = {
 	NULL,
 	NULL,
 	&nfs_version2,
-#ifdef CONFIG_NFS_V3
+#if defined(CONFIG_NFS_V3)
 	&nfs_version3,
+#elif defined(CONFIG_NFS_V4)
+	NULL,
+#endif
+#if defined(CONFIG_NFS_V4)
+	&nfs_version4,
 #endif
 };
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
new file mode 100644
index 000000000000..8608fd9b3a30
--- /dev/null
+++ b/fs/nfs/nfs4proc.c
@@ -0,0 +1,1577 @@
+/*
+ *  fs/nfs/nfs4proc.c
+ *
+ *  Client-side procedure declarations for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+#define GET_OP(cp,name)		(&(cp)->ops[(cp)->req_nops].u.name)	/* next free op's payload */
+#define OPNUM(cp)		((cp)->ops[(cp)->req_nops].opnum)	/* next free op's opcode (lvalue) */
+
+extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+
+static nfs4_stateid zero_stateid =
+  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED;
+
+static void
+nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops,
+		    struct nfs_server *server, char *tag)
+{
+	/* Start from an all-zero compound, so req_nops begins at 0. */
+	memset(cp, 0, sizeof(*cp));
+	cp->server = server;
+	cp->ops = ops;
+#if NFS4_DEBUG
+	cp->tag = tag;
+	cp->taglen = strlen(tag);
+#endif
+}
+
+static void
+nfs4_setup_access(struct nfs4_compound *cp, u32 req_access, u32 *resp_supported, u32 *resp_access)
+{
+	struct nfs4_access *op = GET_OP(cp, access);
+
+	/* Fill slot ops[req_nops] with an ACCESS op, then claim the slot. */
+	op->ac_resp_access = resp_access;
+	op->ac_resp_supported = resp_supported;
+	op->ac_req_access = req_access;
+	OPNUM(cp) = OP_ACCESS;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_close(struct nfs4_compound *cp, nfs4_stateid stateid, u32 seqid)
+{
+	struct nfs4_close *op = GET_OP(cp, close);
+
+	op->cl_seqid = seqid;
+	op->cl_stateid = stateid;
+	OPNUM(cp) = OP_CLOSE;
+
+	/* renew_index records the op count including this CLOSE. */
+	cp->renew_index = ++cp->req_nops;
+}
+
+static void
+nfs4_setup_commit(struct nfs4_compound *cp, u64 start, u32 len, struct nfs_writeverf *verf)
+{
+	struct nfs4_commit *op = GET_OP(cp, commit);
+
+	/* Queue a COMMIT op carrying start/len; verf is stored, not read here. */
+	op->co_verifier = verf;
+	op->co_len = len;
+	op->co_start = start;
+	OPNUM(cp) = OP_COMMIT;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_create_dir(struct nfs4_compound *cp, struct qstr *name,
+		      struct iattr *sattr, struct nfs4_change_info *info)
+{
+	struct nfs4_create *op = GET_OP(cp, create);
+
+	/* Queue a CREATE op of type NF4DIR for the entry called 'name'. */
+	op->cr_ftype = NF4DIR;
+	op->cr_name = name->name;
+	op->cr_namelen = name->len;
+	op->cr_cinfo = info;
+	op->cr_attrs = sattr;
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_create_symlink(struct nfs4_compound *cp, struct qstr *name,
+			  struct qstr *linktext, struct iattr *sattr,
+			  struct nfs4_change_info *info)
+{
+	struct nfs4_create *op = GET_OP(cp, create);
+
+	/* Queue a CREATE op of type NF4LNK; 'linktext' supplies cr_text. */
+	op->cr_ftype = NF4LNK;
+	op->cr_name = name->name;
+	op->cr_namelen = name->len;
+	op->cr_text = linktext->name;
+	op->cr_textlen = linktext->len;
+	op->cr_cinfo = info;
+	op->cr_attrs = sattr;
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops += 1;
+}
+
+/*
+ * Queue a CREATE op for a FIFO, socket, or block/character device node.
+ */
+static void
+nfs4_setup_create_special(struct nfs4_compound *cp, struct qstr *name,
+			    dev_t dev, struct iattr *sattr,
+			    struct nfs4_change_info *info)
+{
+	struct nfs4_create *op = GET_OP(cp, create);
+	int mode = sattr->ia_mode;
+
+	/* Callers must supply a mode, and it must name a special file type. */
+	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
+	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+
+	if (S_ISBLK(mode) || S_ISCHR(mode)) {
+		/* Only device nodes carry a major/minor pair. */
+		op->cr_ftype = S_ISBLK(mode) ? NF4BLK : NF4CHR;
+		op->cr_specdata1 = MAJOR(dev);
+		op->cr_specdata2 = MINOR(dev);
+	} else if (S_ISFIFO(mode)) {
+		op->cr_ftype = NF4FIFO;
+	} else {
+		op->cr_ftype = NF4SOCK;
+	}
+
+	op->cr_name = name->name;
+	op->cr_namelen = name->len;
+	op->cr_cinfo = info;
+	op->cr_attrs = sattr;
+
+	OPNUM(cp) = OP_CREATE;
+	cp->req_nops += 1;
+}
+
+/*
+ * Standard attribute bitmap (word 0, word 1) requested by nfs4_setup_getattr().
+ */
+u32 nfs4_fattr_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+u32 nfs4_statfs_bitmap[2] = {	/* attributes requested by nfs4_setup_statfs() */
+	FATTR4_WORD0_FILES_AVAIL
+	| FATTR4_WORD0_FILES_FREE
+	| FATTR4_WORD0_FILES_TOTAL,
+	FATTR4_WORD1_SPACE_AVAIL
+	| FATTR4_WORD1_SPACE_FREE
+	| FATTR4_WORD1_SPACE_TOTAL
+};
+
+u32 nfs4_fsinfo_bitmap[2] = {	/* attributes requested by nfs4_setup_fsinfo() */
+	FATTR4_WORD0_MAXFILESIZE
+	| FATTR4_WORD0_MAXREAD
+        | FATTR4_WORD0_MAXWRITE
+	| FATTR4_WORD0_LEASE_TIME,
+	0			/* nothing is requested from word 1 */
+};
+
+u32 nfs4_pathconf_bitmap[2] = {	/* attributes requested by nfs4_setup_pathconf() */
+	FATTR4_WORD0_MAXLINK
+	| FATTR4_WORD0_MAXNAME,
+	0			/* nothing is requested from word 1 */
+};
+
+/* mount bitmap: fattr bitmap + lease time; requested by nfs4_setup_getrootattr() */
+u32 nfs4_mount_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID
+	| FATTR4_WORD0_LEASE_TIME,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+static inline void
+__nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap,
+		     struct nfs_fattr *fattr,
+		     struct nfs_fsstat *fsstat,
+		     struct nfs_fsinfo *fsinfo,
+		     struct nfs_pathconf *pathconf,
+		     u32 *bmres)
+{
+	struct nfs4_getattr *op = GET_OP(cp, getattr);
+
+	/* Queue a GETATTR op: request bitmap plus the result targets (any may be NULL). */
+	op->gt_bmval = bitmap;
+	op->gt_bmres = bmres;
+	op->gt_attrs = fattr;
+	op->gt_fsstat = fsstat;
+	op->gt_fsinfo = fsinfo;
+	op->gt_pathconf = pathconf;
+	OPNUM(cp) = OP_GETATTR;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_getattr(struct nfs4_compound *cp, struct nfs_fattr *fattr,
+		u32 *bmres)
+{
+	/* Plain file attributes only: no fsstat, fsinfo or pathconf target. */
+	__nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, NULL, NULL, NULL,
+			bmres);
+}
+
+static void
+nfs4_setup_getrootattr(struct nfs4_compound *cp, struct nfs_fattr *fattr,
+		struct nfs_fsinfo *fsinfo, u32 *bmres)
+{
+	u32 *wanted = nfs4_mount_bitmap;
+
+	/* One GETATTR delivering both file attributes and fsinfo. */
+	__nfs4_setup_getattr(cp, wanted, fattr, NULL, fsinfo, NULL, bmres);
+}
+
+static void
+nfs4_setup_statfs(struct nfs4_compound *cp, struct nfs_fsstat *fsstat,
+		u32 *bmres)
+{
+	/* Only the fsstat target is supplied; the other targets stay NULL. */
+	__nfs4_setup_getattr(cp, nfs4_statfs_bitmap, NULL, fsstat, NULL, NULL,
+			bmres);
+}
+
+static void
+nfs4_setup_fsinfo(struct nfs4_compound *cp, struct nfs_fsinfo *fsinfo,
+		u32 *bmres)
+{
+	/* Only the fsinfo target is supplied; the other targets stay NULL. */
+	__nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap, NULL, NULL, fsinfo, NULL,
+			bmres);
+}
+
+static void
+nfs4_setup_pathconf(struct nfs4_compound *cp, struct nfs_pathconf *pathconf,
+		u32 *bmres)
+{
+	/* Only the pathconf target is supplied; the other targets stay NULL. */
+	__nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, NULL, NULL, NULL, pathconf,
+			bmres);
+}
+
+static void
+nfs4_setup_getfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+{
+	struct nfs4_getfh *op = GET_OP(cp, getfh);
+
+	/* Queue a GETFH op; 'fhandle' is only stored here, not touched. */
+	op->gf_fhandle = fhandle;
+	OPNUM(cp) = OP_GETFH;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_link(struct nfs4_compound *cp, struct qstr *name,
+		struct nfs4_change_info *info)
+{
+	struct nfs4_link *op = GET_OP(cp, link);
+
+	/* Queue a LINK op for the new entry called 'name'. */
+	op->ln_name = name->name;
+	op->ln_namelen = name->len;
+	op->ln_cinfo = info;
+	OPNUM(cp) = OP_LINK;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_lookup(struct nfs4_compound *cp, struct qstr *q)
+{
+	struct nfs4_lookup *op = GET_OP(cp, lookup);
+
+	/* Queue a LOOKUP op; the qstr itself is referenced, not copied. */
+	op->lo_name = q;
+	OPNUM(cp) = OP_LOOKUP;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_putfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+{
+	struct nfs4_putfh *op = GET_OP(cp, putfh);
+
+	/* Queue a PUTFH op referring to the caller's file handle. */
+	op->pf_fhandle = fhandle;
+	OPNUM(cp) = OP_PUTFH;
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_putrootfh(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_PUTROOTFH;	/* this op carries no arguments */
+	cp->req_nops += 1;
+}
+
+static void
+nfs4_setup_open(struct nfs4_compound *cp, int flags, struct qstr *name,
+		struct iattr *sattr, char *stateid, struct nfs4_change_info *cinfo,
+		u32 *rflags)
+{
+	struct nfs4_open *op = GET_OP(cp, open);
+	u32 *verf = (u32 *) op->op_verifier;
+
+	BUG_ON(cp->flags);
+	op->op_name = name;
+	op->op_attrs = sattr;
+	op->op_stateid = stateid;
+	op->op_cinfo = cinfo;
+	op->op_rflags = rflags;
+	op->op_share_access = flags & 3;
+	op->op_opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE;
+	if (flags & O_EXCL) {
+		/* Exclusive create: the verifier is built from jiffies and the pid. */
+		verf[0] = jiffies;
+		verf[1] = current->pid;
+		op->op_createmode = NFS4_CREATE_EXCLUSIVE;
+	} else {
+		op->op_createmode = NFS4_CREATE_UNCHECKED;
+	}
+
+	OPNUM(cp) = OP_OPEN;
+	cp->renew_index = ++cp->req_nops;
+}
+
+static void
+nfs4_setup_open_confirm(struct nfs4_compound *cp, char *stateid)
+{
+	struct nfs4_open_confirm *op = GET_OP(cp, open_confirm);
+
+	op->oc_stateid = stateid;
+	OPNUM(cp) = OP_OPEN_CONFIRM;
+
+	/* renew_index records the op count including this OPEN_CONFIRM. */
+	cp->renew_index = ++cp->req_nops;
+}
+
+static void
+nfs4_setup_read(struct nfs4_compound *cp, u64 offset, u32 length,
+		struct page **pages, unsigned int pgbase, u32 *eofp, u32 *bytes_read)
+{
+	struct nfs4_read *op = GET_OP(cp, read);
+
+	/* Queue a READ op; eofp and bytes_read are stored as result pointers. */
+	op->rd_offset = offset;
+	op->rd_length = length;
+	op->rd_pgbase = pgbase;
+	op->rd_pages = pages;
+	op->rd_bytes_read = bytes_read;
+	op->rd_eof = eofp;
+	OPNUM(cp) = OP_READ;
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a READDIR operation to the compound being built and, when the
+ * listing starts at cookie 0 or 1, pre-fill the first page with fake
+ * directory entries for '.' and/or '..'.
+ *
+ * cookie:   readdir cookie supplied by the caller; values 0, 1 and 2 are
+ *           handled locally (see below), larger values go to the server.
+ * verifier: cookie verifier copied into the request.
+ * pages:    page vector receiving the reply; only *pages is touched here.
+ * bufsize:  size of the reply buffer; must be at least 80 bytes, which is
+ *           the space taken by the two fake entries (40 bytes each).
+ * dentry:   directory being read; supplies the file ids of the fake entries.
+ *
+ * Only the FILEID attribute is requested for each entry.
+ */
+static void
+nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier,
+		     struct page **pages, unsigned int bufsize, struct dentry *dentry)
+{
+	u32 *start, *p;
+	struct nfs4_readdir *readdir = GET_OP(cp, readdir);
+
+	BUG_ON(bufsize < 80);
+	/* Cookies 0..2 are local inventions: the server is always asked from 0. */
+	readdir->rd_cookie = (cookie > 2) ? cookie : 0;
+	memcpy(readdir->rd_req_verifier, verifier, sizeof(nfs4_verifier));
+	readdir->rd_count = bufsize;
+	readdir->rd_bmval[0] = FATTR4_WORD0_FILEID;
+	readdir->rd_bmval[1] = 0;
+	readdir->rd_pages = pages;
+	readdir->rd_pgbase = 0;
+	
+	OPNUM(cp) = OP_READDIR;
+	cp->req_nops++;
+
+	/* Both fake entries were already delivered: nothing to pre-fill. */
+	if (cookie >= 2)
+		return;
+	
+	/*
+	 * NFSv4 servers do not return entries for '.' and '..'
+	 * Therefore, we fake these entries here.  The fake '.' entry
+	 * carries cookie 1 and the fake '..' entry carries cookie 2,
+	 * so a caller resuming at cookie 1 gets only '..' and a caller
+	 * resuming at cookie 2 gets only real entries.  Note that
+	 * when talking to the server, we always send cookie 0
+	 * instead of 1 or 2.
+	 */
+	start = p = (u32 *)kmap(*pages);
+	
+	/* '.' is only emitted when the listing starts from the very beginning. */
+	if (cookie == 0) {
+		*p++ = xdr_one;                                  /* next */
+		*p++ = xdr_zero;                   /* cookie, first word */
+		*p++ = xdr_one;                   /* cookie, second word */
+		*p++ = xdr_one;                             /* entry len */
+		memcpy(p, ".\0\0\0", 4);                        /* entry */
+		p++;
+		*p++ = xdr_one;                         /* bitmap length */
+		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+		*p++ = htonl(8);              /* attribute buffer length */
+		p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
+	}
+	
+	/* '..' is emitted for both cookie 0 and cookie 1. */
+	*p++ = xdr_one;                                  /* next */
+	*p++ = xdr_zero;                   /* cookie, first word */
+	*p++ = xdr_two;                   /* cookie, second word */
+	*p++ = xdr_two;                             /* entry len */
+	memcpy(p, "..\0\0", 4);                         /* entry */
+	p++;
+	*p++ = xdr_one;                         /* bitmap length */
+	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+	*p++ = htonl(8);              /* attribute buffer length */
+	p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
+
+	/*
+	 * The reply is placed after the fake entries: advance the page
+	 * base past them and shrink the requested count by the same amount.
+	 */
+	readdir->rd_pgbase = (char *)p - (char *)start;
+	readdir->rd_count -= readdir->rd_pgbase;
+	kunmap(*pages);
+}
+
+/*
+ * Append a READLINK operation to the compound being built.
+ * count and pages are recorded in the operation unchanged.
+ */
+static void
+nfs4_setup_readlink(struct nfs4_compound *cp, int count, struct page **pages)
+{
+	struct nfs4_readlink *op = GET_OP(cp, readlink);
+
+	op->rl_count = count;
+	op->rl_pages = pages;
+	OPNUM(cp) = OP_READLINK;
+
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a REMOVE operation to the compound being built.
+ * The name bytes are referenced, not copied; cinfo is the caller's
+ * change_info buffer recorded in the operation.
+ */
+static void
+nfs4_setup_remove(struct nfs4_compound *cp, struct qstr *name, struct nfs4_change_info *cinfo)
+{
+	struct nfs4_remove *op = GET_OP(cp, remove);
+
+	op->rm_namelen = name->len;
+	op->rm_name = name->name;
+	op->rm_cinfo = cinfo;
+	OPNUM(cp) = OP_REMOVE;
+
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a RENAME operation to the compound being built.
+ * Both names are referenced by pointer; old_cinfo and new_cinfo are the
+ * caller's change_info buffers for the source and target directories.
+ */
+static void
+nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new,
+		  struct nfs4_change_info *old_cinfo, struct nfs4_change_info *new_cinfo)
+{
+	struct nfs4_rename *op = GET_OP(cp, rename);
+
+	/* Source side. */
+	op->rn_oldnamelen = old->len;
+	op->rn_oldname = old->name;
+	/* Destination side. */
+	op->rn_newnamelen = new->len;
+	op->rn_newname = new->name;
+	/* Change-info buffers. */
+	op->rn_src_cinfo = old_cinfo;
+	op->rn_dst_cinfo = new_cinfo;
+
+	OPNUM(cp) = OP_RENAME;
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a RENEW operation to the compound being built and record its
+ * 1-based position in cp->renew_index.
+ */
+static void
+nfs4_setup_renew(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_RENEW;
+	cp->req_nops += 1;
+	cp->renew_index = cp->req_nops;
+}
+
+/*
+ * Append a RESTOREFH operation to the compound being built.
+ * The operation carries no arguments.
+ */
+static void
+nfs4_setup_restorefh(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_RESTOREFH;
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a SAVEFH operation to the compound being built.
+ * The operation carries no arguments.
+ */
+static void
+nfs4_setup_savefh(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_SAVEFH;
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a SETATTR operation to the compound being built.
+ * The stateid buffer and the iattr are referenced by pointer.
+ */
+static void
+nfs4_setup_setattr(struct nfs4_compound *cp, char *stateid, struct iattr *iap)
+{
+	struct nfs4_setattr *op = GET_OP(cp, setattr);
+
+	op->st_stateid = stateid;
+	op->st_iap = iap;
+	OPNUM(cp) = OP_SETATTR;
+
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a SETCLIENTID operation to the compound being built.
+ *
+ * The verifier is made of the current time of day (seconds, then
+ * microseconds), and the client name is the server structure's ip_addr
+ * string.  The callback netid is hard-wired to "udp" and the callback
+ * universal address is "<ip_addr>.<port high byte>.<port low byte>".
+ *
+ * NOTE(review): both sprintf() calls are unbounded; the sizes of
+ * sc_netid and sc_uaddr are not visible here -- confirm they can hold
+ * the longest possible ip_addr string plus the port suffix.
+ */
+static void
+nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port)
+{
+	struct nfs4_setclientid *op = GET_OP(cp, setclientid);
+	struct nfs_server *srv = cp->server;
+	struct timeval now;
+	u32 *verf = (u32 *)op->sc_verifier;
+
+	do_gettimeofday(&now);
+	verf[0] = now.tv_sec;
+	verf[1] = now.tv_usec;
+
+	op->sc_name = srv->ip_addr;
+	sprintf(op->sc_netid, "udp");
+	sprintf(op->sc_uaddr, "%s.%d.%d", srv->ip_addr, port >> 8, port & 255);
+	op->sc_prog = program;
+	op->sc_cb_ident = 0;
+
+	OPNUM(cp) = OP_SETCLIENTID;
+	cp->req_nops += 1;
+}
+
+/*
+ * Append a SETCLIENTID_CONFIRM operation to the compound being built
+ * and record its 1-based position in cp->renew_index.
+ */
+static void
+nfs4_setup_setclientid_confirm(struct nfs4_compound *cp)
+{
+	OPNUM(cp) = OP_SETCLIENTID_CONFIRM;
+	cp->req_nops += 1;
+	cp->renew_index = cp->req_nops;
+}
+
+/*
+ * Append a WRITE operation to the compound being built.
+ *
+ * offset/length describe the byte range, stable is the requested
+ * stability level, pages/pgbase describe the source page vector and the
+ * starting offset within it.  bytes_written and verf are caller
+ * locations recorded in the operation.
+ */
+static void
+nfs4_setup_write(struct nfs4_compound *cp, u64 offset, u32 length, int stable,
+		 struct page **pages, unsigned int pgbase, u32 *bytes_written,
+		 struct nfs_writeverf *verf)
+{
+	struct nfs4_write *op = GET_OP(cp, write);
+
+	/* Request arguments. */
+	op->wr_offset = offset;
+	op->wr_stable_how = stable;
+	op->wr_len = length;
+	/* Result locations. */
+	op->wr_bytes_written = bytes_written;
+	op->wr_verf = verf;
+	/* Payload. */
+	op->wr_pages = pages;
+	op->wr_pgbase = pgbase;
+
+	OPNUM(cp) = OP_WRITE;
+	cp->req_nops += 1;
+}
+
+/*
+ * Generic lease processing, run after a compound has completed.
+ *
+ * If the compound contained a lease-renewing operation (renew_index is
+ * non-zero) and that operation succeeded -- either the whole compound
+ * succeeded, or the reply contains more operations than renew_index --
+ * advance server->last_renewal to the time the request was sent.  The
+ * send time is used rather than the current time because all we know is
+ * that the lease was renewed sometime in between, and the worst case
+ * has to be assumed.
+ *
+ * Notes:
+ *   (1) renewd doesn't acquire the spinlock when messing with
+ *       server->last_renewal; this is OK since rpciod always runs
+ *       under the BKL.
+ *   (2) cp->timestamp was set at the end of XDR encode.
+ */
+static inline void
+process_lease(struct nfs4_compound *cp)
+{
+	struct nfs_server *srv;
+
+	/* No lease-renewing operation in this compound. */
+	if (cp->renew_index == 0)
+		return;
+	/* The compound failed at or before the renewing operation. */
+	if (cp->toplevel_status && cp->resp_nops <= cp->renew_index)
+		return;
+
+	srv = cp->server;
+	spin_lock(&renew_lock);
+	/* Never move the renewal time backwards. */
+	if (srv->last_renewal < cp->timestamp)
+		srv->last_renewal = cp->timestamp;
+	spin_unlock(&renew_lock);
+}
+
+/*
+ * Send a prepared compound synchronously and wait for the reply.
+ *
+ * The compound serves as both argument and result of the RPC.  cred may
+ * be NULL; flags are handed to rpc_call_sync() unchanged.  On success
+ * (return value 0) lease processing is performed; otherwise the error
+ * from rpc_call_sync() is returned untouched.
+ */
+static int
+nfs4_call_compound(struct nfs4_compound *cp, struct rpc_cred *cred, int flags)
+{
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = cred,
+	};
+	int err;
+
+	err = rpc_call_sync(cp->server->client, &msg, flags);
+	if (err == 0)
+		process_lease(cp);
+
+	return err;
+}
+
+/*
+ * Combine a directory's change_info with its post-operation attributes.
+ *
+ * The attributes must be valid (both NFS_ATTR_FATTR and
+ * NFS_ATTR_FATTR_V4 set), otherwise this BUGs.  If the change attribute
+ * in fattr equals the "after" value of the change_info, the "before"
+ * value is stored as the pre-operation change attribute, the
+ * NFS_ATTR_PRE_CHANGE flag is set and the timestamp is refreshed.
+ */
+static inline void
+process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr)
+{
+	BUG_ON((fattr->valid & NFS_ATTR_FATTR) == 0);
+	BUG_ON((fattr->valid & NFS_ATTR_FATTR_V4) == 0);
+
+	/* Attributes do not describe the state right after our operation. */
+	if (fattr->change_attr != info->after)
+		return;
+
+	fattr->pre_change_attr = info->before;
+	fattr->valid |= NFS_ATTR_PRE_CHANGE;
+	fattr->timestamp = jiffies;
+}
+
+/*
+ * Open (and optionally create) "name" in directory "dir".
+ *
+ * A single compound does PUTFH(dir), SAVEFH, OPEN, GETATTR, GETFH,
+ * RESTOREFH, GETATTR: it yields the attributes and filehandle of the
+ * opened file as well as the updated directory attributes, which are
+ * used to refresh the directory inode.
+ *
+ * If the server asks for confirmation (NFS4_OPEN_RESULT_CONFIRM in the
+ * result flags) a second compound with OPEN_CONFIRM is sent and *seqid
+ * is set to 2; otherwise *seqid is set to 1.
+ *
+ * Returns 0 on success or the error from nfs4_call_compound().
+ */
+static int
+do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr,
+	struct nfs_fattr *fattr, struct nfs_fh *fhandle, u32 *seqid, char *stateid)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	u32			bmres[2];
+	u32			rflags;
+	int			err;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_open(&compound, flags, name, sattr, stateid, &dir_cinfo, &rflags);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+
+	if (rflags & NFS4_OPEN_RESULT_CONFIRM) {
+		/* The server wants the open confirmed before it is usable. */
+		*seqid = 2;
+
+		nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "open_confirm");
+		nfs4_setup_putfh(&compound, fhandle);
+		nfs4_setup_open_confirm(&compound, stateid);
+		return nfs4_call_compound(&compound, NULL, 0);
+	}
+
+	*seqid = 1;
+	return 0;
+}
+
+/*
+ * Set attributes on the file identified by fhandle and fetch the
+ * resulting attributes into fattr (PUTFH, SETATTR, GETATTR).
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+	   struct nfs_fh *fhandle, struct iattr *sattr, char *stateid)
+{
+	struct nfs4_op		ops[3];
+	struct nfs4_compound	compound;
+	u32			bmres[2];
+	int			err;
+
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, server, "setattr");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_setattr(&compound, stateid, sattr);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	return err;
+}
+
+/*
+ * Close the open identified by stateid/seqid on the file given by
+ * fhandle (PUTFH, CLOSE).  Returns the result of nfs4_call_compound().
+ */
+static int
+do_close(struct nfs_server *server, struct nfs_fh *fhandle, u32 seqid, char *stateid)
+{
+	struct nfs4_op		ops[2];
+	struct nfs4_compound	compound;
+	int			err;
+
+	nfs4_setup_compound(&compound, ops, server, "close");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_close(&compound, stateid, seqid);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	return err;
+}
+
+/*
+ * Establish client state with the server and obtain the filehandle and
+ * attributes of the mount point.
+ *
+ * Steps: acquire the shared client state, SETCLIENTID,
+ * SETCLIENTID_CONFIRM combined with fetching the root filehandle and
+ * lease time, start the renew daemon, and finally walk
+ * server->mnt_path one component at a time.
+ *
+ * Returns 0 on success, -ENOMEM if no client state could be obtained,
+ * or the error of the first step that failed.
+ */
+static int
+nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[4];
+	struct nfs_fsinfo	fsinfo;
+	u32			bmres[2];
+	unsigned char *		cur;
+	struct qstr		component;
+	int			status;
+
+	fattr->valid = 0;
+
+	server->nfs4_state = nfs4_get_client();
+	if (!server->nfs4_state)
+		return -ENOMEM;
+
+	/*
+	 * SETCLIENTID.
+	 * Until delegations are imported, we don't bother setting the program
+	 * number and port to anything meaningful.
+	 */
+	nfs4_setup_compound(&compound, ops, server, "setclientid");
+	nfs4_setup_setclientid(&compound, 0, 0);
+	status = nfs4_call_compound(&compound, NULL, 0);
+	if (status)
+		return status;
+
+	/*
+	 * SETCLIENTID_CONFIRM, plus root filehandle.
+	 * We also get the lease time here.
+	 */
+	nfs4_setup_compound(&compound, ops, server, "setclientid_confirm");
+	nfs4_setup_setclientid_confirm(&compound);
+	nfs4_setup_putrootfh(&compound);
+	nfs4_setup_getrootattr(&compound, fattr, &fsinfo, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	status = nfs4_call_compound(&compound, NULL, 0);
+	if (status)
+		return status;
+
+	/*
+	 * Now that we have instantiated the clientid and determined
+	 * the lease time, we can initialize the renew daemon for this
+	 * server.
+	 */
+	server->lease_time = fsinfo.lease_time * HZ;
+	status = nfs4_init_renewd(server);
+	if (status)
+		return status;
+
+	/*
+	 * Now we do a separate LOOKUP for each component of the mount path.
+	 * The LOOKUPs are done separately so that we can conveniently
+	 * catch an ERR_WRONGSEC if it occurs along the way...
+	 */
+	cur = server->mnt_path;
+	for (;;) {
+		/* Skip any run of slashes in front of the next component. */
+		while (*cur == '/')
+			cur++;
+		if (*cur == '\0')
+			break;
+
+		/* The component extends up to the next slash or the end. */
+		component.name = cur;
+		while (*cur != '\0' && *cur != '/')
+			cur++;
+		component.len = cur - component.name;
+
+		/* fhandle is both the starting point and the result. */
+		nfs4_setup_compound(&compound, ops, server, "mount");
+		nfs4_setup_putfh(&compound, fhandle);
+		nfs4_setup_lookup(&compound, &component);
+		nfs4_setup_getattr(&compound, fattr, bmres);
+		nfs4_setup_getfh(&compound, fhandle);
+		status = nfs4_call_compound(&compound, NULL, 0);
+		if (status) {
+			if (status == -ENOENT) {
+				printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
+				printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
+			}
+			break;
+		}
+	}
+
+	return status;
+}
+
+/*
+ * Fetch the attributes of an inode (PUTFH, GETATTR) into fattr.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs4_op ops[2];
+	struct nfs4_compound compound;
+	u32 bmres[2];
+	int err;
+
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "getattr");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_getattr(&compound, fattr, bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	return err;
+}
+
+/*
+ * Set attributes on the inode behind "dentry".
+ *
+ * A size change needs an open stateid, so in that case the file is
+ * first opened for writing by name, the SETATTR is sent with the
+ * resulting stateid, and the file is closed again.  All other changes
+ * are sent with the all-zero stateid.
+ *
+ * Returns 0 on success, the error from the open or the SETATTR, or -EIO
+ * if the name turned out to refer to a different file than the inode.
+ */
+static int
+nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		  struct iattr *sattr)
+{
+	struct inode *		inode = dentry->d_inode;
+	int			size_change = sattr->ia_valid & ATTR_SIZE;
+	struct nfs_fh		throwaway_fh;
+	u32			seqid;
+	nfs4_stateid		stateid;
+	int			status;
+
+	fattr->valid = 0;
+	
+	if (size_change) {
+		status = do_open(dentry->d_parent->d_inode, &dentry->d_name,
+				 NFS4_SHARE_ACCESS_WRITE, NULL, fattr,
+				 &throwaway_fh, &seqid, stateid);
+		if (status)
+			return status;
+
+		/*
+		 * Because OPEN is always done by name in nfsv4, it is
+		 * possible that we opened a different file by the same
+		 * name.  We can recognize this race condition, but we
+		 * can't do anything about it besides returning an error.
+		 *
+		 * XXX: Should we compare filehandles too, as in
+		 * nfs_find_actor()?
+		 */
+		if (fattr->fileid != NFS_FILEID(inode)) {
+			printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n");
+			/*
+			 * The stateid belongs to the file we actually
+			 * opened, not to this inode, so the CLOSE has to
+			 * be sent against the filehandle returned by the
+			 * open or the open state is never released.
+			 */
+			do_close(NFS_SERVER(inode), &throwaway_fh, seqid, stateid);
+			return -EIO;
+		}
+	}
+	else
+		memcpy(stateid, zero_stateid, sizeof(nfs4_stateid));
+	
+	status = do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, stateid);
+	/* Best effort: a failed close does not change the setattr result. */
+	if (size_change)
+		do_close(NFS_SERVER(inode), NFS_FH(inode), seqid, stateid);
+	return status;
+}
+
+/*
+ * Look up "name" in directory "dir".
+ *
+ * One compound fetches the directory attributes, performs the LOOKUP
+ * and returns the attributes and filehandle of the result.  If the call
+ * did not fail, the directory inode is refreshed and the outcome of
+ * that refresh becomes the return value.
+ */
+static int
+nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[5];
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	u32			bmres[2];
+	int			err;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+
+	dprintk("NFS call  lookup %s\n", name->name);
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "lookup");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	nfs4_setup_lookup(&compound, name);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	err = nfs4_call_compound(&compound, NULL, 0);
+
+	dprintk("NFS reply lookup: %d\n", err);
+
+	if (err < 0)
+		return err;
+	return nfs_refresh_inode(dir, &dir_attr);
+}
+
+/*
+ * Ask the server whether "cred" may access the inode as described by
+ * the MAY_* bits in "mode".
+ *
+ * MAY_READ maps to READ.  MAY_WRITE maps to MODIFY|EXTEND, plus DELETE
+ * for directories.  MAY_EXEC maps to LOOKUP for directories and to
+ * EXECUTE for everything else.
+ *
+ * The inode is refreshed from the attributes fetched in the same
+ * compound regardless of the outcome of the call.
+ *
+ * Returns 0 if all requested bits are granted, -ENOTSUPP if the server
+ * does not support all of them, -EACCES if some are denied, or the
+ * error from nfs4_call_compound().
+ */
+static int
+nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs_fattr	fattr;
+	u32			bmres[2];
+	u32			req_access = 0, resp_supported, resp_access;
+	int			is_dir = S_ISDIR(inode->i_mode);
+	int			err;
+
+	fattr.valid = 0;
+
+	/* Translate the VFS permission mask into NFSv4 access bits. */
+	if (mode & MAY_READ)
+		req_access |= NFS4_ACCESS_READ;
+	if (mode & MAY_WRITE) {
+		req_access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
+		if (is_dir)
+			req_access |= NFS4_ACCESS_DELETE;
+	}
+	if (mode & MAY_EXEC) {
+		if (is_dir)
+			req_access |= NFS4_ACCESS_LOOKUP;
+		else
+			req_access |= NFS4_ACCESS_EXECUTE;
+	}
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "access");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_getattr(&compound, &fattr, bmres);
+	nfs4_setup_access(&compound, req_access, &resp_supported, &resp_access);
+	err = nfs4_call_compound(&compound, cred, 0);
+	nfs_refresh_inode(inode, &fattr);
+
+	if (err)
+		return err;
+	if (req_access != resp_supported) {
+		printk(KERN_NOTICE "NFS: server didn't support all access bits!\n");
+		return -ENOTSUPP;
+	}
+	if (req_access != resp_access)
+		return -EACCES;
+	return 0;
+}
+
+/*
+ * TODO: For the time being, we don't try to get any attributes
+ * along with any of the zero-copy operations READ, READDIR,
+ * READLINK, WRITE.
+ *
+ * In the case of the first three, we want to put the GETATTR
+ * after the read-type operation -- this is because it is hard
+ * to predict the length of a GETATTR response in v4, and thus
+ * align the READ data correctly.  This means that the GETATTR
+ * may end up partially falling into the page cache, and we should
+ * shift it into the 'tail' of the xdr_buf before processing.
+ * To do this efficiently, we need to know the total length
+ * of data received, which doesn't seem to be available outside
+ * of the RPC layer.
+ *
+ * In the case of WRITE, we also want to put the GETATTR after
+ * the operation -- in this case because we want to make sure
+ * we get the post-operation mtime and size.  This means that
+ * we can't use xdr_encode_pages() as written: we need a variant
+ * of it which would leave room in the 'tail' iovec.
+ *
+ * Both of these changes to the XDR layer would in fact be quite
+ * minor, but I decided to leave them for a subsequent patch.
+ */
+/*
+ * Read the target of a symbolic link into "page" (PUTFH, READLINK),
+ * asking for at most PAGE_CACHE_SIZE bytes.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_readlink(struct inode *inode, struct page *page)
+{
+	struct nfs4_op		ops[2];
+	struct nfs4_compound	compound;
+	int			err;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "readlink");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_readlink(&compound, PAGE_CACHE_SIZE, &page);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	return err;
+}
+
+/*
+ * Synchronously read "count" bytes at offset "base" within "page".
+ *
+ * No attributes are fetched; fattr is only marked invalid.  The "flags"
+ * argument is not used.  Returns the number of bytes read, or a
+ * negative error from nfs4_call_compound().
+ */
+static int
+nfs4_proc_read(struct inode *inode, struct rpc_cred *cred,
+	       struct nfs_fattr *fattr, int flags,
+	       unsigned int base, unsigned int count,
+	       struct page *page, int *eofp)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u64			file_pos = page_offset(page) + base;
+	u32			nread;
+	int			err;
+
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "read [sync]");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_read(&compound, file_pos, count, &page, base, eofp, &nread);
+	err = nfs4_call_compound(&compound, cred, 0);
+
+	if (err < 0)
+		return err;
+	return nread;
+}
+
+/*
+ * Synchronously write "count" bytes at offset "base" within "page".
+ *
+ * NFS_RW_SYNC in flags requests a FILE_SYNC write, otherwise the write
+ * is UNSTABLE; NFS_RW_SWAP selects the swap RPC flags.  No attributes
+ * are fetched; fattr is only marked invalid.  Returns the number of
+ * bytes written, or a negative error from nfs4_call_compound().
+ */
+static int
+nfs4_proc_write(struct inode *inode, struct rpc_cred *cred,
+		struct nfs_fattr *fattr, int flags,
+		unsigned int base, unsigned int count,
+		struct page *page, struct nfs_writeverf *verf)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[2];
+	u64			file_pos = page_offset(page) + base;
+	u32			nwritten;
+	int			stable = NFS_UNSTABLE;
+	int			rpcflags = 0;
+	int			err;
+
+	if (flags & NFS_RW_SYNC)
+		stable = NFS_FILE_SYNC;
+	if (flags & NFS_RW_SWAP)
+		rpcflags = NFS_RPC_SWAPFLAGS;
+
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "write [sync]");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_write(&compound, file_pos, count, stable, &page, base, &nwritten, verf);
+	err = nfs4_call_compound(&compound, cred, rpcflags);
+
+	if (err < 0)
+		return err;
+	return nwritten;
+}
+
+/*
+ * Create a regular file "name" in "dir".
+ *
+ * The file is created with an OPEN (read access, O_CREAT, plus O_EXCL
+ * if requested) and closed again right away.  For an exclusive create
+ * the attributes are applied with a separate SETATTR after the open.
+ *
+ * Returns the error of the open, or of the SETATTR for an exclusive
+ * create; the result of the close is ignored.
+ */
+static int
+nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		 int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	nfs4_stateid		stateid;
+	u32			seqid;
+	int			oflags;
+	int			err;
+
+	oflags = NFS4_SHARE_ACCESS_READ | O_CREAT | (flags & O_EXCL);
+
+	err = do_open(dir, name, oflags, sattr, fattr, fhandle, &seqid, stateid);
+	if (err)
+		return err;
+
+	if (flags & O_EXCL)
+		err = do_setattr(NFS_SERVER(dir), fattr, fhandle, sattr, stateid);
+	do_close(NFS_SERVER(dir), fhandle, seqid, stateid);
+
+	return err;
+}
+
+/*
+ * Remove "name" from directory "dir" (PUTFH, REMOVE, GETATTR) and, on
+ * success, refresh the directory inode from the returned attributes.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2];
+	int			err;
+
+	dir_attr.valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "remove");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_remove(&compound, name, &dir_cinfo);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	return 0;
+}
+
+/*
+ * Everything an asynchronous REMOVE needs to keep alive until the
+ * reply has been processed.  The compound must stay the first member:
+ * nfs4_proc_unlink_done() gets a pointer to it via msg->rpc_argp and
+ * casts that pointer back to the descriptor.
+ */
+struct unlink_desc {
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[3];
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	attrs;
+	u32			bmres[2];	/* GETATTR result bitmap */
+};
+
+/*
+ * Prepare "msg" for an asynchronous removal of "name" from "dir".
+ *
+ * Allocates an unlink_desc, builds a PUTFH/REMOVE/GETATTR compound in
+ * it and installs the compound as argument and result of the message.
+ * The descriptor is released by nfs4_proc_unlink_done().
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int
+nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct unlink_desc *	up;
+	struct nfs4_compound *	cp;
+
+	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+	cp = &up->compound;
+
+	/*
+	 * kmalloc() does not clear the memory; mark the attributes as
+	 * not-yet-valid so the completion handler can tell whether the
+	 * GETATTR was decoded.
+	 */
+	up->attrs.valid = 0;
+
+	nfs4_setup_compound(cp, up->ops, NFS_SERVER(dir->d_inode), "unlink_setup");
+	nfs4_setup_putfh(cp, NFS_FH(dir->d_inode));
+	nfs4_setup_remove(cp, name, &up->cinfo);
+	/*
+	 * The request completes after this function has returned, so the
+	 * result bitmap must live in the descriptor rather than on the
+	 * stack.
+	 */
+	nfs4_setup_getattr(cp, &up->attrs, up->bmres);
+
+	msg->rpc_proc = NFSPROC4_COMPOUND;
+	msg->rpc_argp = cp;
+	msg->rpc_resp = cp;
+	return 0;
+}
+
+/*
+ * Completion handler for an asynchronous unlink.
+ *
+ * Performs lease processing, applies the directory change information
+ * if post-operation attributes were received, frees the descriptor
+ * allocated by nfs4_proc_unlink_setup() and clears msg->rpc_argp so a
+ * second invocation is a no-op.  Always returns 0.
+ */
+static int
+nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	const unsigned int need = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V4;
+	struct rpc_message *msg = &task->tk_msg;
+	struct unlink_desc *up;
+
+	if (msg->rpc_argp) {
+		up = (struct unlink_desc *) msg->rpc_argp;
+		process_lease(&up->compound);
+		/*
+		 * process_cinfo() BUGs on attributes that are not valid;
+		 * if the compound failed before the GETATTR there is
+		 * nothing to apply.
+		 */
+		if ((up->attrs.valid & need) == need) {
+			process_cinfo(&up->cinfo, &up->attrs);
+			nfs_refresh_inode(dir->d_inode, &up->attrs);
+		}
+		kfree(up);
+		msg->rpc_argp = NULL;
+	}
+	return 0;
+}
+
+/*
+ * Rename old_dir/old_name to new_dir/new_name.
+ *
+ * The compound saves the source directory's filehandle, makes the
+ * target directory current, performs the RENAME and then fetches the
+ * attributes of the target and (after RESTOREFH) the source directory.
+ * On success both directory inodes are refreshed.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		 struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	old_cinfo, new_cinfo;
+	struct nfs_fattr	old_dir_attr, new_dir_attr;
+	u32			old_dir_bmres[2], new_dir_bmres[2];
+	int			err;
+
+	old_dir_attr.valid = 0;
+	new_dir_attr.valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(old_dir), "rename");
+	nfs4_setup_putfh(&compound, NFS_FH(old_dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_putfh(&compound, NFS_FH(new_dir));
+	nfs4_setup_rename(&compound, old_name, new_name, &old_cinfo, &new_cinfo);
+	nfs4_setup_getattr(&compound, &new_dir_attr, new_dir_bmres);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &old_dir_attr, old_dir_bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&old_cinfo, &old_dir_attr);
+	process_cinfo(&new_cinfo, &new_dir_attr);
+	nfs_refresh_inode(old_dir, &old_dir_attr);
+	nfs_refresh_inode(new_dir, &new_dir_attr);
+	return 0;
+}
+
+/*
+ * Create a hard link "name" in "dir" pointing at "inode".
+ *
+ * The compound saves the file's filehandle, makes the directory
+ * current, performs the LINK and then fetches the attributes of the
+ * directory and (after RESTOREFH) of the file.  On success both inodes
+ * are refreshed.  Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr, fattr;
+	u32			dir_bmres[2], bmres[2];
+	int			err;
+
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "link");
+	nfs4_setup_putfh(&compound, NFS_FH(inode));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_link(&compound, name, &dir_cinfo);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &fattr, bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	nfs_refresh_inode(inode, &fattr);
+	return 0;
+}
+
+/*
+ * Create a symbolic link "name" in "dir" whose target is "path".
+ *
+ * The compound creates the link, returns its attributes and filehandle
+ * and then (after RESTOREFH) the attributes of the directory, which is
+ * refreshed on success.  Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		  struct iattr *sattr, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2], bmres[2];
+	int			err;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "symlink");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_symlink(&compound, name, path, sattr, &dir_cinfo);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	return 0;
+}
+
+/*
+ * Create a directory "name" in "dir".
+ *
+ * The compound creates the directory, returns its attributes and
+ * filehandle and then (after RESTOREFH) the attributes of the parent,
+ * which is refreshed on success.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2], bmres[2];
+	int			err;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mkdir");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_dir(&compound, name, sattr, &dir_cinfo);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fhandle);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	return 0;
+}
+
+/*
+ * Read directory entries starting at "cookie" into "page"
+ * (PUTFH, READDIR).  The whole call runs under the big kernel lock.
+ * The "plus" argument is not used.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs4_op		ops[2];
+	struct nfs4_compound	compound;
+	int			err;
+
+	lock_kernel();
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "readdir");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir), &page, count, dentry);
+	err = nfs4_call_compound(&compound, cred, 0);
+
+	unlock_kernel();
+
+	return err;
+}
+
+/*
+ * Create a special file "name" with device number "rdev" in "dir".
+ *
+ * The compound creates the node, returns its attributes and filehandle
+ * and then (after RESTOREFH) the attributes of the directory, which is
+ * refreshed on success.  Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
+		dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct nfs4_compound	compound;
+	struct nfs4_op		ops[7];
+	struct nfs4_change_info	dir_cinfo;
+	struct nfs_fattr	dir_attr;
+	u32			dir_bmres[2], bmres[2];
+	int			err;
+
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+
+	nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mknod");
+	nfs4_setup_putfh(&compound, NFS_FH(dir));
+	nfs4_setup_savefh(&compound);
+	nfs4_setup_create_special(&compound, name, rdev, sattr, &dir_cinfo);
+	nfs4_setup_getattr(&compound, fattr, bmres);
+	nfs4_setup_getfh(&compound, fh);
+	nfs4_setup_restorefh(&compound);
+	nfs4_setup_getattr(&compound, &dir_attr, dir_bmres);
+	err = nfs4_call_compound(&compound, NULL, 0);
+	if (err)
+		return err;
+
+	process_cinfo(&dir_cinfo, &dir_attr);
+	nfs_refresh_inode(dir, &dir_attr);
+	return 0;
+}
+
+/*
+ * Fetch filesystem statistics for the filesystem containing fhandle.
+ * *fsstat is cleared before the call.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *fsstat)
+{
+	struct nfs4_op ops[2];
+	struct nfs4_compound compound;
+	u32 bmres[2];
+	int err;
+
+	memset(fsstat, 0, sizeof(*fsstat));
+
+	nfs4_setup_compound(&compound, ops, server, "statfs");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_statfs(&compound, fsstat, bmres);
+
+	err = nfs4_call_compound(&compound, NULL, 0);
+	return err;
+}
+
+/*
+ * Fetch static filesystem information for the filesystem containing
+ * fhandle.  *fsinfo is cleared before the call.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	memset(fsinfo, 0, sizeof(*fsinfo));
+	/* Tag the compound with this procedure's own name, not "statfs". */
+	nfs4_setup_compound(&compound, ops, server, "fsinfo");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_fsinfo(&compound, fsinfo, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+/*
+ * Fetch pathconf information for the filesystem object identified by
+ * fhandle.  *pathconf is cleared before the call.
+ * Returns the result of nfs4_call_compound().
+ */
+static int
+nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *pathconf)
+{
+	struct nfs4_compound compound;
+	struct nfs4_op ops[2];
+	u32 bmres[2];
+
+	memset(pathconf, 0, sizeof(*pathconf));
+	/* Tag the compound with this procedure's own name, not "statfs". */
+	nfs4_setup_compound(&compound, ops, server, "pathconf");
+	nfs4_setup_putfh(&compound, fhandle);
+	nfs4_setup_pathconf(&compound, pathconf, bmres);
+	return nfs4_call_compound(&compound, NULL, 0);
+}
+
+/*
+ * Completion callback for an asynchronous READ: do the lease
+ * processing, then pass the byte count and EOF flag on to the generic
+ * read completion code.
+ */
+static void
+nfs4_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *rdata = (struct nfs_read_data *) task->tk_calldata;
+
+	process_lease(&rdata->u.v4.compound);
+	nfs_readpage_result(task, rdata->u.v4.res_count, rdata->u.v4.res_eof);
+}
+
+/*
+ * Prepare the RPC task embedded in "data" for an asynchronous READ of
+ * "count" bytes.
+ *
+ * The file position and page offset come from the first request on
+ * data->pages.  The task is initialised as asynchronous (with the swap
+ * flags added for a swapfile inode), nfs4_read_done() is its completion
+ * callback and nfs_readdata_release() its release hook.
+ */
+static void
+nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count)
+{
+	struct inode *inode = data->inode;
+	struct nfs_page *first = nfs_list_entry(data->pages.next);
+	struct rpc_task	*task = &data->task;
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	int flags = RPC_TASK_ASYNC;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "read [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_read(cp, req_offset(first) + first->wb_offset,
+			count, data->pagevec, first->wb_offset,
+			&data->u.v4.res_eof,
+			&data->u.v4.res_count);
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_readdata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback for an asynchronous WRITE: do the lease
+ * processing, then pass the requested stability, requested count and
+ * resulting count on to the generic writeback completion code.
+ */
+static void
+nfs4_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *) task->tk_calldata;
+
+	process_lease(&wdata->u.v4.compound);
+	nfs_writeback_done(task, wdata->u.v4.arg_stable,
+			   wdata->u.v4.arg_count, wdata->u.v4.res_count);
+}
+
+/*
+ * Prepare the RPC task embedded in "data" for a WRITE of "count" bytes.
+ *
+ * Stability: without FLUSH_STABLE the write is UNSTABLE; with it, the
+ * write is DATA_SYNC if the inode has commits outstanding (ncommit is
+ * non-zero) and FILE_SYNC otherwise.  The task is asynchronous unless
+ * FLUSH_SYNC is set in "how".  nfs4_write_done() is the completion
+ * callback and nfs_writedata_release() the release hook.
+ */
+static void
+nfs4_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how)
+{
+	struct inode *inode = data->inode;
+	struct nfs_page *first = nfs_list_entry(data->pages.next);
+	struct rpc_task	*task = &data->task;
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	int stable;
+	int flags;
+
+	if (!(how & FLUSH_STABLE))
+		stable = NFS_UNSTABLE;
+	else if (NFS_I(inode)->ncommit)
+		stable = NFS_DATA_SYNC;
+	else
+		stable = NFS_FILE_SYNC;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "write [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_write(cp, req_offset(first) + first->wb_offset,
+			 count, stable, data->pagevec, first->wb_offset,
+			 &data->u.v4.res_count, &data->verf);
+
+	/* Set the initial flags for the task.  */
+	if (how & FLUSH_SYNC)
+		flags = 0;
+	else
+		flags = RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_writedata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback for a COMMIT: do the lease processing, then hand
+ * over to the generic commit completion code.
+ */
+static void
+nfs4_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *) task->tk_calldata;
+
+	process_lease(&wdata->u.v4.compound);
+	nfs_commit_done(task);
+}
+
+/*
+ * Prepare the RPC task embedded in "data" for a COMMIT of the byte
+ * range [start, start + len).
+ *
+ * The task is asynchronous unless FLUSH_SYNC is set in "how".
+ * nfs4_commit_done() is the completion callback and
+ * nfs_writedata_release() the release hook.
+ */
+static void
+nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how)
+{
+	struct inode *inode = data->inode;
+	struct rpc_task	*task = &data->task;
+	struct nfs4_compound *cp = &data->u.v4.compound;
+	struct rpc_message msg = {
+		.rpc_proc = NFSPROC4_COMPOUND,
+		.rpc_argp = cp,
+		.rpc_resp = cp,
+		.rpc_cred = data->cred,
+	};
+	int flags;
+
+	nfs4_setup_compound(cp, data->u.v4.ops, NFS_SERVER(inode), "commit [async]");
+	nfs4_setup_putfh(cp, NFS_FH(inode));
+	nfs4_setup_commit(cp, start, len, &data->verf);
+
+	/* Set the initial flags for the task.  */
+	if (how & FLUSH_SYNC)
+		flags = 0;
+	else
+		flags = RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_writedata_release;
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special
+ * standalone procedure for queueing an asynchronous RENEW.
+ *
+ * struct renew_desc bundles everything one such request needs into a
+ * single allocation: the RPC task itself, the compound it sends and the
+ * storage for the compound's single operation.
+ */
+struct renew_desc {
+	struct rpc_task		task;		/* the asynchronous RPC task */
+	struct nfs4_compound	compound;	/* argument and result of the RPC */
+	struct nfs4_op		ops[1];		/* room for the one RENEW op */
+};
+
+/*
+ * Completion callback for an asynchronous RENEW: the compound is found
+ * through the task's message argument and run through lease processing.
+ */
+static void
+renew_done(struct rpc_task *task)
+{
+	struct nfs4_compound *compound;
+
+	compound = (struct nfs4_compound *) task->tk_msg.rpc_argp;
+	process_lease(compound);
+}
+
+/*
+ * Release hook for an asynchronous RENEW: free the descriptor stored in
+ * tk_calldata and clear the pointer.
+ */
+static void
+renew_release(struct rpc_task *task)
+{
+	void *desc = task->tk_calldata;
+
+	kfree(desc);
+	task->tk_calldata = NULL;
+}
+
+/*
+ * Queue an asynchronous RENEW for "server".
+ *
+ * A renew_desc holding the task, the compound and its single operation
+ * is allocated here and freed by renew_release() when the task is
+ * released.  The request is sent without a credential.
+ *
+ * Returns -ENOMEM if the descriptor cannot be allocated, otherwise the
+ * result of rpc_execute().
+ */
+int
+nfs4_proc_renew(struct nfs_server *server)
+{
+	struct renew_desc *desc;
+	struct nfs4_compound *cp;
+	struct rpc_task *task;
+	struct rpc_message msg;
+
+	desc = (struct renew_desc *) kmalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	task = &desc->task;
+	cp = &desc->compound;
+
+	nfs4_setup_compound(cp, desc->ops, server, "renew");
+	nfs4_setup_renew(cp);
+
+	/* The compound is both the argument and the result of the call. */
+	msg.rpc_proc = NFSPROC4_COMPOUND;
+	msg.rpc_argp = cp;
+	msg.rpc_resp = cp;
+	msg.rpc_cred = NULL;
+
+	rpc_init_task(task, server->client, renew_done, RPC_TASK_ASYNC);
+	rpc_call_setup(task, &msg, 0);
+	task->tk_calldata = desc;
+	task->tk_release = renew_release;
+
+	return rpc_execute(task);
+}
+
+/*
+ * The NFSv4 implementation of the nfs_rpc_ops method table: it maps
+ * each generic operation onto the NFSv4 procedures defined in this
+ * file.
+ */
+struct nfs_rpc_ops	nfs_v4_clientops = {
+	.version	= 4,			/* protocol version */
+	.getroot	= nfs4_proc_get_root,
+	.getattr	= nfs4_proc_getattr,
+	.setattr	= nfs4_proc_setattr,
+	.lookup		= nfs4_proc_lookup,
+	.access		= nfs4_proc_access,
+	.readlink	= nfs4_proc_readlink,
+	.read		= nfs4_proc_read,
+	.write		= nfs4_proc_write,
+	.commit		= NULL,			/* no synchronous commit; see .commit_setup */
+	.create		= nfs4_proc_create,
+	.remove		= nfs4_proc_remove,
+	.unlink_setup	= nfs4_proc_unlink_setup,
+	.unlink_done	= nfs4_proc_unlink_done,
+	.rename		= nfs4_proc_rename,
+	.link		= nfs4_proc_link,
+	.symlink	= nfs4_proc_symlink,
+	.mkdir		= nfs4_proc_mkdir,
+	.rmdir		= nfs4_proc_remove,	/* same procedure as .remove */
+	.readdir	= nfs4_proc_readdir,
+	.mknod		= nfs4_proc_mknod,
+	.statfs		= nfs4_proc_statfs,
+	.fsinfo		= nfs4_proc_fsinfo,
+	.pathconf	= nfs4_proc_pathconf,
+	.decode_dirent	= nfs4_decode_dirent,
+	.read_setup	= nfs4_proc_read_setup,
+	.write_setup	= nfs4_proc_write_setup,
+	.commit_setup	= nfs4_proc_commit_setup,
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
new file mode 100644
index 000000000000..4ba871885dbc
--- /dev/null
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,110 @@
+/*
+ *  fs/nfs/nfs4renewd.c
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
+ * send a RENEW, to keep state alive on the server.  The daemon is implemented
+ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
+ * context.  There is one renewd per nfs_server.
+ *
+ * TODO: If the send queue gets backlogged (e.g., if the server goes down),
+ * we will keep filling the queue with periodic RENEW requests.  We need a
+ * mechanism for ensuring that if renewd successfully sends off a request,
+ * then it only wakes up when the request is finished.  Maybe use the
+ * child task framework of the RPC layer?
+ */
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue");	/* renewd tasks sleep here between runs */
+
+/*
+ * Task action of the renew daemon: queue a RENEW once a third of the lease
+ * has passed since last_renewal, then sleep with renewd as the next action.
+ */
+static void
+renewd(struct rpc_task *task)
+{
+	struct nfs_server *server = (struct nfs_server *)task->tk_calldata;
+	unsigned long lease = server->lease_time;
+	unsigned long last = server->last_renewal;
+	unsigned long timeout = (2 * lease) / 3;
+
+	if (server->nfs4_state) {
+		/* time_before() stays correct when jiffies wraps around */
+		if (time_before(jiffies, last + lease / 3))
+			timeout += last - jiffies;
+		else
+			nfs4_proc_renew(server);  /* queue an asynchronous RENEW */
+	}
+	if (timeout < 5 * HZ)    /* safeguard */
+		timeout = 5 * HZ;
+	task->tk_timeout = timeout;
+	task->tk_action = renewd;
+	task->tk_exit = NULL;
+	rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL);
+}
+
+/*
+ * Start renewd for @server: -ENOMEM if no task, else rpc_execute()'s result.
+ */
+int
+nfs4_init_renewd(struct nfs_server *server)
+{
+	struct rpc_task *renew_task;
+	int err = -ENOMEM;
+
+	lock_kernel();
+	renew_task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+	if (renew_task != NULL) {
+		renew_task->tk_calldata = server;
+		renew_task->tk_action = renewd;
+		err = rpc_execute(renew_task);
+	}
+	unlock_kernel();
+	return err;
+}
+
+/*
+ * Local variables:
+ *   c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
new file mode 100644
index 000000000000..ecbc54fb1048
--- /dev/null
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,81 @@
+/*
+ *  fs/nfs/nfs4state.c
+ *
+ *  Client-side state management for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 state model.  For the time being,
+ * this is minimal, but will be made much more complex in a
+ * subsequent patch.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/nfs_fs.h>
+
+/*
+ * nfs4_get_client(): returns an empty client structure
+ * nfs4_put_client(): drops reference to client structure
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+struct nfs4_client *
+nfs4_get_client(void)
+{
+	struct nfs4_client *client = kmalloc(sizeof(*client), GFP_KERNEL);
+
+	if (client == NULL)
+		return NULL;	/* allocation failed */
+	atomic_set(&client->cl_count, 1);	/* the caller holds the only reference */
+	client->cl_clientid = 0;
+	INIT_LIST_HEAD(&client->cl_lockowners);
+	return client;
+}
+
+void
+nfs4_put_client(struct nfs4_client *clp)
+{
+	BUG_ON(clp == NULL);
+	BUG_ON(atomic_read(&clp->cl_count) == 0);
+
+	if (!atomic_dec_and_test(&clp->cl_count))
+		return;		/* other references remain */
+	BUG_ON(!list_empty(&clp->cl_lockowners));	/* list must be empty at free */
+	kfree(clp);
+}
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
new file mode 100644
index 000000000000..edbf0e2a02d7
--- /dev/null
+++ b/fs/nfs/nfs4xdr.c
@@ -0,0 +1,1777 @@
+/*
+ *  fs/nfs/nfs4xdr.c
+ *
+ *  Client-side XDR for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+/* Empirically, it seems that the NFS client gets confused if
+ * cookies larger than this are returned -- presumably a
+ * signedness issue?
+ */
+#define COOKIE_MAX		0x7fffffff
+
+#define NFS4_CLIENTID(server)	((server)->nfs4_state->cl_clientid)	/* clientid held in the server's nfs4_state */
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+extern int			nfs_stat_to_errno(int);
+/* NOTE(review): the *_sz values are presumably XDR buffer sizes in 32-bit words -- confirm */
+#define NFS4_enc_void_sz	0
+#define NFS4_dec_void_sz	0
+#define NFS4_enc_compound_sz	1024  /* XXX: large enough? */
+#define NFS4_dec_compound_sz	1024  /* XXX: large enough? */
+
+static struct {
+	unsigned int	mode;		/* S_IF* file-type bits */
+	unsigned int	nfs2type;	/* matching NF* type code */
+} nfs_type2fmt[] = {			/* indexed by the NFSv4 type word read in decode_getattr() */
+	{ 0,		NFNON	     },
+	{ S_IFREG,	NFREG	     },
+	{ S_IFDIR,	NFDIR	     },
+	{ S_IFBLK,	NFBLK	     },
+	{ S_IFCHR,	NFCHR	     },
+	{ S_IFLNK,	NFLNK	     },
+	{ S_IFSOCK,	NFSOCK	     },
+	{ S_IFIFO,	NFFIFO	     },
+	{ 0,		NFNON	     },
+	{ 0,		NFNON	     },
+};
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define ENCODE_HEAD						\
+	u32 *p;		/* write cursor used by the WRITE* macros */
+#define ENCODE_TAIL						\
+	return 0
+
+#define WRITE32(n)               *p++ = htonl(n)
+#define WRITE64(n)               do {				\
+	*p++ = htonl((u32)((n) >> 32));				\
+	*p++ = htonl((u32)(n));					\
+} while (0)	/* high 32 bits first, then the low 32 bits */
+#define WRITEMEM(ptr,nbytes)     do {				\
+	p = xdr_writemem(p, ptr, nbytes);			\
+} while (0)
+/* RESERVE_SPACE: BUG if nbytes would run past cp->end, then start writing at cp->p. */
+#define RESERVE_SPACE(nbytes)	do { BUG_ON(cp->p + XDR_QUADLEN(nbytes) > cp->end); p = cp->p; } while (0)
+#define ADJUST_ARGS()           cp->p = p	/* store the cursor back into the compound */
+
+static inline
+u32 *xdr_writemem(u32 *p, const void *ptr, int nbytes)
+{
+	int nwords = XDR_QUADLEN(nbytes);	/* length in 32-bit words */
+	if (nwords != 0) {
+		p[nwords - 1] = 0;	/* pre-zero the last word so padding is 0 */
+		memcpy(p, ptr, nbytes);
+	}
+	return p + nwords;		/* first word after the copied data */
+}
+
+/*
+ * FIXME: The following dummy entries will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+encode_uid(char *p, uid_t uid)
+{
+	static const char dummy_owner[] = "nobody";	/* uid is ignored for now */
+	memcpy(p, dummy_owner, sizeof(dummy_owner));	/* copies the NUL too */
+	return sizeof(dummy_owner) - 1;
+}
+
+/*
+ * FIXME: The following dummy entries will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+encode_gid(char *p, gid_t gid)
+{
+	static const char dummy_group[] = "nobody";	/* gid is ignored for now */
+	memcpy(p, dummy_group, sizeof(dummy_group));	/* copies the NUL too */
+	return sizeof(dummy_group) - 1;
+}
+
+/*
+ * Encode an fattr4 (attribute bitmap followed by the attribute data) from
+ * the fields of @iap selected in iap->ia_valid.  Returns 0, or the negative
+ * value handed back by encode_uid()/encode_gid() on failure.
+ */
+static int
+encode_attrs(struct nfs4_compound *cp, struct iattr *iap)
+{
+	char owner_name[256], owner_group[256];
+	int owner_namelen = 0, owner_grouplen = 0;
+	u32 bmval0 = 0, bmval1 = 0, *q;
+	int len, status;
+	ENCODE_HEAD;
+
+	/*
+	 * Compute the exact number of bytes written below so RESERVE_SPACE()
+	 * checks the real requirement.  Fixed part: 4(bitmap length) +
+	 * 8(two bitmap words) + 4(attrlen) = 16; each attribute adds its size.
+	 */
+	len = 16;
+	if (iap->ia_valid & ATTR_SIZE)
+		len += 8;
+	if (iap->ia_valid & ATTR_MODE)
+		len += 4;
+	if (iap->ia_valid & ATTR_UID) {
+		status = owner_namelen = encode_uid(owner_name, iap->ia_uid);
+		if (status < 0) {
+			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
+			       iap->ia_uid);
+			goto out;
+		}
+		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);	/* length word + padded name */
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		status = owner_grouplen = encode_gid(owner_group, iap->ia_gid);
+		if (status < 0) {
+			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
+			       iap->ia_gid);
+			goto out;
+		}
+		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);	/* length word + padded name */
+	}
+	/* times: 4(how) + 12(seconds, nseconds) for a client time, else 4(how) */
+	if (iap->ia_valid & ATTR_ATIME_SET)
+		len += 16;
+	else if (iap->ia_valid & ATTR_ATIME)
+		len += 4;
+	if (iap->ia_valid & ATTR_MTIME_SET)
+		len += 16;
+	else if (iap->ia_valid & ATTR_MTIME)
+		len += 4;
+	RESERVE_SPACE(len);
+
+	/*
+	 * Write the bitmap length now; the bitmap words and the attribute
+	 * buffer length are backfilled through q at the end of this routine.
+	 */
+	WRITE32(2);
+	q = p;
+	p += 3;
+
+	if (iap->ia_valid & ATTR_SIZE) {
+		bmval0 |= FATTR4_WORD0_SIZE;
+		WRITE64(iap->ia_size);
+	}
+	if (iap->ia_valid & ATTR_MODE) {
+		bmval1 |= FATTR4_WORD1_MODE;
+		WRITE32(iap->ia_mode);
+	}
+	if (iap->ia_valid & ATTR_UID) {
+		bmval1 |= FATTR4_WORD1_OWNER;
+		WRITE32(owner_namelen);
+		WRITEMEM(owner_name, owner_namelen);	/* WRITEMEM advances p itself */
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		bmval1 |= FATTR4_WORD1_OWNER_GROUP;
+		WRITE32(owner_grouplen);
+		WRITEMEM(owner_group, owner_grouplen);	/* WRITEMEM advances p itself */
+	}
+	if (iap->ia_valid & ATTR_ATIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);			/* seconds, high word */
+		WRITE32(iap->ia_atime);		/* seconds, low word */
+		WRITE32(0);			/* nanoseconds */
+	} else if (iap->ia_valid & ATTR_ATIME) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (iap->ia_valid & ATTR_MTIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);			/* seconds, high word */
+		WRITE32(iap->ia_mtime);		/* seconds, low word */
+		WRITE32(0);			/* nanoseconds */
+	} else if (iap->ia_valid & ATTR_MTIME) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	ADJUST_ARGS();
+	/* Backfill the bitmap and the byte length of the data following q[2]. */
+	len = (char *)p - (char *)q - 12;
+	*q++ = htonl(bmval0);
+	*q++ = htonl(bmval1);
+	*q++ = htonl(len);
+	status = 0;
+out:
+	return status;
+}
+
+/* ACCESS: opcode, then the access bits being asked about. */
+static int
+encode_access(struct nfs4_compound *cp, struct nfs4_access *access)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_ACCESS);
+	WRITE32(access->ac_req_access);
+	ADJUST_ARGS();
+	return 0;
+}
+
+static int
+encode_close(struct nfs4_compound *cp, struct nfs4_close *close)
+{
+	ENCODE_HEAD;
+
+	/* opcode(4) + seqid(4) + the whole stateid written below */
+	RESERVE_SPACE(8 + sizeof(nfs4_stateid));
+	WRITE32(OP_CLOSE);
+	WRITE32(close->cl_seqid);
+	WRITEMEM(close->cl_stateid, sizeof(nfs4_stateid));
+	ADJUST_ARGS();
+	ENCODE_TAIL;
+}
+
+/* COMMIT: opcode(4) + 64-bit start offset(8) + length(4). */
+static int
+encode_commit(struct nfs4_compound *cp, struct nfs4_commit *commit)
+{
+	u32 *p;
+
+	RESERVE_SPACE(16);
+	WRITE32(OP_COMMIT);
+	WRITE64(commit->co_start);
+	WRITE32(commit->co_len);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/*
+ * CREATE: opcode and file type, then type-specific data (the link text for
+ * NF4LNK, two specdata words for NF4BLK/NF4CHR), the name of the new
+ * object, and finally its initial attributes.
+ * Returns the status of encode_attrs().
+ */
+static int
+encode_create(struct nfs4_compound *cp, struct nfs4_create *create)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_CREATE);
+	WRITE32(create->cr_ftype);
+	ADJUST_ARGS();
+
+	if (create->cr_ftype == NF4LNK) {
+		RESERVE_SPACE(4 + create->cr_textlen);
+		WRITE32(create->cr_textlen);
+		WRITEMEM(create->cr_text, create->cr_textlen);
+		ADJUST_ARGS();
+	} else if (create->cr_ftype == NF4BLK || create->cr_ftype == NF4CHR) {
+		RESERVE_SPACE(8);
+		WRITE32(create->cr_specdata1);
+		WRITE32(create->cr_specdata2);
+		ADJUST_ARGS();
+	}
+	/* every other file type carries no extra data */
+
+	RESERVE_SPACE(4 + create->cr_namelen);
+	WRITE32(create->cr_namelen);
+	WRITEMEM(create->cr_name, create->cr_namelen);
+	ADJUST_ARGS();
+
+	return encode_attrs(cp, create->cr_attrs);
+}
+
+/* GETATTR: opcode, bitmap length (2), then the two requested bitmap words. */
+static int
+encode_getattr(struct nfs4_compound *cp, struct nfs4_getattr *getattr)
+{
+	u32 *p;
+
+	RESERVE_SPACE(16);
+	WRITE32(OP_GETATTR);
+	WRITE32(2);
+	WRITE32(getattr->gt_bmval[0]);
+	WRITE32(getattr->gt_bmval[1]);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* GETFH: the opcode is the whole request. */
+static int
+encode_getfh(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_GETFH);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* LINK: opcode, then the new link name as a counted string. */
+static int
+encode_link(struct nfs4_compound *cp, struct nfs4_link *link)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8 + link->ln_namelen);
+	WRITE32(OP_LINK);
+	WRITE32(link->ln_namelen);
+	WRITEMEM(link->ln_name, link->ln_namelen);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* LOOKUP: opcode, then the component name as a counted string. */
+static int
+encode_lookup(struct nfs4_compound *cp, struct nfs4_lookup *lookup)
+{
+	int namelen = lookup->lo_name->len;
+	u32 *p;
+
+	RESERVE_SPACE(8 + namelen);
+	WRITE32(OP_LOOKUP);
+	WRITE32(namelen);
+	WRITEMEM(lookup->lo_name->name, namelen);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* OPEN: fixed header, optional create arguments, then a CLAIM_NULL claim. */
+static int
+encode_open(struct nfs4_compound *cp, struct nfs4_open *open)
+{
+	static int global_id = 0;	/* NOTE(review): incremented without locking */
+	int id = global_id++;		/* written below as the 4-byte owner */
+	int status;
+	ENCODE_HEAD;
+
+	/* opcode + seqid + share_access + share_deny (4 each) + clientid (8)
+	 * + ownerlen + owner + opentype (4 each) = 36 bytes */
+	RESERVE_SPACE(36);
+	WRITE32(OP_OPEN);
+	WRITE32(0);                       /* seqid */
+	WRITE32(open->op_share_access);
+	WRITE32(0);                       /* for us, share_deny== 0 always */
+	WRITE64(NFS4_CLIENTID(cp->server));
+	WRITE32(4);
+	WRITE32(id);
+	WRITE32(open->op_opentype);
+	ADJUST_ARGS();
+
+	if (open->op_opentype == NFS4_OPEN_CREATE) {
+		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE) {
+			RESERVE_SPACE(12);
+			WRITE32(open->op_createmode);
+			WRITEMEM(open->op_verifier, sizeof(nfs4_verifier));
+			ADJUST_ARGS();
+		} else if (open->op_attrs) {
+			RESERVE_SPACE(4);
+			WRITE32(open->op_createmode);
+			ADJUST_ARGS();
+			if ((status = encode_attrs(cp, open->op_attrs)))
+				return status;
+		} else {
+			/* no attributes: two zero words (no bitmap, no data) */
+			RESERVE_SPACE(12);
+			WRITE32(open->op_createmode);
+			WRITE32(0);
+			WRITE32(0);
+			ADJUST_ARGS();
+		}
+	}
+
+	RESERVE_SPACE(8 + open->op_name->len);
+	WRITE32(NFS4_OPEN_CLAIM_NULL);
+	WRITE32(open->op_name->len);
+	WRITEMEM(open->op_name->name, open->op_name->len);
+	ADJUST_ARGS();
+	ENCODE_TAIL;
+}
+
+/*
+ * OPEN_CONFIRM: opcode, stateid, seqid.  In this "stateless"
+ * implementation the OPEN_CONFIRM seqid is always equal to 1.
+ */
+static int
+encode_open_confirm(struct nfs4_compound *cp, struct nfs4_open_confirm *open_confirm)
+{
+	u32 *p;
+
+	RESERVE_SPACE(24);
+	WRITE32(OP_OPEN_CONFIRM);
+	WRITEMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
+	WRITE32(1);		/* seqid */
+	ADJUST_ARGS();
+
+	return 0;
+}
+
+/* PUTFH: opcode, then the filehandle as length-prefixed data. */
+static int
+encode_putfh(struct nfs4_compound *cp, struct nfs4_putfh *putfh)
+{
+	int fhsize = putfh->pf_fhandle->size;
+	u32 *p;
+
+	RESERVE_SPACE(8 + fhsize);
+	WRITE32(OP_PUTFH);
+	WRITE32(fhsize);
+	WRITEMEM(putfh->pf_fhandle->data, fhsize);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* PUTROOTFH: the opcode is the whole request. */
+static int
+encode_putrootfh(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_PUTROOTFH);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* READ: opcode, an all-zero stateid, offset and length; read->rd_pages is
+ * then handed to xdr_inline_pages() for the receive buffer.
+ */
+static int
+encode_read(struct nfs4_compound *cp, struct nfs4_read *read, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int hdrlen, i;
+	u32 *p;
+
+	RESERVE_SPACE(32);
+	WRITE32(OP_READ);
+	for (i = 0; i < 4; i++)
+		WRITE32(0);	/* all-zero stateid! */
+	WRITE64(read->rd_offset);
+	WRITE32(read->rd_length);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel status + taglen + rescount + OP_PUTFH + status
+	 *       + OP_READ + status + eof + datalen = 9
+	 */
+	hdrlen = (RPC_REPHDRSIZE + auth->au_rslack + 9 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, hdrlen,
+			 read->rd_pages, read->rd_pgbase, read->rd_length);
+	return 0;
+}
+
+/* READDIR: cookie, verifier, counts and attribute bitmap; reply uses rd_pages. */
+static int
+encode_readdir(struct nfs4_compound *cp, struct nfs4_readdir *readdir, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int hdrlen;
+	u32 *p;
+
+	RESERVE_SPACE(40);
+	WRITE32(OP_READDIR);
+	WRITE64(readdir->rd_cookie);
+	WRITEMEM(readdir->rd_req_verifier, sizeof(nfs4_verifier));
+	WRITE32(readdir->rd_count >> 5);  /* meaningless "dircount" field */
+	WRITE32(readdir->rd_count);
+	WRITE32(2);
+	WRITE32(readdir->rd_bmval[0]);
+	WRITE32(readdir->rd_bmval[1]);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READDIR + status + verifier(2)  = 9
+	 */
+	hdrlen = (RPC_REPHDRSIZE + auth->au_rslack + 9 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, hdrlen, readdir->rd_pages,
+			 readdir->rd_pgbase, readdir->rd_count);
+	return 0;
+}
+
+/* READLINK: opcode only; rl_pages is set up to receive the reply data. */
+static int
+encode_readlink(struct nfs4_compound *cp, struct nfs4_readlink *readlink, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int hdrlen;
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READLINK);
+	ADJUST_ARGS();
+
+	/* set up reply iovec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READLINK + status  = 7
+	 */
+	hdrlen = (RPC_REPHDRSIZE + auth->au_rslack + 7 + XDR_QUADLEN(cp->taglen)) << 2;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	xdr_inline_pages(&req->rq_rcv_buf, hdrlen, readlink->rl_pages, 0, readlink->rl_count);
+	return 0;
+}
+
+/* REMOVE: opcode, then the name to remove as a counted string. */
+static int
+encode_remove(struct nfs4_compound *cp, struct nfs4_remove *remove)
+{
+	u32 *p;
+
+	RESERVE_SPACE(8 + remove->rm_namelen);
+	WRITE32(OP_REMOVE);
+	WRITE32(remove->rm_namelen);
+	WRITEMEM(remove->rm_name, remove->rm_namelen);
+	ADJUST_ARGS();
+	return 0;
+}
+
+static int
+encode_rename(struct nfs4_compound *cp, struct nfs4_rename *rename)
+{
+	ENCODE_HEAD;
+
+	/* opcode(4) + old name length(4) + old name */
+	RESERVE_SPACE(8 + rename->rn_oldnamelen);
+	WRITE32(OP_RENAME);
+	WRITE32(rename->rn_oldnamelen);
+	WRITEMEM(rename->rn_oldname, rename->rn_oldnamelen);
+	ADJUST_ARGS();
+	/* new name length(4) + new name: no opcode word this time */
+	RESERVE_SPACE(4 + rename->rn_newnamelen);
+	WRITE32(rename->rn_newnamelen);
+	WRITEMEM(rename->rn_newname, rename->rn_newnamelen);
+	ADJUST_ARGS();
+	ENCODE_TAIL;
+}
+
+/* RENEW: opcode followed by the 64-bit clientid. */
+static int
+encode_renew(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(12);
+	WRITE32(OP_RENEW);
+	WRITE64(NFS4_CLIENTID(cp->server));
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* RESTOREFH: the opcode is the whole request. */
+static int
+encode_restorefh(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_RESTOREFH);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* SAVEFH: the opcode is the whole request. */
+static int
+encode_savefh(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_SAVEFH);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/*
+ * SETATTR: opcode and stateid, followed by the attributes to set.
+ * Returns the status of encode_attrs().
+ */
+static int
+encode_setattr(struct nfs4_compound *cp, struct nfs4_setattr *setattr)
+{
+	u32 *p;
+
+	RESERVE_SPACE(20);
+	WRITE32(OP_SETATTR);
+	WRITEMEM(setattr->st_stateid, sizeof(nfs4_stateid));
+	ADJUST_ARGS();
+
+	return encode_attrs(cp, setattr->st_iap);
+}
+
+/*
+ * SETCLIENTID: verifier, sc_name, sc_prog, sc_netid, sc_uaddr and
+ * sc_cb_ident.  That is 32 bytes of fixed-size fields plus the three
+ * strings, each rounded up to whole words.
+ */
+static int
+encode_setclientid(struct nfs4_compound *cp, struct nfs4_setclientid *setclientid)
+{
+	u32 namelen = strlen(setclientid->sc_name);
+	u32 netidlen = strlen(setclientid->sc_netid);
+	u32 uaddrlen = strlen(setclientid->sc_uaddr);
+	u32 nwords = XDR_QUADLEN(namelen) + XDR_QUADLEN(netidlen) + XDR_QUADLEN(uaddrlen);
+	u32 *p;
+
+	RESERVE_SPACE((nwords << 2) + 32);
+	WRITE32(OP_SETCLIENTID);
+	WRITEMEM(setclientid->sc_verifier, sizeof(nfs4_verifier));
+	WRITE32(namelen);
+	WRITEMEM(setclientid->sc_name, namelen);
+	WRITE32(setclientid->sc_prog);
+	WRITE32(netidlen);
+	WRITEMEM(setclientid->sc_netid, netidlen);
+	WRITE32(uaddrlen);
+	WRITEMEM(setclientid->sc_uaddr, uaddrlen);
+	WRITE32(setclientid->sc_cb_ident);
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* SETCLIENTID_CONFIRM: opcode, clientid and the confirm verifier. */
+static int
+encode_setclientid_confirm(struct nfs4_compound *cp)
+{
+	u32 *p;
+
+	RESERVE_SPACE(12 + sizeof(nfs4_verifier));
+	WRITE32(OP_SETCLIENTID_CONFIRM);
+	WRITE64(NFS4_CLIENTID(cp->server));
+	WRITEMEM(cp->server->nfs4_state->cl_confirm, sizeof(nfs4_verifier));
+	ADJUST_ARGS();
+	return 0;
+}
+
+/* WRITE: opcode, all-ones stateid, offset, stability and length; wr_pages
+ * is then passed to xdr_encode_pages() for the send buffer. */
+static int
+encode_write(struct nfs4_compound *cp, struct nfs4_write *write, struct rpc_rqst *req)
+{
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	int i;
+	u32 *p;
+
+	RESERVE_SPACE(36);
+	WRITE32(OP_WRITE);
+	for (i = 0; i < 4; i++)
+		WRITE32(0xffffffff);     /* magic stateid -1 */
+	WRITE64(write->wr_offset);
+	WRITE32(write->wr_stable_how);
+	WRITE32(write->wr_len);
+	ADJUST_ARGS();
+
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+	xdr_encode_pages(sndbuf, write->wr_pages, write->wr_pgbase, write->wr_len);
+	return 0;
+}
+
+static int
+encode_compound(struct nfs4_compound *cp, struct rpc_rqst *req)
+{
+	int i, status = 0;
+	ENCODE_HEAD;
+	/* Encode the COMPOUND header, then each of the cp->req_nops operations in order. */
+	dprintk("encode_compound: tag=%.*s\n", (int)cp->taglen, cp->tag);
+	/* header: tag as a counted string, minor version, operation count */
+	RESERVE_SPACE(12 + cp->taglen);
+	WRITE32(cp->taglen);
+	WRITEMEM(cp->tag, cp->taglen);
+	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(cp->req_nops);
+	ADJUST_ARGS();
+	/* dispatch on opcode; the first encoder that fails aborts the encoding */
+	for (i = 0; i < cp->req_nops; i++) {
+		switch (cp->ops[i].opnum) {
+		case OP_ACCESS:
+			status = encode_access(cp, &cp->ops[i].u.access);
+			break;
+		case OP_CLOSE:
+			status = encode_close(cp, &cp->ops[i].u.close);
+			break;
+		case OP_COMMIT:
+			status = encode_commit(cp, &cp->ops[i].u.commit);
+			break;
+		case OP_CREATE:
+			status = encode_create(cp, &cp->ops[i].u.create);
+			break;
+		case OP_GETATTR:
+			status = encode_getattr(cp, &cp->ops[i].u.getattr);
+			break;
+		case OP_GETFH:
+			status = encode_getfh(cp);
+			break;
+		case OP_LINK:
+			status = encode_link(cp, &cp->ops[i].u.link);
+			break;
+		case OP_LOOKUP:
+			status = encode_lookup(cp, &cp->ops[i].u.lookup);
+			break;
+		case OP_OPEN:
+			status = encode_open(cp, &cp->ops[i].u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			status = encode_open_confirm(cp, &cp->ops[i].u.open_confirm);
+			break;
+		case OP_PUTFH:
+			status = encode_putfh(cp, &cp->ops[i].u.putfh);
+			break;
+		case OP_PUTROOTFH:
+			status = encode_putrootfh(cp);
+			break;
+		case OP_READ:
+			status = encode_read(cp, &cp->ops[i].u.read, req);
+			break;
+		case OP_READDIR:
+			status = encode_readdir(cp, &cp->ops[i].u.readdir, req);
+			break;
+		case OP_READLINK:
+			status = encode_readlink(cp, &cp->ops[i].u.readlink, req);
+			break;
+		case OP_REMOVE:
+			status = encode_remove(cp, &cp->ops[i].u.remove);
+			break;
+		case OP_RENAME:
+			status = encode_rename(cp, &cp->ops[i].u.rename);
+			break;
+		case OP_RENEW:
+			status = encode_renew(cp);
+			break;
+		case OP_RESTOREFH:
+			status = encode_restorefh(cp);
+			break;
+		case OP_SAVEFH:
+			status = encode_savefh(cp);
+			break;
+		case OP_SETATTR:
+			status = encode_setattr(cp, &cp->ops[i].u.setattr);
+			break;
+		case OP_SETCLIENTID:
+			status = encode_setclientid(cp, &cp->ops[i].u.setclientid);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			status = encode_setclientid_confirm(cp);
+			break;
+		case OP_WRITE:
+			status = encode_write(cp, &cp->ops[i].u.write, req);
+			break;
+		default:
+			BUG();
+		}
+		if (status)
+			return status;
+	}
+	
+	ENCODE_TAIL;
+}
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+
+/*
+ * Encode void argument
+ */
+static int
+nfs4_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);	/* nothing is encoded; dummy is unused */
+	return 0;
+}
+
+/*
+ * Encode COMPOUND argument
+ */
+static int
+nfs4_xdr_enc_compound(struct rpc_rqst *req, u32 *p, struct nfs4_compound *cp)
+{
+	char *base = (char *)req->rq_svec[0].iov_base;
+	int err;
+
+	cp->p = p;
+	cp->end = (u32 *)(base + req->rq_svec[0].iov_len);
+	err = encode_compound(cp, req);
+	cp->timestamp = jiffies;
+	/* set the send length here only if the send buffer has no page data */
+	if (err == 0 && req->rq_snd_buf.page_len == 0)
+		req->rq_slen = xdr_adjust_iovec(req->rq_snd_buf.head, cp->p);
+	return err;
+}
+
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define DECODE_HEAD				\
+	u32 *p;					\
+	int status	/* read cursor and return value shared by the macros below */
+#define DECODE_TAIL				\
+	status = 0;				\
+out:						\
+	return status;				\
+xdr_error:					\
+	printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+	status = -EIO;				\
+	goto out	/* supplies the out: and xdr_error: labels; ends the function */
+
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {			\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)	/* high 32 bits first, then the low 32 bits */
+#define READTIME(x)       do {			\
+	p++;					\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)	/* skips one word, then packs the next two into a u64 */
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)	/* copies nbytes, then advances p by XDR_QUADLEN(nbytes) words */
+
+#define READ_BUF(nbytes)  do {			\
+	if ((nbytes) > (u32)((char *)cp->end - (char *)cp->p))  \
+		goto xdr_error;			\
+	p = cp->p;				\
+	cp->p += XDR_QUADLEN(nbytes);		\
+} while (0)	/* bounds-check nbytes against cp->end, then point p at the data */
+
+/*
+ * FIXME: The following dummy entry will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+decode_uid(char *p, u32 len, uid_t *uid)
+{
+	*uid = (uid_t) -2;	/* placeholder: the name in p/len is ignored */
+	return 0;
+}
+
+/*
+ * FIXME: The following dummy entry will be replaced once the userland
+ * upcall gets in...
+ */
+static int
+decode_gid(char *p, u32 len, gid_t *gid)
+{
+	*gid = (gid_t) -2;	/* placeholder: the name in p/len is ignored */
+	return 0;
+}
+
+/* change info: atomic flag (4 bytes), then 64-bit before and after values. */
+static int
+decode_change_info(struct nfs4_compound *cp, struct nfs4_change_info *cinfo)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4 + 8 + 8);
+	READ32(cinfo->atomic);
+	READ64(cinfo->before);
+	READ64(cinfo->after);
+	DECODE_TAIL;
+}
+
+/* ACCESS result: "supported" and "access" masks; -EIO if the server sets
+ * bits that were not requested, or grants access it does not support.
+ */
+static int
+decode_access(struct nfs4_compound *cp, int nfserr, struct nfs4_access *access)
+{
+	u32 supported, granted;
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+	READ_BUF(8);
+	READ32(supported);
+	READ32(granted);
+	if ((supported & ~access->ac_req_access) || (granted & ~supported)) {
+		printk(KERN_NOTICE "NFS: server returned bad bits in access call!\n");
+		return -EIO;
+	}
+	*access->ac_resp_supported = supported;
+	*access->ac_resp_access = granted;
+	DECODE_TAIL;
+}
+
+/* CLOSE result: unless nfserr is set, copy the stateid into close->cl_stateid. */
+static int
+decode_close(struct nfs4_compound *cp, int nfserr, struct nfs4_close *close)
+{
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+	READ_BUF(sizeof(nfs4_stateid));
+	COPYMEM(close->cl_stateid, sizeof(nfs4_stateid));
+	DECODE_TAIL;
+}
+
+/* COMMIT result: unless nfserr is set, copy the 8-byte write verifier. */
+static int
+decode_commit(struct nfs4_compound *cp, int nfserr, struct nfs4_commit *commit)
+{
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+	READ_BUF(8);
+	COPYMEM(commit->co_verifier->verifier, 8);
+	DECODE_TAIL;
+}
+
+/* CREATE result: change info, then an attribute bitmap that is skipped. */
+static int
+decode_create(struct nfs4_compound *cp, int nfserr, struct nfs4_create *create)
+{
+	u32 nwords;
+	DECODE_HEAD;
+
+	if (nfserr)
+		return 0;
+	if ((status = decode_change_info(cp, create->cr_cinfo)))
+		return status;
+	READ_BUF(4);
+	READ32(nwords);
+	if (nwords > 2)		/* at most two bitmap words are accepted */
+		goto xdr_error;
+	READ_BUF(nwords << 2);
+	DECODE_TAIL;
+}
+
+extern u32 nfs4_fattr_bitmap[2];
+extern u32 nfs4_fsinfo_bitmap[2];
+extern u32 nfs4_fsstat_bitmap[2];
+extern u32 nfs4_pathconf_bitmap[2];
+
+static int
+decode_getattr(struct nfs4_compound *cp, int nfserr, struct nfs4_getattr *getattr)
+{
+        struct nfs_fattr *nfp = getattr->gt_attrs;
+	struct nfs_fsstat *fsstat = getattr->gt_fsstat;
+	struct nfs_fsinfo *fsinfo = getattr->gt_fsinfo;
+	struct nfs_pathconf *pathconf = getattr->gt_pathconf;
+        u32 bmlen;
+        u32 bmval0 = 0;
+        u32 bmval1 = 0;
+        u32 attrlen;
+        u32 dummy32;
+        u32 len = 0;
+	unsigned int type;
+	int fmode = 0;
+        DECODE_HEAD;
+	/* Decode a GETATTR result into whichever gt_* result structures apply. */
+        if (nfserr)	/* failed op: nothing to decode */
+                goto success;
+        /* Reply: bitmap length, bitmap words, attrlen, then the attribute data. */
+        READ_BUF(4);
+        READ32(bmlen);
+        if (bmlen > 2)
+                goto xdr_error;
+	
+        READ_BUF((bmlen << 2) + 4);
+        if (bmlen > 0)
+                READ32(bmval0);
+        if (bmlen > 1)
+                READ32(bmval1);
+        READ32(attrlen);
+	/* Refuse attribute bits that were not set in the request bitmap. */
+	if ((bmval0 & ~getattr->gt_bmval[0]) ||
+	    (bmval1 & ~getattr->gt_bmval[1])) {
+		dprintk("read_attrs: server returned bad attributes!\n");
+		goto xdr_error;
+	}
+	getattr->gt_bmres[0] = bmval0;
+	getattr->gt_bmres[1] = bmval1;
+	/*
+	 * Nominal defaults for attributes the server may omit.  NOTE(review):
+	 * the NULL checks made here are not repeated below -- confirm callers
+	 * only request bits whose result structure they actually supply.
+	 */
+	if (nfp) {
+		nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+		nfp->nlink = 1;
+		nfp->timestamp = jiffies;
+	}
+	if (fsinfo) {
+		fsinfo->rtmult = fsinfo->wtmult = 512;  /* ??? */
+		fsinfo->lease_time = 60;
+	}
+	/* Attributes follow in the order tested below; len counts bytes consumed. */
+        if (bmval0 & FATTR4_WORD0_TYPE) {
+                READ_BUF(4);
+                len += 4;
+                READ32(type);
+                if (type < NF4REG || type > NF4NAMEDATTR) {
+                        dprintk("read_attrs: bad type %d\n", type);
+                        goto xdr_error;
+                }
+		nfp->type = nfs_type2fmt[type].nfs2type;
+		fmode = nfs_type2fmt[type].mode;	/* merged into nfp->mode below */
+                dprintk("read_attrs: type=%d\n", (u32)nfp->type);
+        }
+        if (bmval0 & FATTR4_WORD0_CHANGE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->change_attr);
+                dprintk("read_attrs: changeid=%Ld\n", (u64)nfp->change_attr);
+        }
+        if (bmval0 & FATTR4_WORD0_SIZE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->size);
+                dprintk("read_attrs: size=%Ld\n", (u64)nfp->size);
+        }
+        if (bmval0 & FATTR4_WORD0_FSID) {
+                READ_BUF(16);
+                len += 16;
+                READ64(nfp->fsid_u.nfs4.major);
+                READ64(nfp->fsid_u.nfs4.minor);
+                dprintk("read_attrs: fsid=0x%Lx/0x%Lx\n",
+			nfp->fsid_u.nfs4.major, nfp->fsid_u.nfs4.minor);
+        }
+        if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+                READ_BUF(4);
+                len += 4;
+                READ32(fsinfo->lease_time);
+                dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time);
+        }
+        if (bmval0 & FATTR4_WORD0_FILEID) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->fileid);
+                dprintk("read_attrs: fileid=%Ld\n", nfp->fileid);
+        }
+	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+		READ_BUF(8);
+		len += 8;
+		READ64(fsstat->afiles);
+		dprintk("read_attrs: files_avail=0x%Lx\n", fsstat->afiles);
+	}
+        if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->ffiles);
+                dprintk("read_attrs: files_free=0x%Lx\n", fsstat->ffiles);
+        }
+        if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->tfiles);
+                dprintk("read_attrs: files_tot=0x%Lx\n", fsstat->tfiles);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->maxfilesize);
+                dprintk("read_attrs: maxfilesize=0x%Lx\n", fsinfo->maxfilesize);
+        }
+	if (bmval0 & FATTR4_WORD0_MAXLINK) {
+		READ_BUF(4);
+		len += 4;
+		READ32(pathconf->max_link);
+		dprintk("read_attrs: maxlink=%d\n", pathconf->max_link);
+	}
+        if (bmval0 & FATTR4_WORD0_MAXNAME) {
+                READ_BUF(4);
+                len += 4;
+                READ32(pathconf->max_namelen);
+                dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXREAD) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->rtmax);
+		fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax;	/* preferred sizes follow the max */
+                dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax);
+        }
+        if (bmval0 & FATTR4_WORD0_MAXWRITE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsinfo->wtmax);
+		fsinfo->wtpref = fsinfo->wtmax;
+                dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax);
+        }
+	/* Attributes selected by the second bitmap word. */
+        if (bmval1 & FATTR4_WORD1_MODE) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);
+		nfp->mode = (dummy32 & ~S_IFMT) | fmode;	/* file-type bits come from TYPE above */
+                dprintk("read_attrs: mode=0%o\n", nfp->mode);
+        }
+        if (bmval1 & FATTR4_WORD1_NUMLINKS) {
+                READ_BUF(4);
+                len += 4;
+                READ32(nfp->nlink);
+                dprintk("read_attrs: nlinks=%d\n", nfp->nlink);	/* a count: decimal, not octal */
+        }
+        if (bmval1 & FATTR4_WORD1_OWNER) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);    /* name length */
+                if (dummy32 > XDR_MAX_NETOBJ) {
+			dprintk("read_attrs: name too long!\n");
+                        goto xdr_error;
+                }
+                READ_BUF(dummy32);
+                len += (XDR_QUADLEN(dummy32) << 2);	/* name is counted in whole 32-bit words */
+                if ((status = decode_uid((char *)p, dummy32, &nfp->uid))) {
+                        dprintk("read_attrs: gss_get_num failed!\n");
+                        goto out;
+                }
+                dprintk("read_attrs: uid=%d\n", (int)nfp->uid);
+        }
+        if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+                READ_BUF(4);
+                len += 4;
+                READ32(dummy32);    /* name length */
+                if (dummy32 > XDR_MAX_NETOBJ) {
+                        dprintk("read_attrs: name too long!\n");
+                        goto xdr_error;
+                }
+                READ_BUF(dummy32);
+                len += (XDR_QUADLEN(dummy32) << 2);
+                if ((status = decode_gid((char *)p, dummy32, &nfp->gid))) {
+                        dprintk("read_attrs: gss_get_num failed!\n");
+                        goto out;
+                }
+                dprintk("read_attrs: gid=%d\n", (int)nfp->gid);
+        }
+        if (bmval1 & FATTR4_WORD1_RAWDEV) {
+                READ_BUF(8);
+                len += 8;
+                READ32(dummy32);	/* first word: shifted up by MINORBITS */
+		nfp->rdev = (dummy32 << MINORBITS);
+                READ32(dummy32);	/* second word: masked with MINORMASK */
+		nfp->rdev |= (dummy32 & MINORMASK);
+                dprintk("read_attrs: rdev=%d\n", nfp->rdev);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->abytes);
+                dprintk("read_attrs: savail=0x%Lx\n", fsstat->abytes);
+        }
+	if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->fbytes);
+                dprintk("read_attrs: sfree=0x%Lx\n", fsstat->fbytes);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+                READ_BUF(8);
+                len += 8;
+                READ64(fsstat->tbytes);
+                dprintk("read_attrs: stotal=0x%Lx\n", fsstat->tbytes);
+        }
+        if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+                READ_BUF(8);
+                len += 8;
+                READ64(nfp->du.nfs3.used);
+                dprintk("read_attrs: sused=0x%Lx\n", nfp->du.nfs3.used);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->atime);
+                dprintk("read_attrs: atime=%d\n", (int)nfp->atime);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->ctime);
+                dprintk("read_attrs: ctime=%d\n", (int)nfp->ctime);
+        }
+        if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+                READ_BUF(12);
+                len += 12;
+                READTIME(nfp->mtime);
+                dprintk("read_attrs: mtime=%d\n", (int)nfp->mtime);
+        }
+        if (len != attrlen)	/* bytes consumed must equal the advertised attrlen */
+                goto xdr_error;
+	
+success:
+        DECODE_TAIL;
+}
+
+static int
+decode_getfh(struct nfs4_compound *cp, int nfserr, struct nfs4_getfh *getfh)
+{
+	struct nfs_fh *fh = getfh->gf_fhandle;
+	u32 len;	/* unsigned, so an oversized wire length cannot look negative */
+        DECODE_HEAD;
+
+	/* Zero handle first to allow comparisons */
+	memset(fh, 0, sizeof(*fh));
+	/* On success the reply is a length word followed by that many handle bytes. */
+        if (!nfserr) {
+                READ_BUF(4);
+		READ32(len);
+		if (len > NFS_MAXFHSIZE)	/* would not fit in fh->data */
+			goto xdr_error;
+		fh->size = len;
+                READ_BUF(len);
+                COPYMEM(fh->data, len);
+        }
+
+        DECODE_TAIL;
+}
+
+static int
+decode_link(struct nfs4_compound *cp, int nfserr, struct nfs4_link *link)
+{
+	/* Nothing is decoded when the operation failed. */
+	if (nfserr)
+		return 0;
+	/* Otherwise the result is a change_info, decoded into ln_cinfo. */
+	return decode_change_info(cp, link->ln_cinfo);
+}
+
+static int
+decode_open(struct nfs4_compound *cp, int nfserr, struct nfs4_open *open)
+{
+	u32 bmlen, delegation_type;
+	DECODE_HEAD;
+	/* Decode an OPEN result; a failed OPEN is not decoded any further. */
+	if (!nfserr) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(open->op_stateid, sizeof(nfs4_stateid));
+		/* Propagate a change_info decode failure, as decode_create() does. */
+		if ((status = decode_change_info(cp, open->op_cinfo)))
+			goto out;
+		READ_BUF(8);
+		READ32(*open->op_rflags);
+		READ32(bmlen);
+		if (bmlen > 10)
+			goto xdr_error;
+		/* Step over the bmlen bitmap words, then read the delegation type. */
+		READ_BUF((bmlen << 2) + 4);
+		p += bmlen;
+		READ32(delegation_type);
+		if (delegation_type != NFS4_OPEN_DELEGATE_NONE)
+			goto xdr_error;	/* any delegation in the reply is rejected */
+	}
+	
+	DECODE_TAIL;
+}
+
+static int
+decode_open_confirm(struct nfs4_compound *cp, int nfserr, struct nfs4_open_confirm *open_confirm)
+{
+	DECODE_HEAD;
+	/* Decode an OPEN_CONFIRM result: on success one stateid goes into oc_stateid. */
+	if (!nfserr) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(open_confirm->oc_stateid, sizeof(nfs4_stateid));
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_read(struct nfs4_compound *cp, int nfserr, struct nfs4_read *read)
+{
+	u32 throwaway;
+	DECODE_HEAD;
+	/* Decode a READ result header: eof flag, then the byte count. */
+	if (!nfserr) {
+		READ_BUF(8);
+		if (read->rd_eof)	/* caller may pass NULL when it does not want eof */
+			READ32(*read->rd_eof);
+		else
+			READ32(throwaway);
+		READ32(*read->rd_bytes_read);
+		if (*read->rd_bytes_read > read->rd_length)	/* more than was requested */
+			goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_readdir(struct nfs4_compound *cp, int nfserr, struct rpc_rqst *req, struct nfs4_readdir *readdir)
+{
+	struct xdr_buf	*rcvbuf = &req->rq_rcv_buf;
+	struct page	*page = *rcvbuf->pages;
+	unsigned int	pglen = rcvbuf->page_len;
+	u32		*end, *entry;
+	u32		len, attrlen, word;
+	int 		i;
+	DECODE_HEAD;
+	/* Decode a READDIR result: copy the verifier, then sanity-check each entry. */
+	if (!nfserr) {
+		READ_BUF(8);
+		COPYMEM(readdir->rd_resp_verifier, 8);
+		/* The entries themselves sit in the first receive page; walk them there. */
+		BUG_ON(pglen > PAGE_CACHE_SIZE);
+		p   = (u32 *) kmap(page);
+		end = (u32 *) ((char *)p + pglen + readdir->rd_pgbase);
+		/* Each pass checks one entry; a zero leading word ends the list. */
+		while (*p++) {
+			entry = p - 1;	/* remembered so a short packet can be cut off here */
+			if (p + 3 > end)
+				goto short_pkt;
+			p += 2;     /* cookie */
+			len = ntohl(*p++);  /* filename length */
+			if (len > NFS4_MAXNAMLEN) {
+				printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+				goto err_unmap;
+			}
+			/* Skip the name, rounded up to whole 32-bit words. */
+			p += XDR_QUADLEN(len);
+			if (p + 1 > end)
+				goto short_pkt;
+			len = ntohl(*p++);  /* bitmap length */
+			if (len > 10) {
+				printk(KERN_WARNING "NFS: giant bitmap in readdir (len 0x%x)\n", len);
+				goto err_unmap;
+			}
+			if (p + len + 1 > end)
+				goto short_pkt;
+			attrlen = 0;
+			for (i = 0; i < len; i++) {
+				word = ntohl(*p++);
+				if (!word)
+					continue;
+				else if (i == 0 && word == FATTR4_WORD0_FILEID) {
+					attrlen = 8;	/* FILEID alone is the only attribute accepted */
+					continue;
+				}
+				printk(KERN_WARNING "NFS: unexpected bitmap word in readdir (0x%x)\n", word);
+				goto err_unmap;
+			}
+			if (ntohl(*p++) != attrlen) {
+				printk(KERN_WARNING "NFS: unexpected attrlen in readdir\n");
+				goto err_unmap;
+			}
+			p += XDR_QUADLEN(attrlen);
+			if (p + 1 > end)
+				goto short_pkt;
+		}
+		kunmap(page);
+	}
+	
+	DECODE_TAIL;
+short_pkt:
+	printk(KERN_NOTICE "NFS: short packet in readdir reply!\n");
+	/* truncate listing; entry points into the page, so write before unmapping */
+	entry[0] = entry[1] = 0;
+	kunmap(page);
+	return 0;
+err_unmap:
+	kunmap(page);
+	return -errno_NFSERR_IO;
+}
+
+static int
+decode_readlink(struct nfs4_compound *cp, int nfserr, struct rpc_rqst *req, struct nfs4_readlink *readlink)
+{
+	struct page *pg;
+	u32 *lenp;
+	u32 nbytes;
+	int err = 0;
+
+	if (nfserr)
+		return 0;
+
+	/*
+	 * The XDR encode routine has arranged for the link text to be
+	 * copied directly into the first receive page: a length word in
+	 * network byte order followed by the text.  What remains is to
+	 * range-check the length, store it back in host byte order, and
+	 * null-terminate the text (the VFS expects null-termination).
+	 */
+	pg = req->rq_rcv_buf.pages[0];
+	lenp = (u32 *) kmap(pg);
+	nbytes = ntohl(*lenp);
+	if (nbytes > PAGE_CACHE_SIZE - 5) {
+		printk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		err = -EIO;
+	} else {
+		*lenp = nbytes;
+		((char *)(lenp + 1))[nbytes] = '\0';
+	}
+	kunmap(pg);
+	return err;
+}
+
+static int
+decode_remove(struct nfs4_compound *cp, int nfserr, struct nfs4_remove *remove)
+{
+	/* Nothing is decoded when the operation failed. */
+	if (nfserr)
+		return 0;
+
+	/* Otherwise the result is a change_info, decoded into rm_cinfo. */
+	return decode_change_info(cp, remove->rm_cinfo);
+}
+
+static int
+decode_rename(struct nfs4_compound *cp, int nfserr, struct nfs4_rename *rename)
+{
+	int err;
+
+	/* Nothing is decoded when the operation failed. */
+	if (nfserr)
+		return 0;
+	/* Source change_info first, then destination; stop at the first error. */
+	err = decode_change_info(cp, rename->rn_src_cinfo);
+	if (!err)
+		err = decode_change_info(cp, rename->rn_dst_cinfo);
+	return err;
+}
+
+static int
+decode_setattr(struct nfs4_compound *cp)
+{
+        u32 bmlen;
+        DECODE_HEAD;
+        /* Decode a SETATTR result: a bitmap, read whatever the op's status, not stored. */
+        READ_BUF(4);
+        READ32(bmlen);
+        if (bmlen > 10)
+                goto xdr_error;
+        READ_BUF(bmlen << 2);	/* bmlen 32-bit words; their values are not read */
+
+        DECODE_TAIL;
+}
+
+static int
+decode_setclientid(struct nfs4_compound *cp, int nfserr)
+{
+	DECODE_HEAD;
+	/* Decode a SETCLIENTID result; results land in cp->server->nfs4_state. */
+	if (!nfserr) {
+		READ_BUF(8 + sizeof(nfs4_verifier));
+		READ64(cp->server->nfs4_state->cl_clientid);
+		COPYMEM(cp->server->nfs4_state->cl_confirm, sizeof(nfs4_verifier));
+	}
+	else if (nfserr == NFSERR_CLID_INUSE) {
+		u32 len;
+		/* This status carries two strings; both are stepped over, not kept. */
+		/* skip netid string */
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+
+		/* skip uaddr string */
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_write(struct nfs4_compound *cp, int nfserr, struct nfs4_write *write)
+{
+	DECODE_HEAD;
+	/* Decode a WRITE result: byte count, committed level, 8-byte verifier. */
+	if (!nfserr) {
+		READ_BUF(16);
+		READ32(*write->wr_bytes_written);
+		if (*write->wr_bytes_written > write->wr_len)	/* more than was sent */
+			goto xdr_error;
+		READ32(write->wr_verf->committed);
+		COPYMEM(write->wr_verf->verifier, 8);
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+decode_compound(struct nfs4_compound *cp, struct rpc_rqst *req)
+{
+	u32 taglen;
+	u32 opnum, nfserr;
+	DECODE_HEAD;
+	/* Decode a COMPOUND reply: header first, then one result per operation. */
+	READ_BUF(8);
+	READ32(cp->toplevel_status);
+	READ32(taglen);
+
+	/*
+	 * We need this if our zero-copy I/O is going to work.  Rumor has
+	 * it that the spec will soon mandate it...
+	 */
+	if (taglen != cp->taglen)
+		dprintk("nfs4: non-conforming server returns tag length mismatch!\n");
+	/* NOTE(review): taglen is server-supplied and unchecked; confirm READ_BUF() bounds it. */
+	READ_BUF(taglen + 4);
+	p += XDR_QUADLEN(taglen);	/* step over the tag itself */
+	READ32(cp->resp_nops);
+	if (cp->resp_nops > cp->req_nops) {
+		dprintk("nfs4: resp_nops > req_nops!\n");
+		goto xdr_error;
+	}
+	/* cp->nops doubles as the index of the operation being decoded. */
+	for (cp->nops = 0; cp->nops < cp->resp_nops; cp->nops++) {
+		READ_BUF(8);
+		READ32(opnum);
+		if (opnum != cp->ops[cp->nops].opnum) {	/* results must mirror the request order */
+			dprintk("nfs4: operation mismatch!\n");
+			goto xdr_error;
+		}
+		READ32(nfserr);
+		if (cp->nops == cp->resp_nops - 1) {	/* last result must carry the top-level status */
+			if (nfserr != cp->toplevel_status) {
+				dprintk("nfs4: status mismatch!\n");
+				goto xdr_error;
+			}
+		}
+		else if (nfserr) {	/* every earlier result must be a success */
+			dprintk("nfs4: intermediate status nonzero!\n");
+			goto xdr_error;
+		}
+		cp->ops[cp->nops].nfserr = nfserr;
+		/* Cases that only set status = 0 decode no result body for that op. */
+		switch (opnum) {
+		case OP_ACCESS:
+			status = decode_access(cp, nfserr, &cp->ops[cp->nops].u.access);
+			break;
+		case OP_CLOSE:
+			status = decode_close(cp, nfserr, &cp->ops[cp->nops].u.close);
+			break;
+		case OP_COMMIT:
+			status = decode_commit(cp, nfserr, &cp->ops[cp->nops].u.commit);
+			break;
+		case OP_CREATE:
+			status = decode_create(cp, nfserr, &cp->ops[cp->nops].u.create);
+			break;
+		case OP_GETATTR:
+			status = decode_getattr(cp, nfserr, &cp->ops[cp->nops].u.getattr);
+			break;
+		case OP_GETFH:
+			status = decode_getfh(cp, nfserr, &cp->ops[cp->nops].u.getfh);
+			break;
+		case OP_LINK:
+			status = decode_link(cp, nfserr, &cp->ops[cp->nops].u.link);
+			break;
+		case OP_LOOKUP:
+			status = 0;
+			break;
+		case OP_OPEN:
+			status = decode_open(cp, nfserr, &cp->ops[cp->nops].u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			status = decode_open_confirm(cp, nfserr, &cp->ops[cp->nops].u.open_confirm);
+			break;
+		case OP_PUTFH:
+			status = 0;
+			break;
+		case OP_PUTROOTFH:
+			status = 0;
+			break;
+		case OP_READ:
+			status = decode_read(cp, nfserr, &cp->ops[cp->nops].u.read);
+			break;
+		case OP_READDIR:
+			status = decode_readdir(cp, nfserr, req, &cp->ops[cp->nops].u.readdir);
+			break;
+		case OP_READLINK:
+			status = decode_readlink(cp, nfserr, req, &cp->ops[cp->nops].u.readlink);
+			break;
+		case OP_RESTOREFH:
+			status = 0;
+			break;
+		case OP_REMOVE:
+			status = decode_remove(cp, nfserr, &cp->ops[cp->nops].u.remove);
+			break;
+		case OP_RENAME:
+			status = decode_rename(cp, nfserr, &cp->ops[cp->nops].u.rename);
+			break;
+		case OP_RENEW:
+			status = 0;
+			break;
+		case OP_SAVEFH:
+			status = 0;
+			break;
+		case OP_SETATTR:
+			status = decode_setattr(cp);	/* decoded without regard to nfserr */
+			break;
+		case OP_SETCLIENTID:
+			status = decode_setclientid(cp, nfserr);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			status = 0;
+			break;
+		case OP_WRITE:
+			status = decode_write(cp, nfserr, &cp->ops[cp->nops].u.write);
+			break;
+		default:
+			BUG();	/* opnum matched the request, so we sent an op we cannot decode */
+			return -EIO;
+		}
+		if (status)	/* a failing decoder aborts the rest of the reply */
+			goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * Decode void reply: nothing is read from the buffer; always returns 0.
+ */
+static int
+nfs4_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	return 0;
+}
+
+/*
+ * Decode a COMPOUND reply: point the cursor in @cp at the receive iovec,
+ * run decode_compound(), then turn a nonzero top-level status into -errno.
+ */
+static int
+nfs4_xdr_dec_compound(struct rpc_rqst *rqstp, u32 *p, struct nfs4_compound *cp)
+{
+	u8 *base = (u8 *) rqstp->rq_rvec->iov_base;
+	int err;
+
+	/* Decoding starts at p and may not run past base + iov_len. */
+	cp->p = p;
+	cp->end = (u32 *) (base + rqstp->rq_rvec->iov_len);
+
+	err = decode_compound(cp, rqstp);
+	if (err)
+		return err;
+	if (!cp->toplevel_status)
+		return 0;
+	return -nfs_stat_to_errno(cp->toplevel_status);
+}
+
+u32 *
+nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	u32 len;
+	/* Decode one directory entry at p into *entry; plus is not used here. */
+	if (!*p++) {	/* leading word zero: no entry at this position */
+		if (!*p)	/* next word zero too, as decode_readdir() writes when truncating */
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+	/* Layout from here: 64-bit cookie, name length, name padded to 32-bit words. */
+	entry->prev_cookie = entry->cookie;
+	p = xdr_decode_hyper(p, &entry->cookie);
+	entry->len = ntohl(*p++);
+	entry->name = (const char *) p;	/* points into the buffer; nothing is copied */
+	p += XDR_QUADLEN(entry->len);
+
+	if (entry->cookie > COOKIE_MAX)
+		entry->cookie = COOKIE_MAX;
+	
+	/*
+	 * In case the server doesn't return an inode number,
+	 * we fake one here.  (We don't use inode number 0,
+	 * since glibc seems to choke on it...)
+	 */
+	entry->ino = 1;
+
+	len = ntohl(*p++);             /* bitmap length */
+	p += len;                      /* bitmap words are skipped unexamined */
+	len = ntohl(*p++);             /* attribute buffer length */
+	if (len)                       /* any attribute data is taken to be the 64-bit fileid */
+		p = xdr_decode_hyper(p, &entry->ino);
+	/* eof when the next word is zero and the word after it is nonzero. */
+	entry->eof = !p[0] && p[1];
+	return p;
+}
+
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))	/* note: the larger argument is evaluated twice */
+#endif
+/* Build one table entry: name, encoder, decoder, MAX(arg, res size) << 2, and a 0. */
+#define PROC(proc, argtype, restype)				\
+    { "nfs4_" #proc,						\
+      (kxdrproc_t) nfs4_xdr_##argtype,				\
+      (kxdrproc_t) nfs4_xdr_##restype,				\
+      MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+      0							\
+    }
+/* The procedure table has exactly two entries: null and compound. */
+static struct rpc_procinfo	nfs4_procedures[] = {
+  PROC(null,		enc_void,	dec_void),
+  PROC(compound,	enc_compound,	dec_compound)
+};
+/* RPC version descriptor; nrprocs is computed from the table above. */
+struct rpc_version		nfs_version4 = {
+	.number			= 4,
+	.nrprocs		= sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+	.procs			= nfs4_procedures
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 76ab4ecc3ea8..3a23ac81e80f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -472,6 +472,25 @@ extern void * nfs_root_data(void);
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
+#ifdef CONFIG_NFS_V4
+struct nfs4_client {
+        atomic_t                cl_count;       /* refcount */
+        u64                     cl_clientid;    /* constant */
+	 nfs4_verifier           cl_confirm;     /* presumably the SETCLIENTID confirm verifier -- TODO confirm */
+
+        /*
+         * Starts a list of lockowners, linked through lo_list.
+	 */
+        struct list_head        cl_lockowners;  /* protected by state_spinlock */
+};
+
+/* nfs4proc.c */
+extern int nfs4_proc_renew(struct nfs_server *server);
+
+/* nfs4renewd.c */
+extern int nfs4_init_renewd(struct nfs_server *server);
+#endif /* CONFIG_NFS_V4 */
+
 #ifdef CONFIG_NFS_V4
 
 extern struct nfs4_client *nfs4_get_client(void);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e542fe6982c5..4bb5125056e7 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -325,6 +325,219 @@ struct nfs3_readdirres {
 	int			plus;
 };
 
+#ifdef CONFIG_NFS_V4
+
+typedef u64 clientid4;
+
+struct nfs4_change_info {	/* result structure handed to decode_change_info() */
+	u32				atomic;
+	u64				before;
+	u64				after;
+};
+
+struct nfs4_access {
+	u32				ac_req_access;     /* request: access bits asked about */
+	u32 *				ac_resp_supported; /* response: must be a subset of ac_req_access */
+	u32 *				ac_resp_access;    /* response: must be a subset of *ac_resp_supported */
+};
+
+struct nfs4_close {
+	char *				cl_stateid;        /* request; decode_close() copies the reply stateid here */
+	u32				cl_seqid;          /* request */
+};
+
+struct nfs4_commit {
+	u64				co_start;          /* request */
+	u32				co_len;            /* request */
+	struct nfs_writeverf *		co_verifier;       /* response: 8 bytes copied into ->verifier */
+};
+
+struct nfs4_create {
+	u32				cr_ftype;          /* request */
+	union {                                            /* request */
+		struct {
+			u32		textlen;
+			const char *	text;
+		} symlink;   /* NF4LNK */
+		struct {
+			u32		specdata1;
+			u32		specdata2;
+		} device;    /* NF4BLK, NF4CHR */
+	} u;
+	u32				cr_namelen;        /* request */
+	const char *			cr_name;           /* request */
+	struct iattr *			cr_attrs;          /* request */
+	struct nfs4_change_info	*	cr_cinfo;          /* response: filled by decode_create() */
+};
+#define cr_textlen			u.symlink.textlen	/* shorthands for the union members */
+#define cr_text				u.symlink.text
+#define cr_specdata1			u.device.specdata1
+#define cr_specdata2			u.device.specdata2
+
+struct nfs4_getattr {
+        u32 *				gt_bmval;          /* request: two bitmap words ([0] and [1]) */
+        struct nfs_fattr *		gt_attrs;          /* response; NULL-checked before defaults are set */
+	struct nfs_fsstat *		gt_fsstat;         /* response */
+	struct nfs_fsinfo *		gt_fsinfo;         /* response; NULL-checked before defaults are set */
+	struct nfs_pathconf *		gt_pathconf;       /* response */
+	u32 *				gt_bmres;	   /* response: the two bitmap words returned */
+};
+
+struct nfs4_getfh {
+	struct nfs_fh *			gf_fhandle;       /* response; zeroed by decode_getfh() before filling */
+};
+
+struct nfs4_link {
+	u32				ln_namelen;       /* request */
+	const char *			ln_name;          /* request */
+	struct nfs4_change_info *	ln_cinfo;         /* response: filled by decode_link() */
+};
+
+struct nfs4_lookup {	/* request only: decode_compound() decodes no body for OP_LOOKUP */
+	struct qstr *			lo_name;          /* request */
+};
+
+struct nfs4_open {
+	u32				op_share_access;  /* request */
+	u32				op_opentype;      /* request */
+	u32				op_createmode;    /* request */
+	union {                                           /* request */
+		struct iattr *		attrs;    /* UNCHECKED, GUARDED */
+		nfs4_verifier		verifier; /* EXCLUSIVE */
+	} u;
+	struct qstr *			op_name;          /* request */
+	char *				op_stateid;       /* response: sizeof(nfs4_stateid) bytes */
+	struct nfs4_change_info	*	op_cinfo;         /* response */
+	u32 *				op_rflags;        /* response */
+};
+#define op_attrs     u.attrs	/* shorthands for the union members */
+#define op_verifier  u.verifier
+
+struct nfs4_open_confirm {
+	char *				oc_stateid;       /* request; decode_open_confirm() copies the reply stateid here */
+};
+
+struct nfs4_putfh {	/* request only: decode_compound() decodes no body for OP_PUTFH */
+	struct nfs_fh *			pf_fhandle;       /* request */
+};
+
+struct nfs4_read {
+	u64				rd_offset;        /* request */
+	u32				rd_length;        /* request */
+	u32				*rd_eof;          /* response; may be NULL (flag is then discarded) */
+	u32				*rd_bytes_read;   /* response; reply rejected if > rd_length */
+	struct page **			rd_pages;   /* zero-copy data */
+	unsigned int			rd_pgbase;  /* zero-copy data */
+};
+
+struct nfs4_readdir {
+	u64				rd_cookie;        /* request */
+	nfs4_verifier			rd_req_verifier;  /* request */
+	u32				rd_count;         /* request */
+	u32				rd_bmval[2];      /* request */
+	nfs4_verifier			rd_resp_verifier; /* response: 8 bytes copied by decode_readdir() */
+	struct page **			rd_pages;   /* zero-copy data */
+	unsigned int			rd_pgbase;  /* zero-copy data; added in when decode_readdir() computes the end */
+};
+
+struct nfs4_readlink {	/* decode_readlink() reads the request's receive pages, not these fields */
+	u32				rl_count;   /* zero-copy data */
+	struct page **			rl_pages;   /* zero-copy data */
+};
+
+struct nfs4_remove {
+	u32				rm_namelen;       /* request */
+	const char *			rm_name;          /* request */
+	struct nfs4_change_info *	rm_cinfo;         /* response: filled by decode_remove() */
+};
+
+struct nfs4_rename {
+	u32				rn_oldnamelen;    /* request */
+	const char *			rn_oldname;       /* request */
+	u32				rn_newnamelen;    /* request */
+	const char *			rn_newname;       /* request */
+	struct nfs4_change_info	*	rn_src_cinfo;     /* response: decoded first */
+	struct nfs4_change_info *	rn_dst_cinfo;     /* response: decoded second */
+};
+
+struct nfs4_setattr {	/* request only: decode_setattr() takes no result structure */
+	char *				st_stateid;       /* request */
+	struct iattr *			st_iap;           /* request */
+};
+
+struct nfs4_setclientid {	/* request only: the reply is stored in cp->server->nfs4_state */
+	nfs4_verifier			sc_verifier;      /* request */
+	char *				sc_name;	  /* request */
+	u32				sc_prog;          /* request */
+	char				sc_netid[4];	  /* request */
+	char				sc_uaddr[24];     /* request */
+	u32				sc_cb_ident;      /* request */
+};
+
+struct nfs4_write {
+	u64				wr_offset;        /* request */
+	u32				wr_stable_how;    /* request */
+	u32				wr_len;           /* request */
+	u32 *				wr_bytes_written; /* response; reply rejected if > wr_len */
+	struct nfs_writeverf *		wr_verf;          /* response: committed level + 8-byte verifier */
+	struct page **			wr_pages;   /* zero-copy data */
+	unsigned int			wr_pgbase;  /* zero-copy data */
+};
+
+struct nfs4_op {
+	u32				opnum;	/* decode_compound() requires the reply's opnum to match */
+	u32				nfserr;	/* per-op status stored by decode_compound() */
+	union {	/* member in use is selected by opnum */
+		struct nfs4_access	access;
+		struct nfs4_close	close;
+		struct nfs4_commit	commit;
+		struct nfs4_create	create;
+		struct nfs4_getattr	getattr;
+		struct nfs4_getfh	getfh;
+		struct nfs4_link	link;
+		struct nfs4_lookup	lookup;
+		struct nfs4_open	open;
+		struct nfs4_open_confirm open_confirm;
+		struct nfs4_putfh	putfh;
+		struct nfs4_read	read;
+		struct nfs4_readdir	readdir;
+		struct nfs4_readlink	readlink;
+		struct nfs4_remove	remove;
+		struct nfs4_rename	rename;
+		struct nfs4_setattr	setattr;
+		struct nfs4_setclientid	setclientid;
+		struct nfs4_write	write;
+	} u;
+};
+
+struct nfs4_compound {
+	unsigned int		flags;   /* defined below */
+	struct nfs_server *	server;
+
+	/* RENEW information */
+	int			renew_index;
+	unsigned long		timestamp;
+
+	/* scratch variables for XDR encode/decode */
+	int			nops;	/* during decode: index of the op being decoded */
+	u32 *			p;	/* current position in the buffer */
+	u32 *			end;	/* end of the buffer (set by nfs4_xdr_dec_compound()) */
+
+	/* the individual COMPOUND operations */
+	struct nfs4_op		*ops;
+
+	/* request */
+	int			req_nops;
+	u32			taglen;	/* a differing reply tag length is only logged */
+	char *			tag;
+	
+	/* response */
+	int			resp_nops;	/* reply rejected if > req_nops */
+	int			toplevel_status;	/* must equal the last op's status */
+};
+
+#endif /* CONFIG_NFS_V4 */
+
 struct nfs_read_data {
 	struct rpc_task		task;
 	struct inode		*inode;
@@ -338,7 +551,12 @@ struct nfs_read_data {
 			struct nfs_readres  res;
 		} v3;   /* also v2 */
 #ifdef CONFIG_NFS_V4
-		/* NFSv4 data will come here... */
+		struct {
+			struct nfs4_compound  compound;
+			struct nfs4_op        ops[3];
+			u32                   res_count;
+			u32                   res_eof;
+		} v4;
 #endif
 	} u;
 };
@@ -353,11 +571,17 @@ struct nfs_write_data {
 	struct page		*pagevec[NFS_WRITE_MAXIOV];
 	union {
 		struct {
-			struct nfs_writeargs args;
-			struct nfs_writeres  res;
+			struct nfs_writeargs	args;		/* argument struct */
+			struct nfs_writeres	res;		/* result struct */
 		} v3;
 #ifdef CONFIG_NFS_V4
-		/* NFSv4 data to come here... */
+		struct {
+			struct nfs4_compound  compound;
+			struct nfs4_op        ops[3];
+			u32                   arg_count;
+			u32                   arg_stable;
+			u32                   res_count;
+		} v4;
 #endif
 	} u;
 };
@@ -430,8 +654,10 @@ struct nfs_rpc_ops {
  */
 extern struct nfs_rpc_ops	nfs_v2_clientops;
 extern struct nfs_rpc_ops	nfs_v3_clientops;
+extern struct nfs_rpc_ops	nfs_v4_clientops;
 extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
+extern struct rpc_version	nfs_version4;
 extern struct rpc_program	nfs_program;
 extern struct rpc_stat		nfs_rpcstat;
 
-- 
cgit v1.2.3


From 1477a825d7e6486a077608c7baf6abbb6f27ed95 Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Tue, 15 Oct 2002 05:40:46 -0700
Subject: [PATCH] Read-Copy Update infrastructure

This is the RCU core patch from akpm's tree. It has been in his
tree since about 2.5.37-mm1 along with dcache_rcu and so far it has
worked fine. For 2.5, I am hoping that we might get the following
RCU patches included -

1. rt_rcu - ipv4 routecache lookup. Davem agreed to include this patch
   if and when you include RCU core in your tree.

2. dcache_rcu (by Maneesh Soni) - dcache lookup avoiding dcache_lock as
   much as possible. This has been in akpm's tree - it is stable and gives
   us good yield. I have been submitting this to Viro and I will publish
   some more benchmark numbers later to help decide on this.

This RCU core implements RCU APIs, call_rcu() and synchronize_kernel(),
by monitoring a per-CPU quiescent state (idle/user etc.) counter.
call_rcu() queues a callback to be invoked after all the CPUs have
gone through a quiescent state. Queuing is per-CPU and each per-CPU
batch gets a batch number. As batches get their turn, a global
cpu mask is used to keep track of CPUs pending quiescent state.
Checking for quiescent cycle is done by saving the per-CPU
counter at the beginning of the batch and then monitoring it for change
through the local timer interrupt handler.
---
 include/linux/rcupdate.h | 134 ++++++++++++++++++++++++++
 init/main.c              |   2 +
 kernel/Makefile          |   5 +-
 kernel/rcupdate.c        | 242 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c           |   5 +
 5 files changed, 386 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/rcupdate.h
 create mode 100644 kernel/rcupdate.c

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
new file mode 100644
index 000000000000..a5ffb7bb5743
--- /dev/null
+++ b/include/linux/rcupdate.h
@@ -0,0 +1,134 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ * 
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUPDATE_H
+#define __LINUX_RCUPDATE_H
+
+#ifdef __KERNEL__
+
+#include <linux/cache.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+
+/**
+ * struct rcu_head - callback structure for use with RCU
+ * @list: list_head to queue the update requests
+ * @func: actual update function to call after the grace period.
+ * @arg: argument to be passed to the actual update function.
+ */
+struct rcu_head {
+	struct list_head list;
+	void (*func)(void *obj);
+	void *arg;
+};
+
+#define RCU_HEAD_INIT(head) \
+		{ list: LIST_HEAD_INIT(head.list), func: NULL, arg: NULL }
+#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head)
+#define INIT_RCU_HEAD(ptr) do { \
+       INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \
+} while (0)
+
+
+
+/* Control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	spinlock_t	mutex;		/* Guard this struct                  */
+	long		curbatch;	/* Current batch number.	      */
+	long		maxbatch;	/* Max requested batch number.        */
+	unsigned long	rcu_cpu_mask; 	/* CPUs that need to switch in order  */
+					/* for current batch to proceed.      */
+};
+
+/* Is batch a before batch b ? */
+static inline int rcu_batch_before(long a, long b)
+{
+        return (a - b) < 0;
+}
+
+/* Is batch a after batch b ? */
+static inline int rcu_batch_after(long a, long b)
+{
+        return (a - b) > 0;
+}
+
+/*
+ * Per-CPU data for Read-Copy Update.
+ * nxtlist - new callbacks are added here (by call_rcu())
+ * curlist - current batch for which quiescent cycle started if any
+ */
+struct rcu_data {
+	long		qsctr;		 /* Count of quiescent states seen */
+        long            last_qsctr;	 /* value of qsctr at beginning */
+                                         /* of rcu grace period */
+        long  	       	batch;           /* Batch # for current RCU batch */
+        struct list_head  nxtlist;
+        struct list_head  curlist;
+} ____cacheline_aligned_in_smp;
+
+extern struct rcu_data rcu_data[NR_CPUS];
+extern struct rcu_ctrlblk rcu_ctrlblk;
+
+#define RCU_qsctr(cpu) 		(rcu_data[(cpu)].qsctr)
+#define RCU_last_qsctr(cpu) 	(rcu_data[(cpu)].last_qsctr)
+#define RCU_batch(cpu) 		(rcu_data[(cpu)].batch)
+#define RCU_nxtlist(cpu) 	(rcu_data[(cpu)].nxtlist)
+#define RCU_curlist(cpu) 	(rcu_data[(cpu)].curlist)
+
+#define RCU_QSCTR_INVALID	0
+
+static inline int rcu_pending(int cpu) 
+{
+	if ((!list_empty(&RCU_curlist(cpu)) &&
+	     rcu_batch_before(RCU_batch(cpu), rcu_ctrlblk.curbatch)) ||
+	    (list_empty(&RCU_curlist(cpu)) &&
+			 !list_empty(&RCU_nxtlist(cpu))) ||
+	    test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask))
+		return 1;
+	else
+		return 0;
+}
+
+#define rcu_read_lock()		preempt_disable()
+#define rcu_read_unlock()	preempt_enable()
+
+extern void rcu_init(void);
+extern void rcu_check_callbacks(int cpu, int user);
+
+/* Exported interfaces */
+extern void FASTCALL(call_rcu(struct rcu_head *head, 
+                          void (*func)(void *arg), void *arg));
+extern void synchronize_kernel(void);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_RCUPDATE_H */
diff --git a/init/main.c b/init/main.c
index f69c298b9a6f..e94bba3f7f44 100644
--- a/init/main.c
+++ b/init/main.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/security.h>
 #include <linux/workqueue.h>
+#include <linux/rcupdate.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -398,6 +399,7 @@ asmlinkage void __init start_kernel(void)
 	printk("Kernel command line: %s\n", saved_command_line);
 	parse_options(command_line);
 	trap_init();
+	rcu_init();
 	init_IRQ();
 	sched_init();
 	softirq_init();
diff --git a/kernel/Makefile b/kernel/Makefile
index b3fce6d3ac9c..f862dedfabe9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,12 +3,13 @@
 #
 
 export-objs = signal.o sys.o kmod.o workqueue.o ksyms.o pm.o exec_domain.o \
-	      printk.o platform.o suspend.o dma.o module.o cpufreq.o
+	      printk.o platform.o suspend.o dma.o module.o cpufreq.o rcupdate.o
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    module.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o
+	    signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o \
+	    rcupdate.o
 
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
new file mode 100644
index 000000000000..dfdf1774489d
--- /dev/null
+++ b/kernel/rcupdate.c
@@ -0,0 +1,242 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ * 
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+
+/* Definition for rcupdate control block. */
+struct rcu_ctrlblk rcu_ctrlblk = 
+	{ .mutex = SPIN_LOCK_UNLOCKED, .curbatch = 1, 
+	  .maxbatch = 1, .rcu_cpu_mask = 0 };
+struct rcu_data rcu_data[NR_CPUS] __cacheline_aligned;
+
+/* Fake initialization required by compiler */
+static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
+#define RCU_tasklet(cpu) (per_cpu(rcu_tasklet, cpu))
+
+/**
+ * call_rcu - Queue an RCU update request.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ * @arg: argument to be passed to the update function
+ *
+ * The request is appended to the calling CPU's nxtlist with local
+ * interrupts disabled.  @func is invoked with @arg once all CPUs have
+ * passed through a quiescent state.  Read-side critical sections relying
+ * on call_rcu() must be protected by rcu_read_lock()/rcu_read_unlock().
+ */
+void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+	int cpu;
+	unsigned long flags;
+
+	head->func = func;
+	head->arg = arg;
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	list_add_tail(&head->list, &RCU_nxtlist(cpu));
+	local_irq_restore(flags);
+}
+
+/*
+ * Invoke the completed RCU callbacks queued on the given list.  Each
+ * entry is unlinked first and is not touched once its function is called.
+ */
+static void rcu_do_batch(struct list_head *list)
+{
+	struct list_head *entry;
+	struct rcu_head *head;
+
+	while (!list_empty(list)) {
+		entry = list->next;
+		list_del(entry);
+		head = list_entry(entry, struct rcu_head, list);
+		head->func(head->arg);
+	}
+}
+
+/*
+ * Register a new batch of callbacks, and start it up if there is currently no
+ * active batch and the batch to be registered has not already occurred.
+ * Caller must hold the rcu_ctrlblk lock.
+ */
+static void rcu_start_batch(long newbatch)
+{
+	if (rcu_batch_before(rcu_ctrlblk.maxbatch, newbatch)) {
+		rcu_ctrlblk.maxbatch = newbatch;
+	}
+	if (rcu_batch_before(rcu_ctrlblk.maxbatch, rcu_ctrlblk.curbatch) ||
+	    (rcu_ctrlblk.rcu_cpu_mask != 0)) {
+		return;
+	}
+	rcu_ctrlblk.rcu_cpu_mask = cpu_online_map;
+}
+
+/*
+ * Check if the cpu has gone through a quiescent state (say context
+ * switch). If so and if it already hasn't done so in this RCU
+ * quiescent cycle, then indicate that it has done so.
+ */
+static void rcu_check_quiescent_state(void)
+{
+	int cpu = smp_processor_id();
+
+	if (!test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask)) {
+		return;
+	}
+
+	/* 
+	 * Races with local timer interrupt - in the worst case
+	 * we may miss one quiescent state of that CPU. That is
+	 * tolerable. So no need to disable interrupts.
+	 */
+	if (RCU_last_qsctr(cpu) == RCU_QSCTR_INVALID) {
+		RCU_last_qsctr(cpu) = RCU_qsctr(cpu);
+		return;
+	}
+	if (RCU_qsctr(cpu) == RCU_last_qsctr(cpu)) {
+		return;
+	}
+
+	spin_lock(&rcu_ctrlblk.mutex);
+	if (!test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask)) {
+		spin_unlock(&rcu_ctrlblk.mutex);
+		return;
+	}
+	clear_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask);
+	RCU_last_qsctr(cpu) = RCU_QSCTR_INVALID;
+	if (rcu_ctrlblk.rcu_cpu_mask != 0) {
+		spin_unlock(&rcu_ctrlblk.mutex);
+		return;
+	}
+	rcu_ctrlblk.curbatch++;
+	rcu_start_batch(rcu_ctrlblk.maxbatch);
+	spin_unlock(&rcu_ctrlblk.mutex);
+}
+
+
+/*
+ * Per-CPU tasklet body: advances this CPU's callback lists, runs done ones.
+ */
+static void rcu_process_callbacks(unsigned long unused)
+{
+	int cpu = smp_processor_id();
+	LIST_HEAD(list);
+
+	if (!list_empty(&RCU_curlist(cpu)) &&
+	    rcu_batch_after(rcu_ctrlblk.curbatch, RCU_batch(cpu))) {
+		list_splice(&RCU_curlist(cpu), &list);
+		INIT_LIST_HEAD(&RCU_curlist(cpu));
+	}
+
+	local_irq_disable();
+	if (!list_empty(&RCU_nxtlist(cpu)) && list_empty(&RCU_curlist(cpu))) {
+		list_splice(&RCU_nxtlist(cpu), &RCU_curlist(cpu));
+		INIT_LIST_HEAD(&RCU_nxtlist(cpu));
+		local_irq_enable();
+
+		/*
+		 * curlist is new: tag it with the next batch number and request it
+		 */
+		spin_lock(&rcu_ctrlblk.mutex);
+		RCU_batch(cpu) = rcu_ctrlblk.curbatch + 1;
+		rcu_start_batch(RCU_batch(cpu));
+		spin_unlock(&rcu_ctrlblk.mutex);
+	} else {
+		local_irq_enable();
+	}
+	rcu_check_quiescent_state();
+	if (!list_empty(&list))
+		rcu_do_batch(&list);
+}
+
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user ||	/* the timer tick itself is one hardirq level */
+	    (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= HARDIRQ_OFFSET))
+		RCU_qsctr(cpu)++;
+	tasklet_schedule(&RCU_tasklet(cpu));
+}
+
+/*
+ * Initializes rcu mechanism.  Assumed to be called early.
+ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
+ * Note that qsctr, last_qsctr and batch need no separate setup: the
+ * memset() below zeroes them, and ``0'' is RCU_QSCTR_INVALID.
+ */
+void __init rcu_init(void)
+{
+	int i;
+
+	memset(&rcu_data[0], 0, sizeof(rcu_data));
+	for (i = 0; i < NR_CPUS; i++) {
+		tasklet_init(&RCU_tasklet(i), rcu_process_callbacks, 0UL);
+		INIT_LIST_HEAD(&RCU_nxtlist(i));
+		INIT_LIST_HEAD(&RCU_curlist(i));
+	}
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+	complete(completion);
+}
+
+/**
+ * synchronize_kernel - wait until all the CPUs have gone
+ * through a "quiescent" state. It may sleep.
+ */
+void synchronize_kernel(void)
+{
+	struct rcu_head rcu;
+	DECLARE_COMPLETION(completion);
+
+	/* Queue a callback that completes 'completion' after the grace period */
+	call_rcu(&rcu, wakeme_after_rcu, &completion);
+
+	/* Wait for it; rcu and completion stay on our stack until then */
+	wait_for_completion(&completion);
+}
+
+
+EXPORT_SYMBOL(call_rcu);
+EXPORT_SYMBOL(synchronize_kernel);
diff --git a/kernel/sched.c b/kernel/sched.c
index 0464ac0649b8..20d2854c0bc6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -31,6 +31,7 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
+#include <linux/rcupdate.h>
 
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
@@ -865,6 +866,9 @@ void scheduler_tick(int user_ticks, int sys_ticks)
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 
+ 	if (rcu_pending(cpu))
+ 		rcu_check_callbacks(cpu, user_ticks);
+
 	if (p == rq->idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
@@ -1023,6 +1027,7 @@ pick_next_task:
 switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
+	RCU_qsctr(prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
-- 
cgit v1.2.3